diff --git "a/trainer_state.json" "b/trainer_state.json" new file mode 100644--- /dev/null +++ "b/trainer_state.json" @@ -0,0 +1,58516 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.22460003916103247, + "global_step": 19500, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 1.0000000000000002e-06, + "loss": 6.4378, + "step": 2 + }, + { + "epoch": 0.0, + "learning_rate": 2.0000000000000003e-06, + "loss": 5.9931, + "step": 4 + }, + { + "epoch": 0.0, + "learning_rate": 3e-06, + "loss": 6.5129, + "step": 6 + }, + { + "epoch": 0.0, + "learning_rate": 4.000000000000001e-06, + "loss": 6.2221, + "step": 8 + }, + { + "epoch": 0.0, + "learning_rate": 5e-06, + "loss": 6.866, + "step": 10 + }, + { + "epoch": 0.0, + "learning_rate": 6e-06, + "loss": 6.2606, + "step": 12 + }, + { + "epoch": 0.0, + "learning_rate": 7.000000000000001e-06, + "loss": 5.9494, + "step": 14 + }, + { + "epoch": 0.0, + "learning_rate": 8.000000000000001e-06, + "loss": 6.0689, + "step": 16 + }, + { + "epoch": 0.0, + "learning_rate": 9e-06, + "loss": 6.0829, + "step": 18 + }, + { + "epoch": 0.0, + "learning_rate": 1e-05, + "loss": 5.8833, + "step": 20 + }, + { + "epoch": 0.0, + "learning_rate": 1.1000000000000001e-05, + "loss": 5.9735, + "step": 22 + }, + { + "epoch": 0.0, + "learning_rate": 1.2e-05, + "loss": 5.5391, + "step": 24 + }, + { + "epoch": 0.0, + "learning_rate": 1.3000000000000001e-05, + "loss": 5.6865, + "step": 26 + }, + { + "epoch": 0.0, + "learning_rate": 1.4000000000000001e-05, + "loss": 5.286, + "step": 28 + }, + { + "epoch": 0.0, + "learning_rate": 1.5e-05, + "loss": 5.8669, + "step": 30 + }, + { + "epoch": 0.0, + "learning_rate": 1.6000000000000003e-05, + "loss": 5.4368, + "step": 32 + }, + { + "epoch": 0.0, + "learning_rate": 1.7000000000000003e-05, + "loss": 5.2647, + "step": 34 + }, + { + "epoch": 0.0, + "learning_rate": 1.8e-05, + "loss": 5.9864, + "step": 36 + }, + { + "epoch": 0.0, + "learning_rate": 1.9e-05, + "loss": 5.2934, + "step": 38 + }, + { + "epoch": 0.0, + "learning_rate": 2e-05, + "loss": 4.8785, + "step": 40 + }, + { + "epoch": 0.0, + "learning_rate": 2.1e-05, + "loss": 4.8154, + "step": 42 + }, + { + "epoch": 0.0, + "learning_rate": 2.2000000000000003e-05, + "loss": 4.3948, + "step": 44 + }, + { + "epoch": 0.0, + "learning_rate": 2.3000000000000003e-05, + "loss": 4.6187, + "step": 46 + }, + { + "epoch": 0.0, + "learning_rate": 2.4e-05, + "loss": 4.2847, + "step": 48 + }, + { + "epoch": 0.0, + "learning_rate": 2.5e-05, + "loss": 4.0182, + "step": 50 + }, + { + "epoch": 0.0, + "learning_rate": 2.6000000000000002e-05, + "loss": 4.91, + "step": 52 + }, + { + "epoch": 0.0, + "learning_rate": 2.7000000000000002e-05, + "loss": 4.2794, + "step": 54 + }, + { + "epoch": 0.0, + "learning_rate": 2.8000000000000003e-05, + "loss": 4.5266, + "step": 56 + }, + { + "epoch": 0.0, + "learning_rate": 2.9e-05, + "loss": 3.6455, + "step": 58 + }, + { + "epoch": 0.0, + "learning_rate": 3e-05, + "loss": 5.0947, + "step": 60 + }, + { + "epoch": 0.0, + "learning_rate": 3.1e-05, + "loss": 5.3402, + "step": 62 + }, + { + "epoch": 0.0, + "learning_rate": 3.2000000000000005e-05, + "loss": 4.1979, + "step": 64 + }, + { + "epoch": 0.0, + "learning_rate": 3.3e-05, + "loss": 3.6514, + "step": 66 + }, + { + "epoch": 0.0, + "learning_rate": 3.4000000000000007e-05, + "loss": 4.3941, + "step": 68 + }, + { + "epoch": 0.0, + "learning_rate": 3.5e-05, + "loss": 5.016, + "step": 70 + }, + { + "epoch": 0.0, + "learning_rate": 3.6e-05, + "loss": 3.7781, + "step": 72 + }, + { + "epoch": 0.0, + "learning_rate": 3.7e-05, + "loss": 2.9272, + "step": 74 + }, + { + "epoch": 0.0, + "learning_rate": 3.8e-05, + "loss": 5.2388, + "step": 76 + }, + { + "epoch": 0.0, + "learning_rate": 3.9000000000000006e-05, + "loss": 5.0177, + "step": 78 + }, + { + "epoch": 0.0, + "learning_rate": 4e-05, + "loss": 3.0727, + "step": 80 + }, + { + "epoch": 0.0, + "learning_rate": 4.1e-05, + "loss": 3.5789, + "step": 82 + }, + { + "epoch": 0.0, + "learning_rate": 4.2e-05, + "loss": 4.4052, + "step": 84 + }, + { + "epoch": 0.0, + "learning_rate": 4.3e-05, + "loss": 4.2854, + "step": 86 + }, + { + "epoch": 0.0, + "learning_rate": 4.4000000000000006e-05, + "loss": 2.4508, + "step": 88 + }, + { + "epoch": 0.0, + "learning_rate": 4.5e-05, + "loss": 3.4353, + "step": 90 + }, + { + "epoch": 0.0, + "learning_rate": 4.600000000000001e-05, + "loss": 4.8879, + "step": 92 + }, + { + "epoch": 0.0, + "learning_rate": 4.7e-05, + "loss": 5.5409, + "step": 94 + }, + { + "epoch": 0.0, + "learning_rate": 4.8e-05, + "loss": 4.0023, + "step": 96 + }, + { + "epoch": 0.0, + "learning_rate": 4.9e-05, + "loss": 4.37, + "step": 98 + }, + { + "epoch": 0.0, + "learning_rate": 5e-05, + "loss": 4.303, + "step": 100 + }, + { + "epoch": 0.0, + "learning_rate": 4.999537165602148e-05, + "loss": 3.3043, + "step": 102 + }, + { + "epoch": 0.0, + "learning_rate": 4.999074331204295e-05, + "loss": 2.4998, + "step": 104 + }, + { + "epoch": 0.0, + "learning_rate": 4.998611496806443e-05, + "loss": 3.153, + "step": 106 + }, + { + "epoch": 0.0, + "learning_rate": 4.9981486624085904e-05, + "loss": 1.6494, + "step": 108 + }, + { + "epoch": 0.0, + "learning_rate": 4.997685828010738e-05, + "loss": 3.1824, + "step": 110 + }, + { + "epoch": 0.0, + "learning_rate": 4.9972229936128855e-05, + "loss": 1.9534, + "step": 112 + }, + { + "epoch": 0.0, + "learning_rate": 4.9967601592150334e-05, + "loss": 3.1306, + "step": 114 + }, + { + "epoch": 0.0, + "learning_rate": 4.9962973248171806e-05, + "loss": 3.7542, + "step": 116 + }, + { + "epoch": 0.0, + "learning_rate": 4.9958344904193285e-05, + "loss": 2.0625, + "step": 118 + }, + { + "epoch": 0.0, + "learning_rate": 4.995371656021476e-05, + "loss": 5.8539, + "step": 120 + }, + { + "epoch": 0.0, + "learning_rate": 4.9949088216236235e-05, + "loss": 5.0081, + "step": 122 + }, + { + "epoch": 0.0, + "learning_rate": 4.994445987225771e-05, + "loss": 2.7199, + "step": 124 + }, + { + "epoch": 0.0, + "learning_rate": 4.9939831528279186e-05, + "loss": 2.7836, + "step": 126 + }, + { + "epoch": 0.0, + "learning_rate": 4.993520318430066e-05, + "loss": 4.3907, + "step": 128 + }, + { + "epoch": 0.0, + "learning_rate": 4.993057484032214e-05, + "loss": 2.0928, + "step": 130 + }, + { + "epoch": 0.0, + "learning_rate": 4.992594649634361e-05, + "loss": 4.4573, + "step": 132 + }, + { + "epoch": 0.0, + "learning_rate": 4.992131815236509e-05, + "loss": 1.8654, + "step": 134 + }, + { + "epoch": 0.0, + "learning_rate": 4.991668980838656e-05, + "loss": 1.2354, + "step": 136 + }, + { + "epoch": 0.0, + "learning_rate": 4.991206146440804e-05, + "loss": 2.2481, + "step": 138 + }, + { + "epoch": 0.0, + "learning_rate": 4.990743312042951e-05, + "loss": 3.9755, + "step": 140 + }, + { + "epoch": 0.0, + "learning_rate": 4.990280477645099e-05, + "loss": 2.7043, + "step": 142 + }, + { + "epoch": 0.0, + "learning_rate": 4.989817643247247e-05, + "loss": 2.062, + "step": 144 + }, + { + "epoch": 0.0, + "learning_rate": 4.989354808849394e-05, + "loss": 1.191, + "step": 146 + }, + { + "epoch": 0.0, + "learning_rate": 4.988891974451542e-05, + "loss": 2.0809, + "step": 148 + }, + { + "epoch": 0.0, + "learning_rate": 4.988429140053689e-05, + "loss": 5.0056, + "step": 150 + }, + { + "epoch": 0.0, + "learning_rate": 4.987966305655837e-05, + "loss": 2.7314, + "step": 152 + }, + { + "epoch": 0.0, + "learning_rate": 4.987503471257984e-05, + "loss": 3.4277, + "step": 154 + }, + { + "epoch": 0.0, + "learning_rate": 4.987040636860132e-05, + "loss": 4.2682, + "step": 156 + }, + { + "epoch": 0.0, + "learning_rate": 4.986577802462279e-05, + "loss": 3.6359, + "step": 158 + }, + { + "epoch": 0.0, + "learning_rate": 4.986114968064427e-05, + "loss": 4.2905, + "step": 160 + }, + { + "epoch": 0.0, + "learning_rate": 4.9856521336665744e-05, + "loss": 3.0396, + "step": 162 + }, + { + "epoch": 0.0, + "learning_rate": 4.985189299268722e-05, + "loss": 2.766, + "step": 164 + }, + { + "epoch": 0.0, + "learning_rate": 4.9847264648708695e-05, + "loss": 4.4905, + "step": 166 + }, + { + "epoch": 0.0, + "learning_rate": 4.9842636304730174e-05, + "loss": 5.7682, + "step": 168 + }, + { + "epoch": 0.0, + "learning_rate": 4.9838007960751646e-05, + "loss": 4.1597, + "step": 170 + }, + { + "epoch": 0.0, + "learning_rate": 4.9833379616773124e-05, + "loss": 4.2055, + "step": 172 + }, + { + "epoch": 0.0, + "learning_rate": 4.9828751272794596e-05, + "loss": 3.0727, + "step": 174 + }, + { + "epoch": 0.0, + "learning_rate": 4.982412292881607e-05, + "loss": 2.741, + "step": 176 + }, + { + "epoch": 0.0, + "learning_rate": 4.981949458483755e-05, + "loss": 2.2629, + "step": 178 + }, + { + "epoch": 0.0, + "learning_rate": 4.981486624085902e-05, + "loss": 5.8604, + "step": 180 + }, + { + "epoch": 0.0, + "learning_rate": 4.98102378968805e-05, + "loss": 2.8624, + "step": 182 + }, + { + "epoch": 0.0, + "learning_rate": 4.980560955290197e-05, + "loss": 2.5825, + "step": 184 + }, + { + "epoch": 0.0, + "learning_rate": 4.980098120892345e-05, + "loss": 3.3554, + "step": 186 + }, + { + "epoch": 0.0, + "learning_rate": 4.979635286494492e-05, + "loss": 2.8004, + "step": 188 + }, + { + "epoch": 0.0, + "learning_rate": 4.97917245209664e-05, + "loss": 2.6874, + "step": 190 + }, + { + "epoch": 0.0, + "learning_rate": 4.978709617698787e-05, + "loss": 3.5117, + "step": 192 + }, + { + "epoch": 0.0, + "learning_rate": 4.978246783300935e-05, + "loss": 5.0183, + "step": 194 + }, + { + "epoch": 0.0, + "learning_rate": 4.977783948903082e-05, + "loss": 2.3389, + "step": 196 + }, + { + "epoch": 0.0, + "learning_rate": 4.97732111450523e-05, + "loss": 4.539, + "step": 198 + }, + { + "epoch": 0.0, + "learning_rate": 4.9768582801073774e-05, + "loss": 5.2634, + "step": 200 + }, + { + "epoch": 0.0, + "learning_rate": 4.976395445709525e-05, + "loss": 5.6091, + "step": 202 + }, + { + "epoch": 0.0, + "learning_rate": 4.9759326113116724e-05, + "loss": 4.631, + "step": 204 + }, + { + "epoch": 0.0, + "learning_rate": 4.97546977691382e-05, + "loss": 3.0488, + "step": 206 + }, + { + "epoch": 0.0, + "learning_rate": 4.975006942515968e-05, + "loss": 2.1015, + "step": 208 + }, + { + "epoch": 0.0, + "learning_rate": 4.9745441081181154e-05, + "loss": 4.2247, + "step": 210 + }, + { + "epoch": 0.0, + "learning_rate": 4.974081273720263e-05, + "loss": 2.0447, + "step": 212 + }, + { + "epoch": 0.0, + "learning_rate": 4.9736184393224105e-05, + "loss": 0.9285, + "step": 214 + }, + { + "epoch": 0.0, + "learning_rate": 4.9731556049245584e-05, + "loss": 1.6242, + "step": 216 + }, + { + "epoch": 0.0, + "learning_rate": 4.9726927705267056e-05, + "loss": 1.4645, + "step": 218 + }, + { + "epoch": 0.0, + "learning_rate": 4.9722299361288535e-05, + "loss": 2.7375, + "step": 220 + }, + { + "epoch": 0.0, + "learning_rate": 4.971767101731001e-05, + "loss": 5.7341, + "step": 222 + }, + { + "epoch": 0.0, + "learning_rate": 4.9713042673331485e-05, + "loss": 5.8455, + "step": 224 + }, + { + "epoch": 0.0, + "learning_rate": 4.970841432935296e-05, + "loss": 1.864, + "step": 226 + }, + { + "epoch": 0.0, + "learning_rate": 4.9703785985374436e-05, + "loss": 1.9969, + "step": 228 + }, + { + "epoch": 0.0, + "learning_rate": 4.969915764139591e-05, + "loss": 1.8399, + "step": 230 + }, + { + "epoch": 0.0, + "learning_rate": 4.969452929741739e-05, + "loss": 0.997, + "step": 232 + }, + { + "epoch": 0.0, + "learning_rate": 4.968990095343886e-05, + "loss": 4.495, + "step": 234 + }, + { + "epoch": 0.0, + "learning_rate": 4.968527260946034e-05, + "loss": 1.8839, + "step": 236 + }, + { + "epoch": 0.0, + "learning_rate": 4.968064426548181e-05, + "loss": 2.5412, + "step": 238 + }, + { + "epoch": 0.0, + "learning_rate": 4.967601592150329e-05, + "loss": 2.5321, + "step": 240 + }, + { + "epoch": 0.0, + "learning_rate": 4.967138757752476e-05, + "loss": 2.7879, + "step": 242 + }, + { + "epoch": 0.0, + "learning_rate": 4.966675923354624e-05, + "loss": 4.2651, + "step": 244 + }, + { + "epoch": 0.0, + "learning_rate": 4.966213088956771e-05, + "loss": 1.4989, + "step": 246 + }, + { + "epoch": 0.0, + "learning_rate": 4.965750254558919e-05, + "loss": 1.574, + "step": 248 + }, + { + "epoch": 0.0, + "learning_rate": 4.965287420161067e-05, + "loss": 2.9886, + "step": 250 + }, + { + "epoch": 0.0, + "learning_rate": 4.964824585763214e-05, + "loss": 3.6707, + "step": 252 + }, + { + "epoch": 0.0, + "learning_rate": 4.964361751365362e-05, + "loss": 5.8323, + "step": 254 + }, + { + "epoch": 0.0, + "learning_rate": 4.963898916967509e-05, + "loss": 5.6814, + "step": 256 + }, + { + "epoch": 0.0, + "learning_rate": 4.963436082569657e-05, + "loss": 4.8639, + "step": 258 + }, + { + "epoch": 0.0, + "learning_rate": 4.962973248171804e-05, + "loss": 3.5252, + "step": 260 + }, + { + "epoch": 0.0, + "learning_rate": 4.962510413773952e-05, + "loss": 4.6291, + "step": 262 + }, + { + "epoch": 0.0, + "learning_rate": 4.9620475793760994e-05, + "loss": 1.9104, + "step": 264 + }, + { + "epoch": 0.0, + "learning_rate": 4.961584744978247e-05, + "loss": 4.8404, + "step": 266 + }, + { + "epoch": 0.0, + "learning_rate": 4.9611219105803945e-05, + "loss": 2.6891, + "step": 268 + }, + { + "epoch": 0.0, + "learning_rate": 4.9606590761825424e-05, + "loss": 3.4303, + "step": 270 + }, + { + "epoch": 0.0, + "learning_rate": 4.9601962417846896e-05, + "loss": 1.1275, + "step": 272 + }, + { + "epoch": 0.0, + "learning_rate": 4.9597334073868374e-05, + "loss": 1.2095, + "step": 274 + }, + { + "epoch": 0.0, + "learning_rate": 4.9592705729889847e-05, + "loss": 3.8646, + "step": 276 + }, + { + "epoch": 0.0, + "learning_rate": 4.9588077385911325e-05, + "loss": 3.9609, + "step": 278 + }, + { + "epoch": 0.0, + "learning_rate": 4.95834490419328e-05, + "loss": 2.5968, + "step": 280 + }, + { + "epoch": 0.0, + "learning_rate": 4.9578820697954276e-05, + "loss": 2.2243, + "step": 282 + }, + { + "epoch": 0.0, + "learning_rate": 4.957419235397575e-05, + "loss": 5.3337, + "step": 284 + }, + { + "epoch": 0.0, + "learning_rate": 4.956956400999723e-05, + "loss": 3.0658, + "step": 286 + }, + { + "epoch": 0.0, + "learning_rate": 4.9564935666018706e-05, + "loss": 1.1265, + "step": 288 + }, + { + "epoch": 0.0, + "learning_rate": 4.956030732204018e-05, + "loss": 3.2748, + "step": 290 + }, + { + "epoch": 0.0, + "learning_rate": 4.955567897806166e-05, + "loss": 2.1312, + "step": 292 + }, + { + "epoch": 0.0, + "learning_rate": 4.955105063408313e-05, + "loss": 2.9288, + "step": 294 + }, + { + "epoch": 0.0, + "learning_rate": 4.954642229010461e-05, + "loss": 2.1133, + "step": 296 + }, + { + "epoch": 0.0, + "learning_rate": 4.954179394612608e-05, + "loss": 2.795, + "step": 298 + }, + { + "epoch": 0.0, + "learning_rate": 4.953716560214756e-05, + "loss": 1.094, + "step": 300 + }, + { + "epoch": 0.0, + "learning_rate": 4.953253725816903e-05, + "loss": 3.2405, + "step": 302 + }, + { + "epoch": 0.0, + "learning_rate": 4.952790891419051e-05, + "loss": 2.0241, + "step": 304 + }, + { + "epoch": 0.0, + "learning_rate": 4.952328057021198e-05, + "loss": 1.3409, + "step": 306 + }, + { + "epoch": 0.0, + "learning_rate": 4.951865222623346e-05, + "loss": 0.7884, + "step": 308 + }, + { + "epoch": 0.0, + "learning_rate": 4.951402388225493e-05, + "loss": 3.2695, + "step": 310 + }, + { + "epoch": 0.0, + "learning_rate": 4.950939553827641e-05, + "loss": 3.1318, + "step": 312 + }, + { + "epoch": 0.0, + "learning_rate": 4.950476719429788e-05, + "loss": 2.9392, + "step": 314 + }, + { + "epoch": 0.0, + "learning_rate": 4.950013885031936e-05, + "loss": 1.1243, + "step": 316 + }, + { + "epoch": 0.0, + "learning_rate": 4.9495510506340834e-05, + "loss": 3.6301, + "step": 318 + }, + { + "epoch": 0.0, + "learning_rate": 4.9490882162362306e-05, + "loss": 1.2792, + "step": 320 + }, + { + "epoch": 0.0, + "learning_rate": 4.9486253818383785e-05, + "loss": 4.2146, + "step": 322 + }, + { + "epoch": 0.0, + "learning_rate": 4.948162547440526e-05, + "loss": 4.8716, + "step": 324 + }, + { + "epoch": 0.0, + "learning_rate": 4.9476997130426736e-05, + "loss": 4.0848, + "step": 326 + }, + { + "epoch": 0.0, + "learning_rate": 4.947236878644821e-05, + "loss": 1.2625, + "step": 328 + }, + { + "epoch": 0.0, + "learning_rate": 4.9467740442469686e-05, + "loss": 2.7873, + "step": 330 + }, + { + "epoch": 0.0, + "learning_rate": 4.946311209849116e-05, + "loss": 1.6087, + "step": 332 + }, + { + "epoch": 0.0, + "learning_rate": 4.945848375451264e-05, + "loss": 3.5022, + "step": 334 + }, + { + "epoch": 0.0, + "learning_rate": 4.945385541053411e-05, + "loss": 6.9392, + "step": 336 + }, + { + "epoch": 0.0, + "learning_rate": 4.944922706655559e-05, + "loss": 1.8005, + "step": 338 + }, + { + "epoch": 0.0, + "learning_rate": 4.944459872257706e-05, + "loss": 1.533, + "step": 340 + }, + { + "epoch": 0.0, + "learning_rate": 4.943997037859854e-05, + "loss": 3.0175, + "step": 342 + }, + { + "epoch": 0.0, + "learning_rate": 4.943534203462001e-05, + "loss": 1.2817, + "step": 344 + }, + { + "epoch": 0.0, + "learning_rate": 4.943071369064149e-05, + "loss": 2.4534, + "step": 346 + }, + { + "epoch": 0.0, + "learning_rate": 4.942608534666296e-05, + "loss": 2.2906, + "step": 348 + }, + { + "epoch": 0.0, + "learning_rate": 4.942145700268444e-05, + "loss": 1.2994, + "step": 350 + }, + { + "epoch": 0.0, + "learning_rate": 4.941682865870591e-05, + "loss": 3.0533, + "step": 352 + }, + { + "epoch": 0.0, + "learning_rate": 4.941220031472739e-05, + "loss": 3.9938, + "step": 354 + }, + { + "epoch": 0.0, + "learning_rate": 4.940757197074887e-05, + "loss": 3.9988, + "step": 356 + }, + { + "epoch": 0.0, + "learning_rate": 4.940294362677034e-05, + "loss": 1.8571, + "step": 358 + }, + { + "epoch": 0.0, + "learning_rate": 4.939831528279182e-05, + "loss": 1.4432, + "step": 360 + }, + { + "epoch": 0.0, + "learning_rate": 4.939368693881329e-05, + "loss": 3.0705, + "step": 362 + }, + { + "epoch": 0.0, + "learning_rate": 4.938905859483477e-05, + "loss": 1.6334, + "step": 364 + }, + { + "epoch": 0.0, + "learning_rate": 4.9384430250856244e-05, + "loss": 3.8257, + "step": 366 + }, + { + "epoch": 0.0, + "learning_rate": 4.937980190687772e-05, + "loss": 2.7239, + "step": 368 + }, + { + "epoch": 0.0, + "learning_rate": 4.9375173562899195e-05, + "loss": 0.5185, + "step": 370 + }, + { + "epoch": 0.0, + "learning_rate": 4.9370545218920674e-05, + "loss": 3.8395, + "step": 372 + }, + { + "epoch": 0.0, + "learning_rate": 4.9365916874942146e-05, + "loss": 5.5341, + "step": 374 + }, + { + "epoch": 0.0, + "learning_rate": 4.9361288530963625e-05, + "loss": 4.0835, + "step": 376 + }, + { + "epoch": 0.0, + "learning_rate": 4.9356660186985097e-05, + "loss": 5.1902, + "step": 378 + }, + { + "epoch": 0.0, + "learning_rate": 4.9352031843006575e-05, + "loss": 5.7215, + "step": 380 + }, + { + "epoch": 0.0, + "learning_rate": 4.934740349902805e-05, + "loss": 4.9042, + "step": 382 + }, + { + "epoch": 0.0, + "learning_rate": 4.9342775155049526e-05, + "loss": 5.1123, + "step": 384 + }, + { + "epoch": 0.0, + "learning_rate": 4.9338146811071e-05, + "loss": 4.6615, + "step": 386 + }, + { + "epoch": 0.0, + "learning_rate": 4.933351846709248e-05, + "loss": 4.1794, + "step": 388 + }, + { + "epoch": 0.0, + "learning_rate": 4.932889012311395e-05, + "loss": 3.5466, + "step": 390 + }, + { + "epoch": 0.0, + "learning_rate": 4.932426177913543e-05, + "loss": 4.929, + "step": 392 + }, + { + "epoch": 0.0, + "learning_rate": 4.931963343515691e-05, + "loss": 3.9037, + "step": 394 + }, + { + "epoch": 0.0, + "learning_rate": 4.931500509117838e-05, + "loss": 5.3111, + "step": 396 + }, + { + "epoch": 0.0, + "learning_rate": 4.931037674719986e-05, + "loss": 4.9355, + "step": 398 + }, + { + "epoch": 0.0, + "learning_rate": 4.930574840322133e-05, + "loss": 2.9619, + "step": 400 + }, + { + "epoch": 0.0, + "learning_rate": 4.930112005924281e-05, + "loss": 3.3045, + "step": 402 + }, + { + "epoch": 0.0, + "learning_rate": 4.929649171526428e-05, + "loss": 4.3384, + "step": 404 + }, + { + "epoch": 0.0, + "learning_rate": 4.929186337128576e-05, + "loss": 2.4603, + "step": 406 + }, + { + "epoch": 0.0, + "learning_rate": 4.928723502730723e-05, + "loss": 3.3011, + "step": 408 + }, + { + "epoch": 0.0, + "learning_rate": 4.928260668332871e-05, + "loss": 2.0653, + "step": 410 + }, + { + "epoch": 0.0, + "learning_rate": 4.927797833935018e-05, + "loss": 4.6271, + "step": 412 + }, + { + "epoch": 0.0, + "learning_rate": 4.927334999537166e-05, + "loss": 4.0511, + "step": 414 + }, + { + "epoch": 0.0, + "learning_rate": 4.926872165139313e-05, + "loss": 2.9378, + "step": 416 + }, + { + "epoch": 0.0, + "learning_rate": 4.926409330741461e-05, + "loss": 4.206, + "step": 418 + }, + { + "epoch": 0.0, + "learning_rate": 4.9259464963436084e-05, + "loss": 3.0345, + "step": 420 + }, + { + "epoch": 0.0, + "learning_rate": 4.925483661945756e-05, + "loss": 3.3985, + "step": 422 + }, + { + "epoch": 0.0, + "learning_rate": 4.9250208275479035e-05, + "loss": 1.1632, + "step": 424 + }, + { + "epoch": 0.0, + "learning_rate": 4.9245579931500514e-05, + "loss": 1.4889, + "step": 426 + }, + { + "epoch": 0.0, + "learning_rate": 4.9240951587521986e-05, + "loss": 3.6674, + "step": 428 + }, + { + "epoch": 0.0, + "learning_rate": 4.9236323243543464e-05, + "loss": 3.624, + "step": 430 + }, + { + "epoch": 0.0, + "learning_rate": 4.9231694899564936e-05, + "loss": 3.553, + "step": 432 + }, + { + "epoch": 0.0, + "learning_rate": 4.9227066555586415e-05, + "loss": 4.4441, + "step": 434 + }, + { + "epoch": 0.01, + "learning_rate": 4.9222438211607894e-05, + "loss": 2.822, + "step": 436 + }, + { + "epoch": 0.01, + "learning_rate": 4.9217809867629366e-05, + "loss": 2.4839, + "step": 438 + }, + { + "epoch": 0.01, + "learning_rate": 4.9213181523650845e-05, + "loss": 0.9495, + "step": 440 + }, + { + "epoch": 0.01, + "learning_rate": 4.920855317967232e-05, + "loss": 2.0021, + "step": 442 + }, + { + "epoch": 0.01, + "learning_rate": 4.9203924835693796e-05, + "loss": 3.4342, + "step": 444 + }, + { + "epoch": 0.01, + "learning_rate": 4.919929649171527e-05, + "loss": 4.0067, + "step": 446 + }, + { + "epoch": 0.01, + "learning_rate": 4.9194668147736747e-05, + "loss": 6.0481, + "step": 448 + }, + { + "epoch": 0.01, + "learning_rate": 4.919003980375822e-05, + "loss": 3.6963, + "step": 450 + }, + { + "epoch": 0.01, + "learning_rate": 4.91854114597797e-05, + "loss": 3.6426, + "step": 452 + }, + { + "epoch": 0.01, + "learning_rate": 4.918078311580117e-05, + "loss": 1.3831, + "step": 454 + }, + { + "epoch": 0.01, + "learning_rate": 4.917615477182265e-05, + "loss": 4.9499, + "step": 456 + }, + { + "epoch": 0.01, + "learning_rate": 4.917152642784412e-05, + "loss": 4.6878, + "step": 458 + }, + { + "epoch": 0.01, + "learning_rate": 4.91668980838656e-05, + "loss": 3.8595, + "step": 460 + }, + { + "epoch": 0.01, + "learning_rate": 4.916226973988707e-05, + "loss": 3.3701, + "step": 462 + }, + { + "epoch": 0.01, + "learning_rate": 4.915764139590854e-05, + "loss": 3.3446, + "step": 464 + }, + { + "epoch": 0.01, + "learning_rate": 4.915301305193002e-05, + "loss": 2.8526, + "step": 466 + }, + { + "epoch": 0.01, + "learning_rate": 4.9148384707951494e-05, + "loss": 1.4705, + "step": 468 + }, + { + "epoch": 0.01, + "learning_rate": 4.914375636397297e-05, + "loss": 2.4518, + "step": 470 + }, + { + "epoch": 0.01, + "learning_rate": 4.9139128019994445e-05, + "loss": 1.545, + "step": 472 + }, + { + "epoch": 0.01, + "learning_rate": 4.9134499676015924e-05, + "loss": 3.7903, + "step": 474 + }, + { + "epoch": 0.01, + "learning_rate": 4.9129871332037396e-05, + "loss": 6.0971, + "step": 476 + }, + { + "epoch": 0.01, + "learning_rate": 4.9125242988058875e-05, + "loss": 1.1406, + "step": 478 + }, + { + "epoch": 0.01, + "learning_rate": 4.912061464408035e-05, + "loss": 1.9598, + "step": 480 + }, + { + "epoch": 0.01, + "learning_rate": 4.9115986300101825e-05, + "loss": 2.7455, + "step": 482 + }, + { + "epoch": 0.01, + "learning_rate": 4.91113579561233e-05, + "loss": 0.6213, + "step": 484 + }, + { + "epoch": 0.01, + "learning_rate": 4.9106729612144776e-05, + "loss": 1.0346, + "step": 486 + }, + { + "epoch": 0.01, + "learning_rate": 4.910210126816625e-05, + "loss": 0.9245, + "step": 488 + }, + { + "epoch": 0.01, + "learning_rate": 4.909747292418773e-05, + "loss": 2.8286, + "step": 490 + }, + { + "epoch": 0.01, + "learning_rate": 4.90928445802092e-05, + "loss": 7.059, + "step": 492 + }, + { + "epoch": 0.01, + "learning_rate": 4.908821623623068e-05, + "loss": 3.4778, + "step": 494 + }, + { + "epoch": 0.01, + "learning_rate": 4.908358789225215e-05, + "loss": 3.1724, + "step": 496 + }, + { + "epoch": 0.01, + "learning_rate": 4.907895954827363e-05, + "loss": 4.3651, + "step": 498 + }, + { + "epoch": 0.01, + "learning_rate": 4.90743312042951e-05, + "loss": 5.1223, + "step": 500 + }, + { + "epoch": 0.01, + "learning_rate": 4.906970286031658e-05, + "loss": 1.8804, + "step": 502 + }, + { + "epoch": 0.01, + "learning_rate": 4.906507451633806e-05, + "loss": 0.892, + "step": 504 + }, + { + "epoch": 0.01, + "learning_rate": 4.906044617235953e-05, + "loss": 4.1566, + "step": 506 + }, + { + "epoch": 0.01, + "learning_rate": 4.905581782838101e-05, + "loss": 5.5558, + "step": 508 + }, + { + "epoch": 0.01, + "learning_rate": 4.905118948440248e-05, + "loss": 2.0906, + "step": 510 + }, + { + "epoch": 0.01, + "learning_rate": 4.904656114042396e-05, + "loss": 0.9953, + "step": 512 + }, + { + "epoch": 0.01, + "learning_rate": 4.904193279644543e-05, + "loss": 0.6079, + "step": 514 + }, + { + "epoch": 0.01, + "learning_rate": 4.903730445246691e-05, + "loss": 1.3432, + "step": 516 + }, + { + "epoch": 0.01, + "learning_rate": 4.903267610848838e-05, + "loss": 0.7198, + "step": 518 + }, + { + "epoch": 0.01, + "learning_rate": 4.902804776450986e-05, + "loss": 3.1756, + "step": 520 + }, + { + "epoch": 0.01, + "learning_rate": 4.9023419420531334e-05, + "loss": 3.1296, + "step": 522 + }, + { + "epoch": 0.01, + "learning_rate": 4.901879107655281e-05, + "loss": 0.8081, + "step": 524 + }, + { + "epoch": 0.01, + "learning_rate": 4.9014162732574285e-05, + "loss": 0.7372, + "step": 526 + }, + { + "epoch": 0.01, + "learning_rate": 4.9009534388595764e-05, + "loss": 1.5253, + "step": 528 + }, + { + "epoch": 0.01, + "learning_rate": 4.9004906044617236e-05, + "loss": 4.889, + "step": 530 + }, + { + "epoch": 0.01, + "learning_rate": 4.9000277700638714e-05, + "loss": 8.4378, + "step": 532 + }, + { + "epoch": 0.01, + "learning_rate": 4.8995649356660186e-05, + "loss": 1.3812, + "step": 534 + }, + { + "epoch": 0.01, + "learning_rate": 4.8991021012681665e-05, + "loss": 4.1242, + "step": 536 + }, + { + "epoch": 0.01, + "learning_rate": 4.898639266870314e-05, + "loss": 0.7897, + "step": 538 + }, + { + "epoch": 0.01, + "learning_rate": 4.8981764324724616e-05, + "loss": 5.4463, + "step": 540 + }, + { + "epoch": 0.01, + "learning_rate": 4.8977135980746095e-05, + "loss": 4.1206, + "step": 542 + }, + { + "epoch": 0.01, + "learning_rate": 4.897250763676757e-05, + "loss": 1.6076, + "step": 544 + }, + { + "epoch": 0.01, + "learning_rate": 4.8967879292789046e-05, + "loss": 0.7202, + "step": 546 + }, + { + "epoch": 0.01, + "learning_rate": 4.896325094881052e-05, + "loss": 1.5387, + "step": 548 + }, + { + "epoch": 0.01, + "learning_rate": 4.8958622604832e-05, + "loss": 0.6171, + "step": 550 + }, + { + "epoch": 0.01, + "learning_rate": 4.895399426085347e-05, + "loss": 2.5452, + "step": 552 + }, + { + "epoch": 0.01, + "learning_rate": 4.894936591687495e-05, + "loss": 1.3003, + "step": 554 + }, + { + "epoch": 0.01, + "learning_rate": 4.894473757289642e-05, + "loss": 3.9205, + "step": 556 + }, + { + "epoch": 0.01, + "learning_rate": 4.89401092289179e-05, + "loss": 3.4166, + "step": 558 + }, + { + "epoch": 0.01, + "learning_rate": 4.893548088493937e-05, + "loss": 1.7375, + "step": 560 + }, + { + "epoch": 0.01, + "learning_rate": 4.893085254096085e-05, + "loss": 1.5945, + "step": 562 + }, + { + "epoch": 0.01, + "learning_rate": 4.892622419698232e-05, + "loss": 5.5027, + "step": 564 + }, + { + "epoch": 0.01, + "learning_rate": 4.89215958530038e-05, + "loss": 4.3663, + "step": 566 + }, + { + "epoch": 0.01, + "learning_rate": 4.891696750902527e-05, + "loss": 5.005, + "step": 568 + }, + { + "epoch": 0.01, + "learning_rate": 4.891233916504675e-05, + "loss": 4.8787, + "step": 570 + }, + { + "epoch": 0.01, + "learning_rate": 4.890771082106822e-05, + "loss": 3.9977, + "step": 572 + }, + { + "epoch": 0.01, + "learning_rate": 4.89030824770897e-05, + "loss": 3.4692, + "step": 574 + }, + { + "epoch": 0.01, + "learning_rate": 4.8898454133111174e-05, + "loss": 1.2738, + "step": 576 + }, + { + "epoch": 0.01, + "learning_rate": 4.889382578913265e-05, + "loss": 4.8468, + "step": 578 + }, + { + "epoch": 0.01, + "learning_rate": 4.8889197445154125e-05, + "loss": 2.7556, + "step": 580 + }, + { + "epoch": 0.01, + "learning_rate": 4.8884569101175603e-05, + "loss": 0.5404, + "step": 582 + }, + { + "epoch": 0.01, + "learning_rate": 4.887994075719708e-05, + "loss": 3.9631, + "step": 584 + }, + { + "epoch": 0.01, + "learning_rate": 4.8875312413218554e-05, + "loss": 1.4846, + "step": 586 + }, + { + "epoch": 0.01, + "learning_rate": 4.887068406924003e-05, + "loss": 3.8237, + "step": 588 + }, + { + "epoch": 0.01, + "learning_rate": 4.8866055725261505e-05, + "loss": 3.8888, + "step": 590 + }, + { + "epoch": 0.01, + "learning_rate": 4.8861427381282984e-05, + "loss": 4.2396, + "step": 592 + }, + { + "epoch": 0.01, + "learning_rate": 4.8856799037304456e-05, + "loss": 1.8337, + "step": 594 + }, + { + "epoch": 0.01, + "learning_rate": 4.8852170693325935e-05, + "loss": 4.2532, + "step": 596 + }, + { + "epoch": 0.01, + "learning_rate": 4.884754234934741e-05, + "loss": 1.136, + "step": 598 + }, + { + "epoch": 0.01, + "learning_rate": 4.8842914005368886e-05, + "loss": 0.7694, + "step": 600 + }, + { + "epoch": 0.01, + "learning_rate": 4.883828566139036e-05, + "loss": 0.7106, + "step": 602 + }, + { + "epoch": 0.01, + "learning_rate": 4.8833657317411836e-05, + "loss": 4.6913, + "step": 604 + }, + { + "epoch": 0.01, + "learning_rate": 4.882902897343331e-05, + "loss": 2.3532, + "step": 606 + }, + { + "epoch": 0.01, + "learning_rate": 4.882440062945478e-05, + "loss": 4.6793, + "step": 608 + }, + { + "epoch": 0.01, + "learning_rate": 4.881977228547626e-05, + "loss": 1.1938, + "step": 610 + }, + { + "epoch": 0.01, + "learning_rate": 4.881514394149773e-05, + "loss": 0.7594, + "step": 612 + }, + { + "epoch": 0.01, + "learning_rate": 4.881051559751921e-05, + "loss": 3.1088, + "step": 614 + }, + { + "epoch": 0.01, + "learning_rate": 4.880588725354068e-05, + "loss": 4.2103, + "step": 616 + }, + { + "epoch": 0.01, + "learning_rate": 4.880125890956216e-05, + "loss": 0.5989, + "step": 618 + }, + { + "epoch": 0.01, + "learning_rate": 4.879663056558363e-05, + "loss": 0.1589, + "step": 620 + }, + { + "epoch": 0.01, + "learning_rate": 4.879200222160511e-05, + "loss": 7.2191, + "step": 622 + }, + { + "epoch": 0.01, + "learning_rate": 4.8787373877626584e-05, + "loss": 4.7788, + "step": 624 + }, + { + "epoch": 0.01, + "learning_rate": 4.878274553364806e-05, + "loss": 2.4446, + "step": 626 + }, + { + "epoch": 0.01, + "learning_rate": 4.8778117189669535e-05, + "loss": 0.5752, + "step": 628 + }, + { + "epoch": 0.01, + "learning_rate": 4.8773488845691014e-05, + "loss": 2.4578, + "step": 630 + }, + { + "epoch": 0.01, + "learning_rate": 4.8768860501712486e-05, + "loss": 4.306, + "step": 632 + }, + { + "epoch": 0.01, + "learning_rate": 4.8764232157733964e-05, + "loss": 2.3142, + "step": 634 + }, + { + "epoch": 0.01, + "learning_rate": 4.8759603813755437e-05, + "loss": 4.3862, + "step": 636 + }, + { + "epoch": 0.01, + "learning_rate": 4.8754975469776915e-05, + "loss": 2.2431, + "step": 638 + }, + { + "epoch": 0.01, + "learning_rate": 4.875034712579839e-05, + "loss": 3.1251, + "step": 640 + }, + { + "epoch": 0.01, + "learning_rate": 4.8745718781819866e-05, + "loss": 4.3176, + "step": 642 + }, + { + "epoch": 0.01, + "learning_rate": 4.874109043784134e-05, + "loss": 1.3775, + "step": 644 + }, + { + "epoch": 0.01, + "learning_rate": 4.873646209386282e-05, + "loss": 2.3126, + "step": 646 + }, + { + "epoch": 0.01, + "learning_rate": 4.8731833749884296e-05, + "loss": 0.081, + "step": 648 + }, + { + "epoch": 0.01, + "learning_rate": 4.872720540590577e-05, + "loss": 3.7923, + "step": 650 + }, + { + "epoch": 0.01, + "learning_rate": 4.872257706192725e-05, + "loss": 1.8392, + "step": 652 + }, + { + "epoch": 0.01, + "learning_rate": 4.871794871794872e-05, + "loss": 1.9216, + "step": 654 + }, + { + "epoch": 0.01, + "learning_rate": 4.87133203739702e-05, + "loss": 3.298, + "step": 656 + }, + { + "epoch": 0.01, + "learning_rate": 4.870869202999167e-05, + "loss": 2.7509, + "step": 658 + }, + { + "epoch": 0.01, + "learning_rate": 4.870406368601315e-05, + "loss": 1.967, + "step": 660 + }, + { + "epoch": 0.01, + "learning_rate": 4.869943534203462e-05, + "loss": 5.5098, + "step": 662 + }, + { + "epoch": 0.01, + "learning_rate": 4.86948069980561e-05, + "loss": 0.5408, + "step": 664 + }, + { + "epoch": 0.01, + "learning_rate": 4.869017865407757e-05, + "loss": 3.6871, + "step": 666 + }, + { + "epoch": 0.01, + "learning_rate": 4.868555031009905e-05, + "loss": 2.8948, + "step": 668 + }, + { + "epoch": 0.01, + "learning_rate": 4.868092196612052e-05, + "loss": 4.973, + "step": 670 + }, + { + "epoch": 0.01, + "learning_rate": 4.8676293622142e-05, + "loss": 5.2988, + "step": 672 + }, + { + "epoch": 0.01, + "learning_rate": 4.867166527816347e-05, + "loss": 3.648, + "step": 674 + }, + { + "epoch": 0.01, + "learning_rate": 4.866703693418495e-05, + "loss": 2.7992, + "step": 676 + }, + { + "epoch": 0.01, + "learning_rate": 4.8662408590206424e-05, + "loss": 2.3377, + "step": 678 + }, + { + "epoch": 0.01, + "learning_rate": 4.86577802462279e-05, + "loss": 2.461, + "step": 680 + }, + { + "epoch": 0.01, + "learning_rate": 4.8653151902249375e-05, + "loss": 4.7363, + "step": 682 + }, + { + "epoch": 0.01, + "learning_rate": 4.8648523558270853e-05, + "loss": 3.8594, + "step": 684 + }, + { + "epoch": 0.01, + "learning_rate": 4.8643895214292326e-05, + "loss": 4.734, + "step": 686 + }, + { + "epoch": 0.01, + "learning_rate": 4.8639266870313804e-05, + "loss": 3.6982, + "step": 688 + }, + { + "epoch": 0.01, + "learning_rate": 4.863463852633528e-05, + "loss": 4.6073, + "step": 690 + }, + { + "epoch": 0.01, + "learning_rate": 4.8630010182356755e-05, + "loss": 3.7078, + "step": 692 + }, + { + "epoch": 0.01, + "learning_rate": 4.8625381838378234e-05, + "loss": 3.6916, + "step": 694 + }, + { + "epoch": 0.01, + "learning_rate": 4.8620753494399706e-05, + "loss": 5.4478, + "step": 696 + }, + { + "epoch": 0.01, + "learning_rate": 4.8616125150421185e-05, + "loss": 3.211, + "step": 698 + }, + { + "epoch": 0.01, + "learning_rate": 4.861149680644266e-05, + "loss": 2.4355, + "step": 700 + }, + { + "epoch": 0.01, + "learning_rate": 4.8606868462464136e-05, + "loss": 1.4907, + "step": 702 + }, + { + "epoch": 0.01, + "learning_rate": 4.860224011848561e-05, + "loss": 1.1325, + "step": 704 + }, + { + "epoch": 0.01, + "learning_rate": 4.8597611774507087e-05, + "loss": 1.8143, + "step": 706 + }, + { + "epoch": 0.01, + "learning_rate": 4.859298343052856e-05, + "loss": 0.7965, + "step": 708 + }, + { + "epoch": 0.01, + "learning_rate": 4.858835508655004e-05, + "loss": 2.4407, + "step": 710 + }, + { + "epoch": 0.01, + "learning_rate": 4.858372674257151e-05, + "loss": 4.7933, + "step": 712 + }, + { + "epoch": 0.01, + "learning_rate": 4.857909839859299e-05, + "loss": 0.0581, + "step": 714 + }, + { + "epoch": 0.01, + "learning_rate": 4.857447005461446e-05, + "loss": 2.051, + "step": 716 + }, + { + "epoch": 0.01, + "learning_rate": 4.856984171063594e-05, + "loss": 2.0997, + "step": 718 + }, + { + "epoch": 0.01, + "learning_rate": 4.856521336665741e-05, + "loss": 0.146, + "step": 720 + }, + { + "epoch": 0.01, + "learning_rate": 4.856058502267889e-05, + "loss": 0.2953, + "step": 722 + }, + { + "epoch": 0.01, + "learning_rate": 4.855595667870036e-05, + "loss": 3.3334, + "step": 724 + }, + { + "epoch": 0.01, + "learning_rate": 4.855132833472184e-05, + "loss": 0.3533, + "step": 726 + }, + { + "epoch": 0.01, + "learning_rate": 4.854669999074332e-05, + "loss": 0.0405, + "step": 728 + }, + { + "epoch": 0.01, + "learning_rate": 4.854207164676479e-05, + "loss": 3.2629, + "step": 730 + }, + { + "epoch": 0.01, + "learning_rate": 4.853744330278627e-05, + "loss": 6.1213, + "step": 732 + }, + { + "epoch": 0.01, + "learning_rate": 4.853281495880774e-05, + "loss": 7.558, + "step": 734 + }, + { + "epoch": 0.01, + "learning_rate": 4.852818661482922e-05, + "loss": 5.9161, + "step": 736 + }, + { + "epoch": 0.01, + "learning_rate": 4.852355827085069e-05, + "loss": 4.3886, + "step": 738 + }, + { + "epoch": 0.01, + "learning_rate": 4.851892992687217e-05, + "loss": 4.0207, + "step": 740 + }, + { + "epoch": 0.01, + "learning_rate": 4.8514301582893644e-05, + "loss": 3.3258, + "step": 742 + }, + { + "epoch": 0.01, + "learning_rate": 4.850967323891512e-05, + "loss": 2.4253, + "step": 744 + }, + { + "epoch": 0.01, + "learning_rate": 4.8505044894936595e-05, + "loss": 3.8694, + "step": 746 + }, + { + "epoch": 0.01, + "learning_rate": 4.8500416550958074e-05, + "loss": 3.3424, + "step": 748 + }, + { + "epoch": 0.01, + "learning_rate": 4.8495788206979546e-05, + "loss": 2.9284, + "step": 750 + }, + { + "epoch": 0.01, + "learning_rate": 4.8491159863001025e-05, + "loss": 1.9573, + "step": 752 + }, + { + "epoch": 0.01, + "learning_rate": 4.84865315190225e-05, + "loss": 3.541, + "step": 754 + }, + { + "epoch": 0.01, + "learning_rate": 4.848190317504397e-05, + "loss": 4.2104, + "step": 756 + }, + { + "epoch": 0.01, + "learning_rate": 4.847727483106545e-05, + "loss": 2.616, + "step": 758 + }, + { + "epoch": 0.01, + "learning_rate": 4.847264648708692e-05, + "loss": 2.9753, + "step": 760 + }, + { + "epoch": 0.01, + "learning_rate": 4.84680181431084e-05, + "loss": 3.9563, + "step": 762 + }, + { + "epoch": 0.01, + "learning_rate": 4.846338979912987e-05, + "loss": 1.3559, + "step": 764 + }, + { + "epoch": 0.01, + "learning_rate": 4.845876145515135e-05, + "loss": 3.4534, + "step": 766 + }, + { + "epoch": 0.01, + "learning_rate": 4.845413311117282e-05, + "loss": 3.7294, + "step": 768 + }, + { + "epoch": 0.01, + "learning_rate": 4.84495047671943e-05, + "loss": 4.3839, + "step": 770 + }, + { + "epoch": 0.01, + "learning_rate": 4.844487642321577e-05, + "loss": 0.8009, + "step": 772 + }, + { + "epoch": 0.01, + "learning_rate": 4.844024807923725e-05, + "loss": 2.3561, + "step": 774 + }, + { + "epoch": 0.01, + "learning_rate": 4.843561973525872e-05, + "loss": 4.2417, + "step": 776 + }, + { + "epoch": 0.01, + "learning_rate": 4.84309913912802e-05, + "loss": 2.0557, + "step": 778 + }, + { + "epoch": 0.01, + "learning_rate": 4.8426363047301674e-05, + "loss": 2.45, + "step": 780 + }, + { + "epoch": 0.01, + "learning_rate": 4.842173470332315e-05, + "loss": 2.6521, + "step": 782 + }, + { + "epoch": 0.01, + "learning_rate": 4.8417106359344625e-05, + "loss": 1.4045, + "step": 784 + }, + { + "epoch": 0.01, + "learning_rate": 4.8412478015366104e-05, + "loss": 1.2441, + "step": 786 + }, + { + "epoch": 0.01, + "learning_rate": 4.8407849671387576e-05, + "loss": 4.9924, + "step": 788 + }, + { + "epoch": 0.01, + "learning_rate": 4.8403221327409054e-05, + "loss": 4.7287, + "step": 790 + }, + { + "epoch": 0.01, + "learning_rate": 4.8398592983430526e-05, + "loss": 4.5657, + "step": 792 + }, + { + "epoch": 0.01, + "learning_rate": 4.8393964639452005e-05, + "loss": 0.8063, + "step": 794 + }, + { + "epoch": 0.01, + "learning_rate": 4.8389336295473484e-05, + "loss": 1.8242, + "step": 796 + }, + { + "epoch": 0.01, + "learning_rate": 4.8384707951494956e-05, + "loss": 3.2677, + "step": 798 + }, + { + "epoch": 0.01, + "learning_rate": 4.8380079607516435e-05, + "loss": 0.5575, + "step": 800 + }, + { + "epoch": 0.01, + "learning_rate": 4.837545126353791e-05, + "loss": 2.2497, + "step": 802 + }, + { + "epoch": 0.01, + "learning_rate": 4.8370822919559386e-05, + "loss": 6.6717, + "step": 804 + }, + { + "epoch": 0.01, + "learning_rate": 4.836619457558086e-05, + "loss": 0.9386, + "step": 806 + }, + { + "epoch": 0.01, + "learning_rate": 4.8361566231602337e-05, + "loss": 1.7609, + "step": 808 + }, + { + "epoch": 0.01, + "learning_rate": 4.835693788762381e-05, + "loss": 4.1894, + "step": 810 + }, + { + "epoch": 0.01, + "learning_rate": 4.835230954364529e-05, + "loss": 1.7989, + "step": 812 + }, + { + "epoch": 0.01, + "learning_rate": 4.834768119966676e-05, + "loss": 3.2573, + "step": 814 + }, + { + "epoch": 0.01, + "learning_rate": 4.834305285568824e-05, + "loss": 2.7204, + "step": 816 + }, + { + "epoch": 0.01, + "learning_rate": 4.833842451170971e-05, + "loss": 1.7687, + "step": 818 + }, + { + "epoch": 0.01, + "learning_rate": 4.833379616773119e-05, + "loss": 2.5711, + "step": 820 + }, + { + "epoch": 0.01, + "learning_rate": 4.832916782375266e-05, + "loss": 1.7332, + "step": 822 + }, + { + "epoch": 0.01, + "learning_rate": 4.832453947977414e-05, + "loss": 2.1463, + "step": 824 + }, + { + "epoch": 0.01, + "learning_rate": 4.831991113579561e-05, + "loss": 2.1947, + "step": 826 + }, + { + "epoch": 0.01, + "learning_rate": 4.831528279181709e-05, + "loss": 0.4706, + "step": 828 + }, + { + "epoch": 0.01, + "learning_rate": 4.831065444783856e-05, + "loss": 3.9636, + "step": 830 + }, + { + "epoch": 0.01, + "learning_rate": 4.830602610386004e-05, + "loss": 0.602, + "step": 832 + }, + { + "epoch": 0.01, + "learning_rate": 4.8301397759881514e-05, + "loss": 1.0117, + "step": 834 + }, + { + "epoch": 0.01, + "learning_rate": 4.829676941590299e-05, + "loss": 1.9096, + "step": 836 + }, + { + "epoch": 0.01, + "learning_rate": 4.829214107192447e-05, + "loss": 1.9683, + "step": 838 + }, + { + "epoch": 0.01, + "learning_rate": 4.828751272794594e-05, + "loss": 1.4493, + "step": 840 + }, + { + "epoch": 0.01, + "learning_rate": 4.828288438396742e-05, + "loss": 4.6948, + "step": 842 + }, + { + "epoch": 0.01, + "learning_rate": 4.8278256039988894e-05, + "loss": 3.917, + "step": 844 + }, + { + "epoch": 0.01, + "learning_rate": 4.827362769601037e-05, + "loss": 2.5498, + "step": 846 + }, + { + "epoch": 0.01, + "learning_rate": 4.8268999352031845e-05, + "loss": 1.7735, + "step": 848 + }, + { + "epoch": 0.01, + "learning_rate": 4.8264371008053324e-05, + "loss": 3.3149, + "step": 850 + }, + { + "epoch": 0.01, + "learning_rate": 4.8259742664074796e-05, + "loss": 4.0292, + "step": 852 + }, + { + "epoch": 0.01, + "learning_rate": 4.8255114320096275e-05, + "loss": 1.8101, + "step": 854 + }, + { + "epoch": 0.01, + "learning_rate": 4.825048597611775e-05, + "loss": 0.3017, + "step": 856 + }, + { + "epoch": 0.01, + "learning_rate": 4.8245857632139226e-05, + "loss": 1.8147, + "step": 858 + }, + { + "epoch": 0.01, + "learning_rate": 4.82412292881607e-05, + "loss": 2.2203, + "step": 860 + }, + { + "epoch": 0.01, + "learning_rate": 4.8236600944182176e-05, + "loss": 5.7717, + "step": 862 + }, + { + "epoch": 0.01, + "learning_rate": 4.823197260020365e-05, + "loss": 2.463, + "step": 864 + }, + { + "epoch": 0.01, + "learning_rate": 4.822734425622513e-05, + "loss": 3.7682, + "step": 866 + }, + { + "epoch": 0.01, + "learning_rate": 4.82227159122466e-05, + "loss": 3.6976, + "step": 868 + }, + { + "epoch": 0.01, + "learning_rate": 4.821808756826808e-05, + "loss": 1.8342, + "step": 870 + }, + { + "epoch": 0.01, + "learning_rate": 4.821345922428955e-05, + "loss": 2.4817, + "step": 872 + }, + { + "epoch": 0.01, + "learning_rate": 4.820883088031103e-05, + "loss": 2.0065, + "step": 874 + }, + { + "epoch": 0.01, + "learning_rate": 4.820420253633251e-05, + "loss": 1.5848, + "step": 876 + }, + { + "epoch": 0.01, + "learning_rate": 4.819957419235398e-05, + "loss": 2.1813, + "step": 878 + }, + { + "epoch": 0.01, + "learning_rate": 4.819494584837546e-05, + "loss": 1.1161, + "step": 880 + }, + { + "epoch": 0.01, + "learning_rate": 4.819031750439693e-05, + "loss": 2.0882, + "step": 882 + }, + { + "epoch": 0.01, + "learning_rate": 4.818568916041841e-05, + "loss": 3.7063, + "step": 884 + }, + { + "epoch": 0.01, + "learning_rate": 4.818106081643988e-05, + "loss": 6.5713, + "step": 886 + }, + { + "epoch": 0.01, + "learning_rate": 4.817643247246136e-05, + "loss": 1.6113, + "step": 888 + }, + { + "epoch": 0.01, + "learning_rate": 4.817180412848283e-05, + "loss": 1.4084, + "step": 890 + }, + { + "epoch": 0.01, + "learning_rate": 4.816717578450431e-05, + "loss": 0.8458, + "step": 892 + }, + { + "epoch": 0.01, + "learning_rate": 4.816254744052578e-05, + "loss": 1.0353, + "step": 894 + }, + { + "epoch": 0.01, + "learning_rate": 4.815791909654726e-05, + "loss": 1.0656, + "step": 896 + }, + { + "epoch": 0.01, + "learning_rate": 4.815329075256873e-05, + "loss": 4.2288, + "step": 898 + }, + { + "epoch": 0.01, + "learning_rate": 4.8148662408590206e-05, + "loss": 4.648, + "step": 900 + }, + { + "epoch": 0.01, + "learning_rate": 4.8144034064611685e-05, + "loss": 1.6827, + "step": 902 + }, + { + "epoch": 0.01, + "learning_rate": 4.813940572063316e-05, + "loss": 0.1, + "step": 904 + }, + { + "epoch": 0.01, + "learning_rate": 4.8134777376654636e-05, + "loss": 2.2172, + "step": 906 + }, + { + "epoch": 0.01, + "learning_rate": 4.813014903267611e-05, + "loss": 4.0769, + "step": 908 + }, + { + "epoch": 0.01, + "learning_rate": 4.812552068869759e-05, + "loss": 2.6073, + "step": 910 + }, + { + "epoch": 0.01, + "learning_rate": 4.812089234471906e-05, + "loss": 1.4241, + "step": 912 + }, + { + "epoch": 0.01, + "learning_rate": 4.811626400074054e-05, + "loss": 1.6268, + "step": 914 + }, + { + "epoch": 0.01, + "learning_rate": 4.811163565676201e-05, + "loss": 1.5726, + "step": 916 + }, + { + "epoch": 0.01, + "learning_rate": 4.810700731278349e-05, + "loss": 2.3525, + "step": 918 + }, + { + "epoch": 0.01, + "learning_rate": 4.810237896880496e-05, + "loss": 2.3828, + "step": 920 + }, + { + "epoch": 0.01, + "learning_rate": 4.809775062482644e-05, + "loss": 6.4977, + "step": 922 + }, + { + "epoch": 0.01, + "learning_rate": 4.809312228084791e-05, + "loss": 6.0585, + "step": 924 + }, + { + "epoch": 0.01, + "learning_rate": 4.808849393686939e-05, + "loss": 5.0878, + "step": 926 + }, + { + "epoch": 0.01, + "learning_rate": 4.808386559289086e-05, + "loss": 4.847, + "step": 928 + }, + { + "epoch": 0.01, + "learning_rate": 4.807923724891234e-05, + "loss": 3.2003, + "step": 930 + }, + { + "epoch": 0.01, + "learning_rate": 4.807460890493381e-05, + "loss": 2.2836, + "step": 932 + }, + { + "epoch": 0.01, + "learning_rate": 4.806998056095529e-05, + "loss": 3.1416, + "step": 934 + }, + { + "epoch": 0.01, + "learning_rate": 4.8065352216976764e-05, + "loss": 2.0173, + "step": 936 + }, + { + "epoch": 0.01, + "learning_rate": 4.806072387299824e-05, + "loss": 3.2826, + "step": 938 + }, + { + "epoch": 0.01, + "learning_rate": 4.8056095529019715e-05, + "loss": 2.8288, + "step": 940 + }, + { + "epoch": 0.01, + "learning_rate": 4.8051467185041193e-05, + "loss": 2.2719, + "step": 942 + }, + { + "epoch": 0.01, + "learning_rate": 4.804683884106267e-05, + "loss": 1.6449, + "step": 944 + }, + { + "epoch": 0.01, + "learning_rate": 4.8042210497084144e-05, + "loss": 2.3678, + "step": 946 + }, + { + "epoch": 0.01, + "learning_rate": 4.803758215310562e-05, + "loss": 2.3104, + "step": 948 + }, + { + "epoch": 0.01, + "learning_rate": 4.8032953809127095e-05, + "loss": 0.9652, + "step": 950 + }, + { + "epoch": 0.01, + "learning_rate": 4.8028325465148574e-05, + "loss": 6.1099, + "step": 952 + }, + { + "epoch": 0.01, + "learning_rate": 4.8023697121170046e-05, + "loss": 0.4649, + "step": 954 + }, + { + "epoch": 0.01, + "learning_rate": 4.8019068777191525e-05, + "loss": 2.009, + "step": 956 + }, + { + "epoch": 0.01, + "learning_rate": 4.8014440433213e-05, + "loss": 2.3097, + "step": 958 + }, + { + "epoch": 0.01, + "learning_rate": 4.8009812089234476e-05, + "loss": 0.0982, + "step": 960 + }, + { + "epoch": 0.01, + "learning_rate": 4.800518374525595e-05, + "loss": 1.0323, + "step": 962 + }, + { + "epoch": 0.01, + "learning_rate": 4.8000555401277426e-05, + "loss": 3.1926, + "step": 964 + }, + { + "epoch": 0.01, + "learning_rate": 4.79959270572989e-05, + "loss": 2.0062, + "step": 966 + }, + { + "epoch": 0.01, + "learning_rate": 4.799129871332038e-05, + "loss": 2.9063, + "step": 968 + }, + { + "epoch": 0.01, + "learning_rate": 4.798667036934185e-05, + "loss": 2.1665, + "step": 970 + }, + { + "epoch": 0.01, + "learning_rate": 4.798204202536333e-05, + "loss": 0.1262, + "step": 972 + }, + { + "epoch": 0.01, + "learning_rate": 4.79774136813848e-05, + "loss": 5.0226, + "step": 974 + }, + { + "epoch": 0.01, + "learning_rate": 4.797278533740628e-05, + "loss": 1.4879, + "step": 976 + }, + { + "epoch": 0.01, + "learning_rate": 4.796815699342775e-05, + "loss": 4.3901, + "step": 978 + }, + { + "epoch": 0.01, + "learning_rate": 4.796352864944923e-05, + "loss": 2.0089, + "step": 980 + }, + { + "epoch": 0.01, + "learning_rate": 4.795890030547071e-05, + "loss": 2.4167, + "step": 982 + }, + { + "epoch": 0.01, + "learning_rate": 4.795427196149218e-05, + "loss": 2.0942, + "step": 984 + }, + { + "epoch": 0.01, + "learning_rate": 4.794964361751366e-05, + "loss": 2.2375, + "step": 986 + }, + { + "epoch": 0.01, + "learning_rate": 4.794501527353513e-05, + "loss": 1.3551, + "step": 988 + }, + { + "epoch": 0.01, + "learning_rate": 4.794038692955661e-05, + "loss": 3.0897, + "step": 990 + }, + { + "epoch": 0.01, + "learning_rate": 4.793575858557808e-05, + "loss": 3.2452, + "step": 992 + }, + { + "epoch": 0.01, + "learning_rate": 4.793113024159956e-05, + "loss": 0.1318, + "step": 994 + }, + { + "epoch": 0.01, + "learning_rate": 4.792650189762103e-05, + "loss": 3.9303, + "step": 996 + }, + { + "epoch": 0.01, + "learning_rate": 4.792187355364251e-05, + "loss": 6.3771, + "step": 998 + }, + { + "epoch": 0.01, + "learning_rate": 4.7917245209663984e-05, + "loss": 5.8938, + "step": 1000 + }, + { + "epoch": 0.01, + "learning_rate": 4.791261686568546e-05, + "loss": 2.6714, + "step": 1002 + }, + { + "epoch": 0.01, + "learning_rate": 4.7907988521706935e-05, + "loss": 1.3251, + "step": 1004 + }, + { + "epoch": 0.01, + "learning_rate": 4.7903360177728414e-05, + "loss": 7.1614, + "step": 1006 + }, + { + "epoch": 0.01, + "learning_rate": 4.7898731833749886e-05, + "loss": 2.2772, + "step": 1008 + }, + { + "epoch": 0.01, + "learning_rate": 4.7894103489771365e-05, + "loss": 1.617, + "step": 1010 + }, + { + "epoch": 0.01, + "learning_rate": 4.788947514579284e-05, + "loss": 4.3641, + "step": 1012 + }, + { + "epoch": 0.01, + "learning_rate": 4.7884846801814315e-05, + "loss": 1.5456, + "step": 1014 + }, + { + "epoch": 0.01, + "learning_rate": 4.788021845783579e-05, + "loss": 4.6484, + "step": 1016 + }, + { + "epoch": 0.01, + "learning_rate": 4.7875590113857266e-05, + "loss": 4.0756, + "step": 1018 + }, + { + "epoch": 0.01, + "learning_rate": 4.787096176987874e-05, + "loss": 1.2109, + "step": 1020 + }, + { + "epoch": 0.01, + "learning_rate": 4.786633342590022e-05, + "loss": 1.7558, + "step": 1022 + }, + { + "epoch": 0.01, + "learning_rate": 4.7861705081921696e-05, + "loss": 2.5459, + "step": 1024 + }, + { + "epoch": 0.01, + "learning_rate": 4.785707673794317e-05, + "loss": 3.3481, + "step": 1026 + }, + { + "epoch": 0.01, + "learning_rate": 4.785244839396465e-05, + "loss": 3.3966, + "step": 1028 + }, + { + "epoch": 0.01, + "learning_rate": 4.784782004998612e-05, + "loss": 2.0624, + "step": 1030 + }, + { + "epoch": 0.01, + "learning_rate": 4.78431917060076e-05, + "loss": 0.0244, + "step": 1032 + }, + { + "epoch": 0.01, + "learning_rate": 4.783856336202907e-05, + "loss": 0.8578, + "step": 1034 + }, + { + "epoch": 0.01, + "learning_rate": 4.783393501805055e-05, + "loss": 2.8285, + "step": 1036 + }, + { + "epoch": 0.01, + "learning_rate": 4.782930667407202e-05, + "loss": 1.2024, + "step": 1038 + }, + { + "epoch": 0.01, + "learning_rate": 4.78246783300935e-05, + "loss": 0.2802, + "step": 1040 + }, + { + "epoch": 0.01, + "learning_rate": 4.782004998611497e-05, + "loss": 2.2741, + "step": 1042 + }, + { + "epoch": 0.01, + "learning_rate": 4.7815421642136443e-05, + "loss": 0.4118, + "step": 1044 + }, + { + "epoch": 0.01, + "learning_rate": 4.7810793298157915e-05, + "loss": 2.8413, + "step": 1046 + }, + { + "epoch": 0.01, + "learning_rate": 4.7806164954179394e-05, + "loss": 1.1633, + "step": 1048 + }, + { + "epoch": 0.01, + "learning_rate": 4.780153661020087e-05, + "loss": 3.6136, + "step": 1050 + }, + { + "epoch": 0.01, + "learning_rate": 4.7796908266222345e-05, + "loss": 0.0636, + "step": 1052 + }, + { + "epoch": 0.01, + "learning_rate": 4.7792279922243824e-05, + "loss": 1.746, + "step": 1054 + }, + { + "epoch": 0.01, + "learning_rate": 4.7787651578265296e-05, + "loss": 3.7166, + "step": 1056 + }, + { + "epoch": 0.01, + "learning_rate": 4.7783023234286775e-05, + "loss": 4.5057, + "step": 1058 + }, + { + "epoch": 0.01, + "learning_rate": 4.777839489030825e-05, + "loss": 2.6392, + "step": 1060 + }, + { + "epoch": 0.01, + "learning_rate": 4.7773766546329726e-05, + "loss": 2.7932, + "step": 1062 + }, + { + "epoch": 0.01, + "learning_rate": 4.77691382023512e-05, + "loss": 4.3139, + "step": 1064 + }, + { + "epoch": 0.01, + "learning_rate": 4.7764509858372677e-05, + "loss": 3.9198, + "step": 1066 + }, + { + "epoch": 0.01, + "learning_rate": 4.775988151439415e-05, + "loss": 0.9431, + "step": 1068 + }, + { + "epoch": 0.01, + "learning_rate": 4.775525317041563e-05, + "loss": 3.6874, + "step": 1070 + }, + { + "epoch": 0.01, + "learning_rate": 4.77506248264371e-05, + "loss": 1.9133, + "step": 1072 + }, + { + "epoch": 0.01, + "learning_rate": 4.774599648245858e-05, + "loss": 1.7569, + "step": 1074 + }, + { + "epoch": 0.01, + "learning_rate": 4.774136813848005e-05, + "loss": 5.1998, + "step": 1076 + }, + { + "epoch": 0.01, + "learning_rate": 4.773673979450153e-05, + "loss": 0.2728, + "step": 1078 + }, + { + "epoch": 0.01, + "learning_rate": 4.7732111450523e-05, + "loss": 4.4559, + "step": 1080 + }, + { + "epoch": 0.01, + "learning_rate": 4.772748310654448e-05, + "loss": 2.8929, + "step": 1082 + }, + { + "epoch": 0.01, + "learning_rate": 4.772285476256595e-05, + "loss": 0.0211, + "step": 1084 + }, + { + "epoch": 0.01, + "learning_rate": 4.771822641858743e-05, + "loss": 4.8431, + "step": 1086 + }, + { + "epoch": 0.01, + "learning_rate": 4.771359807460891e-05, + "loss": 0.0501, + "step": 1088 + }, + { + "epoch": 0.01, + "learning_rate": 4.770896973063038e-05, + "loss": 6.2431, + "step": 1090 + }, + { + "epoch": 0.01, + "learning_rate": 4.770434138665186e-05, + "loss": 1.4295, + "step": 1092 + }, + { + "epoch": 0.01, + "learning_rate": 4.769971304267333e-05, + "loss": 0.0235, + "step": 1094 + }, + { + "epoch": 0.01, + "learning_rate": 4.769508469869481e-05, + "loss": 2.5061, + "step": 1096 + }, + { + "epoch": 0.01, + "learning_rate": 4.769045635471628e-05, + "loss": 2.8102, + "step": 1098 + }, + { + "epoch": 0.01, + "learning_rate": 4.768582801073776e-05, + "loss": 10.0612, + "step": 1100 + }, + { + "epoch": 0.01, + "learning_rate": 4.7681199666759234e-05, + "loss": 0.1956, + "step": 1102 + }, + { + "epoch": 0.01, + "learning_rate": 4.767657132278071e-05, + "loss": 1.5776, + "step": 1104 + }, + { + "epoch": 0.01, + "learning_rate": 4.7671942978802185e-05, + "loss": 2.259, + "step": 1106 + }, + { + "epoch": 0.01, + "learning_rate": 4.7667314634823664e-05, + "loss": 1.4943, + "step": 1108 + }, + { + "epoch": 0.01, + "learning_rate": 4.7662686290845136e-05, + "loss": 4.6757, + "step": 1110 + }, + { + "epoch": 0.01, + "learning_rate": 4.7658057946866615e-05, + "loss": 3.1171, + "step": 1112 + }, + { + "epoch": 0.01, + "learning_rate": 4.765342960288809e-05, + "loss": 3.6605, + "step": 1114 + }, + { + "epoch": 0.01, + "learning_rate": 4.7648801258909566e-05, + "loss": 3.1163, + "step": 1116 + }, + { + "epoch": 0.01, + "learning_rate": 4.764417291493104e-05, + "loss": 1.498, + "step": 1118 + }, + { + "epoch": 0.01, + "learning_rate": 4.7639544570952516e-05, + "loss": 1.723, + "step": 1120 + }, + { + "epoch": 0.01, + "learning_rate": 4.763491622697399e-05, + "loss": 2.3644, + "step": 1122 + }, + { + "epoch": 0.01, + "learning_rate": 4.763028788299547e-05, + "loss": 2.5378, + "step": 1124 + }, + { + "epoch": 0.01, + "learning_rate": 4.762565953901694e-05, + "loss": 3.1757, + "step": 1126 + }, + { + "epoch": 0.01, + "learning_rate": 4.762103119503842e-05, + "loss": 3.868, + "step": 1128 + }, + { + "epoch": 0.01, + "learning_rate": 4.76164028510599e-05, + "loss": 3.4017, + "step": 1130 + }, + { + "epoch": 0.01, + "learning_rate": 4.761177450708137e-05, + "loss": 0.5591, + "step": 1132 + }, + { + "epoch": 0.01, + "learning_rate": 4.760714616310285e-05, + "loss": 0.0602, + "step": 1134 + }, + { + "epoch": 0.01, + "learning_rate": 4.760251781912432e-05, + "loss": 2.1817, + "step": 1136 + }, + { + "epoch": 0.01, + "learning_rate": 4.75978894751458e-05, + "loss": 1.0818, + "step": 1138 + }, + { + "epoch": 0.01, + "learning_rate": 4.759326113116727e-05, + "loss": 2.8268, + "step": 1140 + }, + { + "epoch": 0.01, + "learning_rate": 4.758863278718875e-05, + "loss": 0.0036, + "step": 1142 + }, + { + "epoch": 0.01, + "learning_rate": 4.758400444321022e-05, + "loss": 0.088, + "step": 1144 + }, + { + "epoch": 0.01, + "learning_rate": 4.75793760992317e-05, + "loss": 0.8022, + "step": 1146 + }, + { + "epoch": 0.01, + "learning_rate": 4.757474775525317e-05, + "loss": 1.0228, + "step": 1148 + }, + { + "epoch": 0.01, + "learning_rate": 4.757011941127465e-05, + "loss": 0.7927, + "step": 1150 + }, + { + "epoch": 0.01, + "learning_rate": 4.756549106729612e-05, + "loss": 0.0035, + "step": 1152 + }, + { + "epoch": 0.01, + "learning_rate": 4.75608627233176e-05, + "loss": 0.0874, + "step": 1154 + }, + { + "epoch": 0.01, + "learning_rate": 4.7556234379339074e-05, + "loss": 0.0305, + "step": 1156 + }, + { + "epoch": 0.01, + "learning_rate": 4.755160603536055e-05, + "loss": 6.2571, + "step": 1158 + }, + { + "epoch": 0.01, + "learning_rate": 4.7546977691382025e-05, + "loss": 8.5162, + "step": 1160 + }, + { + "epoch": 0.01, + "learning_rate": 4.7542349347403504e-05, + "loss": 12.1018, + "step": 1162 + }, + { + "epoch": 0.01, + "learning_rate": 4.7537721003424976e-05, + "loss": 3.5762, + "step": 1164 + }, + { + "epoch": 0.01, + "learning_rate": 4.7533092659446455e-05, + "loss": 2.0671, + "step": 1166 + }, + { + "epoch": 0.01, + "learning_rate": 4.7528464315467927e-05, + "loss": 3.1923, + "step": 1168 + }, + { + "epoch": 0.01, + "learning_rate": 4.7523835971489405e-05, + "loss": 3.4033, + "step": 1170 + }, + { + "epoch": 0.01, + "learning_rate": 4.7519207627510884e-05, + "loss": 3.8199, + "step": 1172 + }, + { + "epoch": 0.01, + "learning_rate": 4.7514579283532356e-05, + "loss": 3.4156, + "step": 1174 + }, + { + "epoch": 0.01, + "learning_rate": 4.7509950939553835e-05, + "loss": 2.3559, + "step": 1176 + }, + { + "epoch": 0.01, + "learning_rate": 4.750532259557531e-05, + "loss": 4.286, + "step": 1178 + }, + { + "epoch": 0.01, + "learning_rate": 4.7500694251596786e-05, + "loss": 3.4982, + "step": 1180 + }, + { + "epoch": 0.01, + "learning_rate": 4.749606590761826e-05, + "loss": 2.1, + "step": 1182 + }, + { + "epoch": 0.01, + "learning_rate": 4.749143756363974e-05, + "loss": 5.3904, + "step": 1184 + }, + { + "epoch": 0.01, + "learning_rate": 4.748680921966121e-05, + "loss": 0.4751, + "step": 1186 + }, + { + "epoch": 0.01, + "learning_rate": 4.748218087568268e-05, + "loss": 3.8687, + "step": 1188 + }, + { + "epoch": 0.01, + "learning_rate": 4.747755253170415e-05, + "loss": 0.5556, + "step": 1190 + }, + { + "epoch": 0.01, + "learning_rate": 4.747292418772563e-05, + "loss": 3.1869, + "step": 1192 + }, + { + "epoch": 0.01, + "learning_rate": 4.7468295843747104e-05, + "loss": 5.0351, + "step": 1194 + }, + { + "epoch": 0.01, + "learning_rate": 4.746366749976858e-05, + "loss": 2.7529, + "step": 1196 + }, + { + "epoch": 0.01, + "learning_rate": 4.745903915579006e-05, + "loss": 2.7592, + "step": 1198 + }, + { + "epoch": 0.01, + "learning_rate": 4.745441081181153e-05, + "loss": 4.916, + "step": 1200 + }, + { + "epoch": 0.01, + "learning_rate": 4.744978246783301e-05, + "loss": 1.9435, + "step": 1202 + }, + { + "epoch": 0.01, + "learning_rate": 4.7445154123854484e-05, + "loss": 1.746, + "step": 1204 + }, + { + "epoch": 0.01, + "learning_rate": 4.744052577987596e-05, + "loss": 3.8881, + "step": 1206 + }, + { + "epoch": 0.01, + "learning_rate": 4.7435897435897435e-05, + "loss": 2.4149, + "step": 1208 + }, + { + "epoch": 0.01, + "learning_rate": 4.7431269091918914e-05, + "loss": 3.1034, + "step": 1210 + }, + { + "epoch": 0.01, + "learning_rate": 4.7426640747940386e-05, + "loss": 4.1012, + "step": 1212 + }, + { + "epoch": 0.01, + "learning_rate": 4.7422012403961865e-05, + "loss": 2.249, + "step": 1214 + }, + { + "epoch": 0.01, + "learning_rate": 4.741738405998334e-05, + "loss": 5.4898, + "step": 1216 + }, + { + "epoch": 0.01, + "learning_rate": 4.7412755716004816e-05, + "loss": 3.3004, + "step": 1218 + }, + { + "epoch": 0.01, + "learning_rate": 4.740812737202629e-05, + "loss": 4.122, + "step": 1220 + }, + { + "epoch": 0.01, + "learning_rate": 4.7403499028047766e-05, + "loss": 5.1891, + "step": 1222 + }, + { + "epoch": 0.01, + "learning_rate": 4.739887068406924e-05, + "loss": 2.6417, + "step": 1224 + }, + { + "epoch": 0.01, + "learning_rate": 4.739424234009072e-05, + "loss": 1.9748, + "step": 1226 + }, + { + "epoch": 0.01, + "learning_rate": 4.738961399611219e-05, + "loss": 3.2174, + "step": 1228 + }, + { + "epoch": 0.01, + "learning_rate": 4.738498565213367e-05, + "loss": 4.7975, + "step": 1230 + }, + { + "epoch": 0.01, + "learning_rate": 4.738035730815514e-05, + "loss": 1.3937, + "step": 1232 + }, + { + "epoch": 0.01, + "learning_rate": 4.737572896417662e-05, + "loss": 0.4165, + "step": 1234 + }, + { + "epoch": 0.01, + "learning_rate": 4.73711006201981e-05, + "loss": 3.279, + "step": 1236 + }, + { + "epoch": 0.01, + "learning_rate": 4.736647227621957e-05, + "loss": 1.9009, + "step": 1238 + }, + { + "epoch": 0.01, + "learning_rate": 4.736184393224105e-05, + "loss": 3.4443, + "step": 1240 + }, + { + "epoch": 0.01, + "learning_rate": 4.735721558826252e-05, + "loss": 5.6324, + "step": 1242 + }, + { + "epoch": 0.01, + "learning_rate": 4.7352587244284e-05, + "loss": 3.6197, + "step": 1244 + }, + { + "epoch": 0.01, + "learning_rate": 4.734795890030547e-05, + "loss": 5.076, + "step": 1246 + }, + { + "epoch": 0.01, + "learning_rate": 4.734333055632695e-05, + "loss": 0.6866, + "step": 1248 + }, + { + "epoch": 0.01, + "learning_rate": 4.733870221234842e-05, + "loss": 4.533, + "step": 1250 + }, + { + "epoch": 0.01, + "learning_rate": 4.73340738683699e-05, + "loss": 1.2522, + "step": 1252 + }, + { + "epoch": 0.01, + "learning_rate": 4.732944552439137e-05, + "loss": 3.1234, + "step": 1254 + }, + { + "epoch": 0.01, + "learning_rate": 4.732481718041285e-05, + "loss": 2.1485, + "step": 1256 + }, + { + "epoch": 0.01, + "learning_rate": 4.7320188836434324e-05, + "loss": 1.3479, + "step": 1258 + }, + { + "epoch": 0.01, + "learning_rate": 4.73155604924558e-05, + "loss": 0.9807, + "step": 1260 + }, + { + "epoch": 0.01, + "learning_rate": 4.7310932148477275e-05, + "loss": 2.0259, + "step": 1262 + }, + { + "epoch": 0.01, + "learning_rate": 4.7306303804498754e-05, + "loss": 3.2539, + "step": 1264 + }, + { + "epoch": 0.01, + "learning_rate": 4.7301675460520226e-05, + "loss": 1.5512, + "step": 1266 + }, + { + "epoch": 0.01, + "learning_rate": 4.7297047116541705e-05, + "loss": 0.6629, + "step": 1268 + }, + { + "epoch": 0.01, + "learning_rate": 4.7292418772563177e-05, + "loss": 0.94, + "step": 1270 + }, + { + "epoch": 0.01, + "learning_rate": 4.7287790428584655e-05, + "loss": 1.3583, + "step": 1272 + }, + { + "epoch": 0.01, + "learning_rate": 4.728316208460613e-05, + "loss": 0.0072, + "step": 1274 + }, + { + "epoch": 0.01, + "learning_rate": 4.7278533740627606e-05, + "loss": 3.2477, + "step": 1276 + }, + { + "epoch": 0.01, + "learning_rate": 4.7273905396649085e-05, + "loss": 4.2692, + "step": 1278 + }, + { + "epoch": 0.01, + "learning_rate": 4.726927705267056e-05, + "loss": 4.784, + "step": 1280 + }, + { + "epoch": 0.01, + "learning_rate": 4.7264648708692036e-05, + "loss": 2.0589, + "step": 1282 + }, + { + "epoch": 0.01, + "learning_rate": 4.726002036471351e-05, + "loss": 0.7848, + "step": 1284 + }, + { + "epoch": 0.01, + "learning_rate": 4.725539202073499e-05, + "loss": 1.1568, + "step": 1286 + }, + { + "epoch": 0.01, + "learning_rate": 4.725076367675646e-05, + "loss": 5.0926, + "step": 1288 + }, + { + "epoch": 0.01, + "learning_rate": 4.724613533277794e-05, + "loss": 2.0791, + "step": 1290 + }, + { + "epoch": 0.01, + "learning_rate": 4.724150698879941e-05, + "loss": 3.311, + "step": 1292 + }, + { + "epoch": 0.01, + "learning_rate": 4.723687864482089e-05, + "loss": 3.7369, + "step": 1294 + }, + { + "epoch": 0.01, + "learning_rate": 4.723225030084236e-05, + "loss": 2.28, + "step": 1296 + }, + { + "epoch": 0.01, + "learning_rate": 4.722762195686384e-05, + "loss": 0.9142, + "step": 1298 + }, + { + "epoch": 0.01, + "learning_rate": 4.722299361288531e-05, + "loss": 4.9183, + "step": 1300 + }, + { + "epoch": 0.01, + "learning_rate": 4.721836526890679e-05, + "loss": 3.4118, + "step": 1302 + }, + { + "epoch": 0.02, + "learning_rate": 4.721373692492826e-05, + "loss": 4.4652, + "step": 1304 + }, + { + "epoch": 0.02, + "learning_rate": 4.720910858094974e-05, + "loss": 1.2692, + "step": 1306 + }, + { + "epoch": 0.02, + "learning_rate": 4.720448023697121e-05, + "loss": 2.6757, + "step": 1308 + }, + { + "epoch": 0.02, + "learning_rate": 4.719985189299269e-05, + "loss": 2.8063, + "step": 1310 + }, + { + "epoch": 0.02, + "learning_rate": 4.7195223549014164e-05, + "loss": 3.1885, + "step": 1312 + }, + { + "epoch": 0.02, + "learning_rate": 4.719059520503564e-05, + "loss": 0.8567, + "step": 1314 + }, + { + "epoch": 0.02, + "learning_rate": 4.718596686105712e-05, + "loss": 3.2438, + "step": 1316 + }, + { + "epoch": 0.02, + "learning_rate": 4.7181338517078594e-05, + "loss": 1.719, + "step": 1318 + }, + { + "epoch": 0.02, + "learning_rate": 4.717671017310007e-05, + "loss": 1.287, + "step": 1320 + }, + { + "epoch": 0.02, + "learning_rate": 4.7172081829121544e-05, + "loss": 1.4392, + "step": 1322 + }, + { + "epoch": 0.02, + "learning_rate": 4.716745348514302e-05, + "loss": 5.0394, + "step": 1324 + }, + { + "epoch": 0.02, + "learning_rate": 4.7162825141164495e-05, + "loss": 3.4047, + "step": 1326 + }, + { + "epoch": 0.02, + "learning_rate": 4.7158196797185974e-05, + "loss": 1.6015, + "step": 1328 + }, + { + "epoch": 0.02, + "learning_rate": 4.7153568453207446e-05, + "loss": 3.368, + "step": 1330 + }, + { + "epoch": 0.02, + "learning_rate": 4.7148940109228925e-05, + "loss": 1.796, + "step": 1332 + }, + { + "epoch": 0.02, + "learning_rate": 4.714431176525039e-05, + "loss": 0.9212, + "step": 1334 + }, + { + "epoch": 0.02, + "learning_rate": 4.713968342127187e-05, + "loss": 0.4147, + "step": 1336 + }, + { + "epoch": 0.02, + "learning_rate": 4.713505507729334e-05, + "loss": 0.2373, + "step": 1338 + }, + { + "epoch": 0.02, + "learning_rate": 4.713042673331482e-05, + "loss": 3.3293, + "step": 1340 + }, + { + "epoch": 0.02, + "learning_rate": 4.71257983893363e-05, + "loss": 8.4659, + "step": 1342 + }, + { + "epoch": 0.02, + "learning_rate": 4.712117004535777e-05, + "loss": 1.597, + "step": 1344 + }, + { + "epoch": 0.02, + "learning_rate": 4.711654170137925e-05, + "loss": 1.5281, + "step": 1346 + }, + { + "epoch": 0.02, + "learning_rate": 4.711191335740072e-05, + "loss": 0.0873, + "step": 1348 + }, + { + "epoch": 0.02, + "learning_rate": 4.71072850134222e-05, + "loss": 2.4538, + "step": 1350 + }, + { + "epoch": 0.02, + "learning_rate": 4.710265666944367e-05, + "loss": 3.7778, + "step": 1352 + }, + { + "epoch": 0.02, + "learning_rate": 4.709802832546515e-05, + "loss": 3.5011, + "step": 1354 + }, + { + "epoch": 0.02, + "learning_rate": 4.709339998148662e-05, + "loss": 2.4013, + "step": 1356 + }, + { + "epoch": 0.02, + "learning_rate": 4.70887716375081e-05, + "loss": 3.8523, + "step": 1358 + }, + { + "epoch": 0.02, + "learning_rate": 4.7084143293529574e-05, + "loss": 5.9043, + "step": 1360 + }, + { + "epoch": 0.02, + "learning_rate": 4.707951494955105e-05, + "loss": 3.6095, + "step": 1362 + }, + { + "epoch": 0.02, + "learning_rate": 4.7074886605572525e-05, + "loss": 2.0603, + "step": 1364 + }, + { + "epoch": 0.02, + "learning_rate": 4.7070258261594004e-05, + "loss": 1.9184, + "step": 1366 + }, + { + "epoch": 0.02, + "learning_rate": 4.7065629917615476e-05, + "loss": 2.148, + "step": 1368 + }, + { + "epoch": 0.02, + "learning_rate": 4.7061001573636955e-05, + "loss": 4.1726, + "step": 1370 + }, + { + "epoch": 0.02, + "learning_rate": 4.705637322965843e-05, + "loss": 2.5698, + "step": 1372 + }, + { + "epoch": 0.02, + "learning_rate": 4.7051744885679905e-05, + "loss": 2.2628, + "step": 1374 + }, + { + "epoch": 0.02, + "learning_rate": 4.704711654170138e-05, + "loss": 0.1632, + "step": 1376 + }, + { + "epoch": 0.02, + "learning_rate": 4.7042488197722856e-05, + "loss": 4.9712, + "step": 1378 + }, + { + "epoch": 0.02, + "learning_rate": 4.703785985374433e-05, + "loss": 2.634, + "step": 1380 + }, + { + "epoch": 0.02, + "learning_rate": 4.703323150976581e-05, + "loss": 1.1508, + "step": 1382 + }, + { + "epoch": 0.02, + "learning_rate": 4.7028603165787286e-05, + "loss": 6.1856, + "step": 1384 + }, + { + "epoch": 0.02, + "learning_rate": 4.702397482180876e-05, + "loss": 1.4108, + "step": 1386 + }, + { + "epoch": 0.02, + "learning_rate": 4.701934647783024e-05, + "loss": 3.7405, + "step": 1388 + }, + { + "epoch": 0.02, + "learning_rate": 4.701471813385171e-05, + "loss": 0.0315, + "step": 1390 + }, + { + "epoch": 0.02, + "learning_rate": 4.701008978987319e-05, + "loss": 4.4584, + "step": 1392 + }, + { + "epoch": 0.02, + "learning_rate": 4.700546144589466e-05, + "loss": 3.6878, + "step": 1394 + }, + { + "epoch": 0.02, + "learning_rate": 4.700083310191614e-05, + "loss": 2.3816, + "step": 1396 + }, + { + "epoch": 0.02, + "learning_rate": 4.699620475793761e-05, + "loss": 2.7244, + "step": 1398 + }, + { + "epoch": 0.02, + "learning_rate": 4.699157641395909e-05, + "loss": 0.7517, + "step": 1400 + }, + { + "epoch": 0.02, + "learning_rate": 4.698694806998056e-05, + "loss": 2.4456, + "step": 1402 + }, + { + "epoch": 0.02, + "learning_rate": 4.698231972600204e-05, + "loss": 2.1693, + "step": 1404 + }, + { + "epoch": 0.02, + "learning_rate": 4.697769138202351e-05, + "loss": 5.5787, + "step": 1406 + }, + { + "epoch": 0.02, + "learning_rate": 4.697306303804499e-05, + "loss": 3.1504, + "step": 1408 + }, + { + "epoch": 0.02, + "learning_rate": 4.696843469406646e-05, + "loss": 0.8895, + "step": 1410 + }, + { + "epoch": 0.02, + "learning_rate": 4.696380635008794e-05, + "loss": 3.5219, + "step": 1412 + }, + { + "epoch": 0.02, + "learning_rate": 4.6959178006109414e-05, + "loss": 1.7531, + "step": 1414 + }, + { + "epoch": 0.02, + "learning_rate": 4.695454966213089e-05, + "loss": 4.6263, + "step": 1416 + }, + { + "epoch": 0.02, + "learning_rate": 4.6949921318152365e-05, + "loss": 6.6263, + "step": 1418 + }, + { + "epoch": 0.02, + "learning_rate": 4.6945292974173844e-05, + "loss": 2.2338, + "step": 1420 + }, + { + "epoch": 0.02, + "learning_rate": 4.6940664630195316e-05, + "loss": 1.1218, + "step": 1422 + }, + { + "epoch": 0.02, + "learning_rate": 4.6936036286216794e-05, + "loss": 2.904, + "step": 1424 + }, + { + "epoch": 0.02, + "learning_rate": 4.693140794223827e-05, + "loss": 2.2043, + "step": 1426 + }, + { + "epoch": 0.02, + "learning_rate": 4.6926779598259745e-05, + "loss": 1.9168, + "step": 1428 + }, + { + "epoch": 0.02, + "learning_rate": 4.6922151254281224e-05, + "loss": 4.4268, + "step": 1430 + }, + { + "epoch": 0.02, + "learning_rate": 4.6917522910302696e-05, + "loss": 2.6503, + "step": 1432 + }, + { + "epoch": 0.02, + "learning_rate": 4.6912894566324175e-05, + "loss": 1.7103, + "step": 1434 + }, + { + "epoch": 0.02, + "learning_rate": 4.690826622234565e-05, + "loss": 0.2614, + "step": 1436 + }, + { + "epoch": 0.02, + "learning_rate": 4.6903637878367126e-05, + "loss": 4.9934, + "step": 1438 + }, + { + "epoch": 0.02, + "learning_rate": 4.68990095343886e-05, + "loss": 2.4072, + "step": 1440 + }, + { + "epoch": 0.02, + "learning_rate": 4.689438119041008e-05, + "loss": 4.9945, + "step": 1442 + }, + { + "epoch": 0.02, + "learning_rate": 4.688975284643155e-05, + "loss": 6.048, + "step": 1444 + }, + { + "epoch": 0.02, + "learning_rate": 4.688512450245303e-05, + "loss": 1.2272, + "step": 1446 + }, + { + "epoch": 0.02, + "learning_rate": 4.68804961584745e-05, + "loss": 5.4235, + "step": 1448 + }, + { + "epoch": 0.02, + "learning_rate": 4.687586781449598e-05, + "loss": 3.3449, + "step": 1450 + }, + { + "epoch": 0.02, + "learning_rate": 4.687123947051745e-05, + "loss": 3.3197, + "step": 1452 + }, + { + "epoch": 0.02, + "learning_rate": 4.686661112653893e-05, + "loss": 1.8447, + "step": 1454 + }, + { + "epoch": 0.02, + "learning_rate": 4.68619827825604e-05, + "loss": 2.2925, + "step": 1456 + }, + { + "epoch": 0.02, + "learning_rate": 4.685735443858188e-05, + "loss": 3.6103, + "step": 1458 + }, + { + "epoch": 0.02, + "learning_rate": 4.685272609460335e-05, + "loss": 0.566, + "step": 1460 + }, + { + "epoch": 0.02, + "learning_rate": 4.684809775062483e-05, + "loss": 4.8005, + "step": 1462 + }, + { + "epoch": 0.02, + "learning_rate": 4.684346940664631e-05, + "loss": 2.8564, + "step": 1464 + }, + { + "epoch": 0.02, + "learning_rate": 4.683884106266778e-05, + "loss": 3.8502, + "step": 1466 + }, + { + "epoch": 0.02, + "learning_rate": 4.683421271868926e-05, + "loss": 2.3621, + "step": 1468 + }, + { + "epoch": 0.02, + "learning_rate": 4.682958437471073e-05, + "loss": 1.4551, + "step": 1470 + }, + { + "epoch": 0.02, + "learning_rate": 4.682495603073221e-05, + "loss": 2.2992, + "step": 1472 + }, + { + "epoch": 0.02, + "learning_rate": 4.6820327686753683e-05, + "loss": 3.1056, + "step": 1474 + }, + { + "epoch": 0.02, + "learning_rate": 4.681569934277516e-05, + "loss": 2.9353, + "step": 1476 + }, + { + "epoch": 0.02, + "learning_rate": 4.681107099879663e-05, + "loss": 3.2521, + "step": 1478 + }, + { + "epoch": 0.02, + "learning_rate": 4.6806442654818106e-05, + "loss": 1.2604, + "step": 1480 + }, + { + "epoch": 0.02, + "learning_rate": 4.680181431083958e-05, + "loss": 3.3738, + "step": 1482 + }, + { + "epoch": 0.02, + "learning_rate": 4.679718596686106e-05, + "loss": 0.6927, + "step": 1484 + }, + { + "epoch": 0.02, + "learning_rate": 4.679255762288253e-05, + "loss": 2.8132, + "step": 1486 + }, + { + "epoch": 0.02, + "learning_rate": 4.678792927890401e-05, + "loss": 1.5098, + "step": 1488 + }, + { + "epoch": 0.02, + "learning_rate": 4.678330093492549e-05, + "loss": 3.0815, + "step": 1490 + }, + { + "epoch": 0.02, + "learning_rate": 4.677867259094696e-05, + "loss": 0.0186, + "step": 1492 + }, + { + "epoch": 0.02, + "learning_rate": 4.677404424696844e-05, + "loss": 2.8723, + "step": 1494 + }, + { + "epoch": 0.02, + "learning_rate": 4.676941590298991e-05, + "loss": 0.7, + "step": 1496 + }, + { + "epoch": 0.02, + "learning_rate": 4.676478755901139e-05, + "loss": 0.0015, + "step": 1498 + }, + { + "epoch": 0.02, + "learning_rate": 4.676015921503286e-05, + "loss": 8.2901, + "step": 1500 + }, + { + "epoch": 0.02, + "learning_rate": 4.675553087105434e-05, + "loss": 3.3564, + "step": 1502 + }, + { + "epoch": 0.02, + "learning_rate": 4.675090252707581e-05, + "loss": 4.3119, + "step": 1504 + }, + { + "epoch": 0.02, + "learning_rate": 4.674627418309729e-05, + "loss": 1.7273, + "step": 1506 + }, + { + "epoch": 0.02, + "learning_rate": 4.674164583911876e-05, + "loss": 6.9064, + "step": 1508 + }, + { + "epoch": 0.02, + "learning_rate": 4.673701749514024e-05, + "loss": 1.4491, + "step": 1510 + }, + { + "epoch": 0.02, + "learning_rate": 4.673238915116171e-05, + "loss": 2.1862, + "step": 1512 + }, + { + "epoch": 0.02, + "learning_rate": 4.672776080718319e-05, + "loss": 2.4411, + "step": 1514 + }, + { + "epoch": 0.02, + "learning_rate": 4.6723132463204664e-05, + "loss": 1.8262, + "step": 1516 + }, + { + "epoch": 0.02, + "learning_rate": 4.671850411922614e-05, + "loss": 9.0044, + "step": 1518 + }, + { + "epoch": 0.02, + "learning_rate": 4.6713875775247615e-05, + "loss": 2.2658, + "step": 1520 + }, + { + "epoch": 0.02, + "learning_rate": 4.6709247431269094e-05, + "loss": 4.8632, + "step": 1522 + }, + { + "epoch": 0.02, + "learning_rate": 4.6704619087290566e-05, + "loss": 1.8087, + "step": 1524 + }, + { + "epoch": 0.02, + "learning_rate": 4.6699990743312044e-05, + "loss": 3.459, + "step": 1526 + }, + { + "epoch": 0.02, + "learning_rate": 4.6695362399333517e-05, + "loss": 1.8268, + "step": 1528 + }, + { + "epoch": 0.02, + "learning_rate": 4.6690734055354995e-05, + "loss": 0.1453, + "step": 1530 + }, + { + "epoch": 0.02, + "learning_rate": 4.6686105711376474e-05, + "loss": 1.4866, + "step": 1532 + }, + { + "epoch": 0.02, + "learning_rate": 4.6681477367397946e-05, + "loss": 1.0276, + "step": 1534 + }, + { + "epoch": 0.02, + "learning_rate": 4.6676849023419425e-05, + "loss": 4.1424, + "step": 1536 + }, + { + "epoch": 0.02, + "learning_rate": 4.66722206794409e-05, + "loss": 10.1489, + "step": 1538 + }, + { + "epoch": 0.02, + "learning_rate": 4.6667592335462376e-05, + "loss": 4.4767, + "step": 1540 + }, + { + "epoch": 0.02, + "learning_rate": 4.666296399148385e-05, + "loss": 2.2751, + "step": 1542 + }, + { + "epoch": 0.02, + "learning_rate": 4.665833564750533e-05, + "loss": 2.2312, + "step": 1544 + }, + { + "epoch": 0.02, + "learning_rate": 4.66537073035268e-05, + "loss": 5.567, + "step": 1546 + }, + { + "epoch": 0.02, + "learning_rate": 4.664907895954828e-05, + "loss": 2.2757, + "step": 1548 + }, + { + "epoch": 0.02, + "learning_rate": 4.664445061556975e-05, + "loss": 2.6249, + "step": 1550 + }, + { + "epoch": 0.02, + "learning_rate": 4.663982227159123e-05, + "loss": 2.2077, + "step": 1552 + }, + { + "epoch": 0.02, + "learning_rate": 4.66351939276127e-05, + "loss": 0.018, + "step": 1554 + }, + { + "epoch": 0.02, + "learning_rate": 4.663056558363418e-05, + "loss": 2.4558, + "step": 1556 + }, + { + "epoch": 0.02, + "learning_rate": 4.662593723965565e-05, + "loss": 1.0583, + "step": 1558 + }, + { + "epoch": 0.02, + "learning_rate": 4.662130889567713e-05, + "loss": 1.729, + "step": 1560 + }, + { + "epoch": 0.02, + "learning_rate": 4.66166805516986e-05, + "loss": 1.484, + "step": 1562 + }, + { + "epoch": 0.02, + "learning_rate": 4.661205220772008e-05, + "loss": 3.7384, + "step": 1564 + }, + { + "epoch": 0.02, + "learning_rate": 4.660742386374155e-05, + "loss": 6.3257, + "step": 1566 + }, + { + "epoch": 0.02, + "learning_rate": 4.660279551976303e-05, + "loss": 3.1436, + "step": 1568 + }, + { + "epoch": 0.02, + "learning_rate": 4.659816717578451e-05, + "loss": 0.9981, + "step": 1570 + }, + { + "epoch": 0.02, + "learning_rate": 4.659353883180598e-05, + "loss": 2.2064, + "step": 1572 + }, + { + "epoch": 0.02, + "learning_rate": 4.658891048782746e-05, + "loss": 3.3594, + "step": 1574 + }, + { + "epoch": 0.02, + "learning_rate": 4.6584282143848933e-05, + "loss": 1.7759, + "step": 1576 + }, + { + "epoch": 0.02, + "learning_rate": 4.657965379987041e-05, + "loss": 1.3102, + "step": 1578 + }, + { + "epoch": 0.02, + "learning_rate": 4.6575025455891884e-05, + "loss": 3.0085, + "step": 1580 + }, + { + "epoch": 0.02, + "learning_rate": 4.657039711191336e-05, + "loss": 1.6332, + "step": 1582 + }, + { + "epoch": 0.02, + "learning_rate": 4.6565768767934835e-05, + "loss": 1.7029, + "step": 1584 + }, + { + "epoch": 0.02, + "learning_rate": 4.6561140423956314e-05, + "loss": 0.1442, + "step": 1586 + }, + { + "epoch": 0.02, + "learning_rate": 4.6556512079977786e-05, + "loss": 2.6524, + "step": 1588 + }, + { + "epoch": 0.02, + "learning_rate": 4.6551883735999265e-05, + "loss": 0.8463, + "step": 1590 + }, + { + "epoch": 0.02, + "learning_rate": 4.654725539202074e-05, + "loss": 0.0362, + "step": 1592 + }, + { + "epoch": 0.02, + "learning_rate": 4.6542627048042216e-05, + "loss": 2.4726, + "step": 1594 + }, + { + "epoch": 0.02, + "learning_rate": 4.653799870406369e-05, + "loss": 2.7328, + "step": 1596 + }, + { + "epoch": 0.02, + "learning_rate": 4.6533370360085167e-05, + "loss": 0.2323, + "step": 1598 + }, + { + "epoch": 0.02, + "learning_rate": 4.652874201610664e-05, + "loss": 0.1138, + "step": 1600 + }, + { + "epoch": 0.02, + "learning_rate": 4.652411367212812e-05, + "loss": 5.126, + "step": 1602 + }, + { + "epoch": 0.02, + "learning_rate": 4.651948532814959e-05, + "loss": 3.0895, + "step": 1604 + }, + { + "epoch": 0.02, + "learning_rate": 4.651485698417107e-05, + "loss": 2.0055, + "step": 1606 + }, + { + "epoch": 0.02, + "learning_rate": 4.651022864019254e-05, + "loss": 1.3187, + "step": 1608 + }, + { + "epoch": 0.02, + "learning_rate": 4.650560029621402e-05, + "loss": 2.1532, + "step": 1610 + }, + { + "epoch": 0.02, + "learning_rate": 4.65009719522355e-05, + "loss": 0.2399, + "step": 1612 + }, + { + "epoch": 0.02, + "learning_rate": 4.649634360825697e-05, + "loss": 9.1947, + "step": 1614 + }, + { + "epoch": 0.02, + "learning_rate": 4.649171526427845e-05, + "loss": 4.7747, + "step": 1616 + }, + { + "epoch": 0.02, + "learning_rate": 4.648708692029992e-05, + "loss": 5.3616, + "step": 1618 + }, + { + "epoch": 0.02, + "learning_rate": 4.64824585763214e-05, + "loss": 4.9785, + "step": 1620 + }, + { + "epoch": 0.02, + "learning_rate": 4.647783023234287e-05, + "loss": 2.9585, + "step": 1622 + }, + { + "epoch": 0.02, + "learning_rate": 4.6473201888364344e-05, + "loss": 3.2767, + "step": 1624 + }, + { + "epoch": 0.02, + "learning_rate": 4.6468573544385816e-05, + "loss": 4.0326, + "step": 1626 + }, + { + "epoch": 0.02, + "learning_rate": 4.6463945200407295e-05, + "loss": 1.9793, + "step": 1628 + }, + { + "epoch": 0.02, + "learning_rate": 4.6459316856428767e-05, + "loss": 1.3678, + "step": 1630 + }, + { + "epoch": 0.02, + "learning_rate": 4.6454688512450245e-05, + "loss": 2.1165, + "step": 1632 + }, + { + "epoch": 0.02, + "learning_rate": 4.645006016847172e-05, + "loss": 2.348, + "step": 1634 + }, + { + "epoch": 0.02, + "learning_rate": 4.6445431824493196e-05, + "loss": 1.477, + "step": 1636 + }, + { + "epoch": 0.02, + "learning_rate": 4.6440803480514675e-05, + "loss": 1.371, + "step": 1638 + }, + { + "epoch": 0.02, + "learning_rate": 4.643617513653615e-05, + "loss": 3.6031, + "step": 1640 + }, + { + "epoch": 0.02, + "learning_rate": 4.6431546792557626e-05, + "loss": 3.5409, + "step": 1642 + }, + { + "epoch": 0.02, + "learning_rate": 4.64269184485791e-05, + "loss": 2.3525, + "step": 1644 + }, + { + "epoch": 0.02, + "learning_rate": 4.642229010460058e-05, + "loss": 1.8493, + "step": 1646 + }, + { + "epoch": 0.02, + "learning_rate": 4.641766176062205e-05, + "loss": 5.3567, + "step": 1648 + }, + { + "epoch": 0.02, + "learning_rate": 4.641303341664353e-05, + "loss": 5.4484, + "step": 1650 + }, + { + "epoch": 0.02, + "learning_rate": 4.6408405072665e-05, + "loss": 0.9547, + "step": 1652 + }, + { + "epoch": 0.02, + "learning_rate": 4.640377672868648e-05, + "loss": 4.832, + "step": 1654 + }, + { + "epoch": 0.02, + "learning_rate": 4.639914838470795e-05, + "loss": 3.5683, + "step": 1656 + }, + { + "epoch": 0.02, + "learning_rate": 4.639452004072943e-05, + "loss": 2.5118, + "step": 1658 + }, + { + "epoch": 0.02, + "learning_rate": 4.63898916967509e-05, + "loss": 4.6071, + "step": 1660 + }, + { + "epoch": 0.02, + "learning_rate": 4.638526335277238e-05, + "loss": 4.4391, + "step": 1662 + }, + { + "epoch": 0.02, + "learning_rate": 4.638063500879385e-05, + "loss": 2.2106, + "step": 1664 + }, + { + "epoch": 0.02, + "learning_rate": 4.637600666481533e-05, + "loss": 0.224, + "step": 1666 + }, + { + "epoch": 0.02, + "learning_rate": 4.63713783208368e-05, + "loss": 1.7189, + "step": 1668 + }, + { + "epoch": 0.02, + "learning_rate": 4.636674997685828e-05, + "loss": 1.6031, + "step": 1670 + }, + { + "epoch": 0.02, + "learning_rate": 4.6362121632879754e-05, + "loss": 2.0248, + "step": 1672 + }, + { + "epoch": 0.02, + "learning_rate": 4.635749328890123e-05, + "loss": 2.8778, + "step": 1674 + }, + { + "epoch": 0.02, + "learning_rate": 4.635286494492271e-05, + "loss": 2.194, + "step": 1676 + }, + { + "epoch": 0.02, + "learning_rate": 4.6348236600944184e-05, + "loss": 7.6742, + "step": 1678 + }, + { + "epoch": 0.02, + "learning_rate": 4.634360825696566e-05, + "loss": 2.3626, + "step": 1680 + }, + { + "epoch": 0.02, + "learning_rate": 4.6338979912987134e-05, + "loss": 0.3874, + "step": 1682 + }, + { + "epoch": 0.02, + "learning_rate": 4.633435156900861e-05, + "loss": 0.5834, + "step": 1684 + }, + { + "epoch": 0.02, + "learning_rate": 4.6329723225030085e-05, + "loss": 2.0645, + "step": 1686 + }, + { + "epoch": 0.02, + "learning_rate": 4.6325094881051564e-05, + "loss": 5.1255, + "step": 1688 + }, + { + "epoch": 0.02, + "learning_rate": 4.6320466537073036e-05, + "loss": 2.7201, + "step": 1690 + }, + { + "epoch": 0.02, + "learning_rate": 4.6315838193094515e-05, + "loss": 0.5743, + "step": 1692 + }, + { + "epoch": 0.02, + "learning_rate": 4.631120984911599e-05, + "loss": 0.6093, + "step": 1694 + }, + { + "epoch": 0.02, + "learning_rate": 4.6306581505137466e-05, + "loss": 1.1557, + "step": 1696 + }, + { + "epoch": 0.02, + "learning_rate": 4.630195316115894e-05, + "loss": 6.0955, + "step": 1698 + }, + { + "epoch": 0.02, + "learning_rate": 4.6297324817180417e-05, + "loss": 2.3893, + "step": 1700 + }, + { + "epoch": 0.02, + "learning_rate": 4.629269647320189e-05, + "loss": 1.033, + "step": 1702 + }, + { + "epoch": 0.02, + "learning_rate": 4.628806812922337e-05, + "loss": 3.5923, + "step": 1704 + }, + { + "epoch": 0.02, + "learning_rate": 4.628343978524484e-05, + "loss": 1.1247, + "step": 1706 + }, + { + "epoch": 0.02, + "learning_rate": 4.627881144126632e-05, + "loss": 3.5498, + "step": 1708 + }, + { + "epoch": 0.02, + "learning_rate": 4.627418309728779e-05, + "loss": 1.8788, + "step": 1710 + }, + { + "epoch": 0.02, + "learning_rate": 4.626955475330927e-05, + "loss": 4.0412, + "step": 1712 + }, + { + "epoch": 0.02, + "learning_rate": 4.626492640933074e-05, + "loss": 0.8718, + "step": 1714 + }, + { + "epoch": 0.02, + "learning_rate": 4.626029806535222e-05, + "loss": 2.8557, + "step": 1716 + }, + { + "epoch": 0.02, + "learning_rate": 4.62556697213737e-05, + "loss": 1.8011, + "step": 1718 + }, + { + "epoch": 0.02, + "learning_rate": 4.625104137739517e-05, + "loss": 0.5937, + "step": 1720 + }, + { + "epoch": 0.02, + "learning_rate": 4.624641303341665e-05, + "loss": 5.9732, + "step": 1722 + }, + { + "epoch": 0.02, + "learning_rate": 4.624178468943812e-05, + "loss": 7.3994, + "step": 1724 + }, + { + "epoch": 0.02, + "learning_rate": 4.62371563454596e-05, + "loss": 6.1346, + "step": 1726 + }, + { + "epoch": 0.02, + "learning_rate": 4.623252800148107e-05, + "loss": 0.8119, + "step": 1728 + }, + { + "epoch": 0.02, + "learning_rate": 4.622789965750255e-05, + "loss": 2.5117, + "step": 1730 + }, + { + "epoch": 0.02, + "learning_rate": 4.622327131352402e-05, + "loss": 4.7919, + "step": 1732 + }, + { + "epoch": 0.02, + "learning_rate": 4.62186429695455e-05, + "loss": 3.0197, + "step": 1734 + }, + { + "epoch": 0.02, + "learning_rate": 4.6214014625566974e-05, + "loss": 2.4058, + "step": 1736 + }, + { + "epoch": 0.02, + "learning_rate": 4.620938628158845e-05, + "loss": 2.6577, + "step": 1738 + }, + { + "epoch": 0.02, + "learning_rate": 4.6204757937609925e-05, + "loss": 3.2956, + "step": 1740 + }, + { + "epoch": 0.02, + "learning_rate": 4.6200129593631404e-05, + "loss": 2.5949, + "step": 1742 + }, + { + "epoch": 0.02, + "learning_rate": 4.6195501249652876e-05, + "loss": 1.5059, + "step": 1744 + }, + { + "epoch": 0.02, + "learning_rate": 4.6190872905674355e-05, + "loss": 0.5341, + "step": 1746 + }, + { + "epoch": 0.02, + "learning_rate": 4.618624456169583e-05, + "loss": 0.5147, + "step": 1748 + }, + { + "epoch": 0.02, + "learning_rate": 4.6181616217717306e-05, + "loss": 1.8775, + "step": 1750 + }, + { + "epoch": 0.02, + "learning_rate": 4.617698787373878e-05, + "loss": 0.1786, + "step": 1752 + }, + { + "epoch": 0.02, + "learning_rate": 4.6172359529760256e-05, + "loss": 7.1488, + "step": 1754 + }, + { + "epoch": 0.02, + "learning_rate": 4.616773118578173e-05, + "loss": 0.0758, + "step": 1756 + }, + { + "epoch": 0.02, + "learning_rate": 4.616310284180321e-05, + "loss": 1.1736, + "step": 1758 + }, + { + "epoch": 0.02, + "learning_rate": 4.6158474497824686e-05, + "loss": 4.8311, + "step": 1760 + }, + { + "epoch": 0.02, + "learning_rate": 4.615384615384616e-05, + "loss": 4.3117, + "step": 1762 + }, + { + "epoch": 0.02, + "learning_rate": 4.614921780986764e-05, + "loss": 4.9715, + "step": 1764 + }, + { + "epoch": 0.02, + "learning_rate": 4.614458946588911e-05, + "loss": 12.1501, + "step": 1766 + }, + { + "epoch": 0.02, + "learning_rate": 4.613996112191058e-05, + "loss": 7.2045, + "step": 1768 + }, + { + "epoch": 0.02, + "learning_rate": 4.613533277793205e-05, + "loss": 0.2252, + "step": 1770 + }, + { + "epoch": 0.02, + "learning_rate": 4.613070443395353e-05, + "loss": 1.8361, + "step": 1772 + }, + { + "epoch": 0.02, + "learning_rate": 4.6126076089975004e-05, + "loss": 6.1041, + "step": 1774 + }, + { + "epoch": 0.02, + "learning_rate": 4.612144774599648e-05, + "loss": 0.4606, + "step": 1776 + }, + { + "epoch": 0.02, + "learning_rate": 4.6116819402017955e-05, + "loss": 1.9522, + "step": 1778 + }, + { + "epoch": 0.02, + "learning_rate": 4.6112191058039434e-05, + "loss": 3.2616, + "step": 1780 + }, + { + "epoch": 0.02, + "learning_rate": 4.6107562714060906e-05, + "loss": 1.4915, + "step": 1782 + }, + { + "epoch": 0.02, + "learning_rate": 4.6102934370082384e-05, + "loss": 4.3302, + "step": 1784 + }, + { + "epoch": 0.02, + "learning_rate": 4.609830602610386e-05, + "loss": 2.0909, + "step": 1786 + }, + { + "epoch": 0.02, + "learning_rate": 4.6093677682125335e-05, + "loss": 1.996, + "step": 1788 + }, + { + "epoch": 0.02, + "learning_rate": 4.6089049338146814e-05, + "loss": 2.7939, + "step": 1790 + }, + { + "epoch": 0.02, + "learning_rate": 4.6084420994168286e-05, + "loss": 1.2666, + "step": 1792 + }, + { + "epoch": 0.02, + "learning_rate": 4.6079792650189765e-05, + "loss": 0.714, + "step": 1794 + }, + { + "epoch": 0.02, + "learning_rate": 4.607516430621124e-05, + "loss": 1.7849, + "step": 1796 + }, + { + "epoch": 0.02, + "learning_rate": 4.6070535962232716e-05, + "loss": 3.3108, + "step": 1798 + }, + { + "epoch": 0.02, + "learning_rate": 4.606590761825419e-05, + "loss": 0.7047, + "step": 1800 + }, + { + "epoch": 0.02, + "learning_rate": 4.606127927427567e-05, + "loss": 0.8227, + "step": 1802 + }, + { + "epoch": 0.02, + "learning_rate": 4.605665093029714e-05, + "loss": 1.642, + "step": 1804 + }, + { + "epoch": 0.02, + "learning_rate": 4.605202258631862e-05, + "loss": 2.8321, + "step": 1806 + }, + { + "epoch": 0.02, + "learning_rate": 4.604739424234009e-05, + "loss": 0.0617, + "step": 1808 + }, + { + "epoch": 0.02, + "learning_rate": 4.604276589836157e-05, + "loss": 0.6797, + "step": 1810 + }, + { + "epoch": 0.02, + "learning_rate": 4.603813755438304e-05, + "loss": 7.7426, + "step": 1812 + }, + { + "epoch": 0.02, + "learning_rate": 4.603350921040452e-05, + "loss": 0.4267, + "step": 1814 + }, + { + "epoch": 0.02, + "learning_rate": 4.602888086642599e-05, + "loss": 0.2307, + "step": 1816 + }, + { + "epoch": 0.02, + "learning_rate": 4.602425252244747e-05, + "loss": 3.2965, + "step": 1818 + }, + { + "epoch": 0.02, + "learning_rate": 4.601962417846894e-05, + "loss": 2.933, + "step": 1820 + }, + { + "epoch": 0.02, + "learning_rate": 4.601499583449042e-05, + "loss": 2.5302, + "step": 1822 + }, + { + "epoch": 0.02, + "learning_rate": 4.60103674905119e-05, + "loss": 0.9394, + "step": 1824 + }, + { + "epoch": 0.02, + "learning_rate": 4.600573914653337e-05, + "loss": 3.3788, + "step": 1826 + }, + { + "epoch": 0.02, + "learning_rate": 4.600111080255485e-05, + "loss": 0.4418, + "step": 1828 + }, + { + "epoch": 0.02, + "learning_rate": 4.599648245857632e-05, + "loss": 2.1213, + "step": 1830 + }, + { + "epoch": 0.02, + "learning_rate": 4.59918541145978e-05, + "loss": 2.0603, + "step": 1832 + }, + { + "epoch": 0.02, + "learning_rate": 4.5987225770619273e-05, + "loss": 4.9091, + "step": 1834 + }, + { + "epoch": 0.02, + "learning_rate": 4.598259742664075e-05, + "loss": 4.5502, + "step": 1836 + }, + { + "epoch": 0.02, + "learning_rate": 4.5977969082662224e-05, + "loss": 5.3509, + "step": 1838 + }, + { + "epoch": 0.02, + "learning_rate": 4.59733407386837e-05, + "loss": 3.1513, + "step": 1840 + }, + { + "epoch": 0.02, + "learning_rate": 4.5968712394705175e-05, + "loss": 1.9933, + "step": 1842 + }, + { + "epoch": 0.02, + "learning_rate": 4.5964084050726654e-05, + "loss": 1.0844, + "step": 1844 + }, + { + "epoch": 0.02, + "learning_rate": 4.5959455706748126e-05, + "loss": 3.9193, + "step": 1846 + }, + { + "epoch": 0.02, + "learning_rate": 4.5954827362769605e-05, + "loss": 3.5403, + "step": 1848 + }, + { + "epoch": 0.02, + "learning_rate": 4.595019901879108e-05, + "loss": 1.5624, + "step": 1850 + }, + { + "epoch": 0.02, + "learning_rate": 4.5945570674812556e-05, + "loss": 0.5245, + "step": 1852 + }, + { + "epoch": 0.02, + "learning_rate": 4.594094233083403e-05, + "loss": 0.934, + "step": 1854 + }, + { + "epoch": 0.02, + "learning_rate": 4.5936313986855506e-05, + "loss": 2.399, + "step": 1856 + }, + { + "epoch": 0.02, + "learning_rate": 4.593168564287698e-05, + "loss": 2.5662, + "step": 1858 + }, + { + "epoch": 0.02, + "learning_rate": 4.592705729889846e-05, + "loss": 2.4856, + "step": 1860 + }, + { + "epoch": 0.02, + "learning_rate": 4.592242895491993e-05, + "loss": 2.0161, + "step": 1862 + }, + { + "epoch": 0.02, + "learning_rate": 4.591780061094141e-05, + "loss": 3.732, + "step": 1864 + }, + { + "epoch": 0.02, + "learning_rate": 4.591317226696289e-05, + "loss": 2.559, + "step": 1866 + }, + { + "epoch": 0.02, + "learning_rate": 4.590854392298436e-05, + "loss": 0.8488, + "step": 1868 + }, + { + "epoch": 0.02, + "learning_rate": 4.590391557900584e-05, + "loss": 4.8079, + "step": 1870 + }, + { + "epoch": 0.02, + "learning_rate": 4.589928723502731e-05, + "loss": 1.2993, + "step": 1872 + }, + { + "epoch": 0.02, + "learning_rate": 4.589465889104879e-05, + "loss": 3.6165, + "step": 1874 + }, + { + "epoch": 0.02, + "learning_rate": 4.589003054707026e-05, + "loss": 3.3347, + "step": 1876 + }, + { + "epoch": 0.02, + "learning_rate": 4.588540220309174e-05, + "loss": 1.3434, + "step": 1878 + }, + { + "epoch": 0.02, + "learning_rate": 4.588077385911321e-05, + "loss": 2.7971, + "step": 1880 + }, + { + "epoch": 0.02, + "learning_rate": 4.587614551513469e-05, + "loss": 2.1208, + "step": 1882 + }, + { + "epoch": 0.02, + "learning_rate": 4.587151717115616e-05, + "loss": 3.4964, + "step": 1884 + }, + { + "epoch": 0.02, + "learning_rate": 4.586688882717764e-05, + "loss": 0.8696, + "step": 1886 + }, + { + "epoch": 0.02, + "learning_rate": 4.586226048319911e-05, + "loss": 5.2081, + "step": 1888 + }, + { + "epoch": 0.02, + "learning_rate": 4.585763213922059e-05, + "loss": 3.3185, + "step": 1890 + }, + { + "epoch": 0.02, + "learning_rate": 4.5853003795242064e-05, + "loss": 3.5607, + "step": 1892 + }, + { + "epoch": 0.02, + "learning_rate": 4.584837545126354e-05, + "loss": 1.3931, + "step": 1894 + }, + { + "epoch": 0.02, + "learning_rate": 4.5843747107285015e-05, + "loss": 3.5017, + "step": 1896 + }, + { + "epoch": 0.02, + "learning_rate": 4.5839118763306494e-05, + "loss": 1.6544, + "step": 1898 + }, + { + "epoch": 0.02, + "learning_rate": 4.5834490419327966e-05, + "loss": 0.6147, + "step": 1900 + }, + { + "epoch": 0.02, + "learning_rate": 4.5829862075349445e-05, + "loss": 0.3586, + "step": 1902 + }, + { + "epoch": 0.02, + "learning_rate": 4.5825233731370923e-05, + "loss": 4.1892, + "step": 1904 + }, + { + "epoch": 0.02, + "learning_rate": 4.5820605387392395e-05, + "loss": 1.0249, + "step": 1906 + }, + { + "epoch": 0.02, + "learning_rate": 4.5815977043413874e-05, + "loss": 5.8179, + "step": 1908 + }, + { + "epoch": 0.02, + "learning_rate": 4.5811348699435346e-05, + "loss": 1.4441, + "step": 1910 + }, + { + "epoch": 0.02, + "learning_rate": 4.5806720355456825e-05, + "loss": 6.212, + "step": 1912 + }, + { + "epoch": 0.02, + "learning_rate": 4.580209201147829e-05, + "loss": 4.0223, + "step": 1914 + }, + { + "epoch": 0.02, + "learning_rate": 4.579746366749977e-05, + "loss": 1.4515, + "step": 1916 + }, + { + "epoch": 0.02, + "learning_rate": 4.579283532352124e-05, + "loss": 1.4379, + "step": 1918 + }, + { + "epoch": 0.02, + "learning_rate": 4.578820697954272e-05, + "loss": 0.7094, + "step": 1920 + }, + { + "epoch": 0.02, + "learning_rate": 4.578357863556419e-05, + "loss": 2.5687, + "step": 1922 + }, + { + "epoch": 0.02, + "learning_rate": 4.577895029158567e-05, + "loss": 0.9674, + "step": 1924 + }, + { + "epoch": 0.02, + "learning_rate": 4.577432194760714e-05, + "loss": 3.2022, + "step": 1926 + }, + { + "epoch": 0.02, + "learning_rate": 4.576969360362862e-05, + "loss": 0.9043, + "step": 1928 + }, + { + "epoch": 0.02, + "learning_rate": 4.57650652596501e-05, + "loss": 3.1792, + "step": 1930 + }, + { + "epoch": 0.02, + "learning_rate": 4.576043691567157e-05, + "loss": 3.1869, + "step": 1932 + }, + { + "epoch": 0.02, + "learning_rate": 4.575580857169305e-05, + "loss": 0.2195, + "step": 1934 + }, + { + "epoch": 0.02, + "learning_rate": 4.5751180227714523e-05, + "loss": 3.6047, + "step": 1936 + }, + { + "epoch": 0.02, + "learning_rate": 4.5746551883736e-05, + "loss": 8.8579, + "step": 1938 + }, + { + "epoch": 0.02, + "learning_rate": 4.5741923539757474e-05, + "loss": 2.4881, + "step": 1940 + }, + { + "epoch": 0.02, + "learning_rate": 4.573729519577895e-05, + "loss": 0.8224, + "step": 1942 + }, + { + "epoch": 0.02, + "learning_rate": 4.5732666851800425e-05, + "loss": 1.2833, + "step": 1944 + }, + { + "epoch": 0.02, + "learning_rate": 4.5728038507821904e-05, + "loss": 0.8703, + "step": 1946 + }, + { + "epoch": 0.02, + "learning_rate": 4.5723410163843376e-05, + "loss": 3.5144, + "step": 1948 + }, + { + "epoch": 0.02, + "learning_rate": 4.5718781819864855e-05, + "loss": 1.7912, + "step": 1950 + }, + { + "epoch": 0.02, + "learning_rate": 4.571415347588633e-05, + "loss": 1.215, + "step": 1952 + }, + { + "epoch": 0.02, + "learning_rate": 4.5709525131907806e-05, + "loss": 3.878, + "step": 1954 + }, + { + "epoch": 0.02, + "learning_rate": 4.570489678792928e-05, + "loss": 1.3974, + "step": 1956 + }, + { + "epoch": 0.02, + "learning_rate": 4.5700268443950757e-05, + "loss": 2.7886, + "step": 1958 + }, + { + "epoch": 0.02, + "learning_rate": 4.569564009997223e-05, + "loss": 2.0902, + "step": 1960 + }, + { + "epoch": 0.02, + "learning_rate": 4.569101175599371e-05, + "loss": 0.813, + "step": 1962 + }, + { + "epoch": 0.02, + "learning_rate": 4.568638341201518e-05, + "loss": 3.0979, + "step": 1964 + }, + { + "epoch": 0.02, + "learning_rate": 4.568175506803666e-05, + "loss": 1.8209, + "step": 1966 + }, + { + "epoch": 0.02, + "learning_rate": 4.567712672405813e-05, + "loss": 1.1106, + "step": 1968 + }, + { + "epoch": 0.02, + "learning_rate": 4.567249838007961e-05, + "loss": 2.2329, + "step": 1970 + }, + { + "epoch": 0.02, + "learning_rate": 4.566787003610109e-05, + "loss": 0.053, + "step": 1972 + }, + { + "epoch": 0.02, + "learning_rate": 4.566324169212256e-05, + "loss": 2.2613, + "step": 1974 + }, + { + "epoch": 0.02, + "learning_rate": 4.565861334814404e-05, + "loss": 5.4557, + "step": 1976 + }, + { + "epoch": 0.02, + "learning_rate": 4.565398500416551e-05, + "loss": 1.3582, + "step": 1978 + }, + { + "epoch": 0.02, + "learning_rate": 4.564935666018699e-05, + "loss": 3.0152, + "step": 1980 + }, + { + "epoch": 0.02, + "learning_rate": 4.564472831620846e-05, + "loss": 1.0431, + "step": 1982 + }, + { + "epoch": 0.02, + "learning_rate": 4.564009997222994e-05, + "loss": 0.0089, + "step": 1984 + }, + { + "epoch": 0.02, + "learning_rate": 4.563547162825141e-05, + "loss": 0.5129, + "step": 1986 + }, + { + "epoch": 0.02, + "learning_rate": 4.563084328427289e-05, + "loss": 3.2135, + "step": 1988 + }, + { + "epoch": 0.02, + "learning_rate": 4.562621494029436e-05, + "loss": 7.0357, + "step": 1990 + }, + { + "epoch": 0.02, + "learning_rate": 4.562158659631584e-05, + "loss": 1.9964, + "step": 1992 + }, + { + "epoch": 0.02, + "learning_rate": 4.5616958252337314e-05, + "loss": 2.6189, + "step": 1994 + }, + { + "epoch": 0.02, + "learning_rate": 4.561232990835879e-05, + "loss": 7.0998, + "step": 1996 + }, + { + "epoch": 0.02, + "learning_rate": 4.5607701564380265e-05, + "loss": 3.1208, + "step": 1998 + }, + { + "epoch": 0.02, + "learning_rate": 4.5603073220401744e-05, + "loss": 4.3489, + "step": 2000 + }, + { + "epoch": 0.02, + "learning_rate": 4.5598444876423216e-05, + "loss": 3.6178, + "step": 2002 + }, + { + "epoch": 0.02, + "learning_rate": 4.5593816532444695e-05, + "loss": 3.1075, + "step": 2004 + }, + { + "epoch": 0.02, + "learning_rate": 4.558918818846617e-05, + "loss": 1.6052, + "step": 2006 + }, + { + "epoch": 0.02, + "learning_rate": 4.5584559844487646e-05, + "loss": 2.2888, + "step": 2008 + }, + { + "epoch": 0.02, + "learning_rate": 4.5579931500509124e-05, + "loss": 1.3035, + "step": 2010 + }, + { + "epoch": 0.02, + "learning_rate": 4.5575303156530596e-05, + "loss": 3.3716, + "step": 2012 + }, + { + "epoch": 0.02, + "learning_rate": 4.5570674812552075e-05, + "loss": 2.2311, + "step": 2014 + }, + { + "epoch": 0.02, + "learning_rate": 4.556604646857355e-05, + "loss": 1.8558, + "step": 2016 + }, + { + "epoch": 0.02, + "learning_rate": 4.5561418124595026e-05, + "loss": 3.0461, + "step": 2018 + }, + { + "epoch": 0.02, + "learning_rate": 4.55567897806165e-05, + "loss": 0.5723, + "step": 2020 + }, + { + "epoch": 0.02, + "learning_rate": 4.555216143663798e-05, + "loss": 4.4988, + "step": 2022 + }, + { + "epoch": 0.02, + "learning_rate": 4.554753309265945e-05, + "loss": 3.8209, + "step": 2024 + }, + { + "epoch": 0.02, + "learning_rate": 4.554290474868093e-05, + "loss": 0.6857, + "step": 2026 + }, + { + "epoch": 0.02, + "learning_rate": 4.55382764047024e-05, + "loss": 2.3959, + "step": 2028 + }, + { + "epoch": 0.02, + "learning_rate": 4.553364806072388e-05, + "loss": 1.2031, + "step": 2030 + }, + { + "epoch": 0.02, + "learning_rate": 4.552901971674535e-05, + "loss": 5.2028, + "step": 2032 + }, + { + "epoch": 0.02, + "learning_rate": 4.552439137276683e-05, + "loss": 6.1027, + "step": 2034 + }, + { + "epoch": 0.02, + "learning_rate": 4.55197630287883e-05, + "loss": 2.4259, + "step": 2036 + }, + { + "epoch": 0.02, + "learning_rate": 4.551513468480978e-05, + "loss": 2.304, + "step": 2038 + }, + { + "epoch": 0.02, + "learning_rate": 4.551050634083125e-05, + "loss": 4.2057, + "step": 2040 + }, + { + "epoch": 0.02, + "learning_rate": 4.550587799685273e-05, + "loss": 2.4561, + "step": 2042 + }, + { + "epoch": 0.02, + "learning_rate": 4.55012496528742e-05, + "loss": 1.5298, + "step": 2044 + }, + { + "epoch": 0.02, + "learning_rate": 4.549662130889568e-05, + "loss": 1.0376, + "step": 2046 + }, + { + "epoch": 0.02, + "learning_rate": 4.5491992964917154e-05, + "loss": 2.0048, + "step": 2048 + }, + { + "epoch": 0.02, + "learning_rate": 4.548736462093863e-05, + "loss": 1.3363, + "step": 2050 + }, + { + "epoch": 0.02, + "learning_rate": 4.548273627696011e-05, + "loss": 2.1149, + "step": 2052 + }, + { + "epoch": 0.02, + "learning_rate": 4.5478107932981584e-05, + "loss": 2.7407, + "step": 2054 + }, + { + "epoch": 0.02, + "learning_rate": 4.547347958900306e-05, + "loss": 0.7472, + "step": 2056 + }, + { + "epoch": 0.02, + "learning_rate": 4.546885124502453e-05, + "loss": 1.87, + "step": 2058 + }, + { + "epoch": 0.02, + "learning_rate": 4.5464222901046007e-05, + "loss": 1.9965, + "step": 2060 + }, + { + "epoch": 0.02, + "learning_rate": 4.545959455706748e-05, + "loss": 4.991, + "step": 2062 + }, + { + "epoch": 0.02, + "learning_rate": 4.545496621308896e-05, + "loss": 0.1791, + "step": 2064 + }, + { + "epoch": 0.02, + "learning_rate": 4.545033786911043e-05, + "loss": 1.6174, + "step": 2066 + }, + { + "epoch": 0.02, + "learning_rate": 4.544570952513191e-05, + "loss": 1.8906, + "step": 2068 + }, + { + "epoch": 0.02, + "learning_rate": 4.544108118115338e-05, + "loss": 3.155, + "step": 2070 + }, + { + "epoch": 0.02, + "learning_rate": 4.543645283717486e-05, + "loss": 2.1355, + "step": 2072 + }, + { + "epoch": 0.02, + "learning_rate": 4.543182449319633e-05, + "loss": 7.8725, + "step": 2074 + }, + { + "epoch": 0.02, + "learning_rate": 4.542719614921781e-05, + "loss": 1.0543, + "step": 2076 + }, + { + "epoch": 0.02, + "learning_rate": 4.542256780523929e-05, + "loss": 3.4159, + "step": 2078 + }, + { + "epoch": 0.02, + "learning_rate": 4.541793946126076e-05, + "loss": 3.5965, + "step": 2080 + }, + { + "epoch": 0.02, + "learning_rate": 4.541331111728224e-05, + "loss": 1.3608, + "step": 2082 + }, + { + "epoch": 0.02, + "learning_rate": 4.540868277330371e-05, + "loss": 0.6067, + "step": 2084 + }, + { + "epoch": 0.02, + "learning_rate": 4.540405442932519e-05, + "loss": 2.863, + "step": 2086 + }, + { + "epoch": 0.02, + "learning_rate": 4.539942608534666e-05, + "loss": 3.5894, + "step": 2088 + }, + { + "epoch": 0.02, + "learning_rate": 4.539479774136814e-05, + "loss": 0.1556, + "step": 2090 + }, + { + "epoch": 0.02, + "learning_rate": 4.539016939738961e-05, + "loss": 2.7656, + "step": 2092 + }, + { + "epoch": 0.02, + "learning_rate": 4.538554105341109e-05, + "loss": 0.5127, + "step": 2094 + }, + { + "epoch": 0.02, + "learning_rate": 4.5380912709432564e-05, + "loss": 0.8523, + "step": 2096 + }, + { + "epoch": 0.02, + "learning_rate": 4.537628436545404e-05, + "loss": 2.0607, + "step": 2098 + }, + { + "epoch": 0.02, + "learning_rate": 4.5371656021475515e-05, + "loss": 0.6225, + "step": 2100 + }, + { + "epoch": 0.02, + "learning_rate": 4.5367027677496994e-05, + "loss": 0.1571, + "step": 2102 + }, + { + "epoch": 0.02, + "learning_rate": 4.5362399333518466e-05, + "loss": 1.9484, + "step": 2104 + }, + { + "epoch": 0.02, + "learning_rate": 4.5357770989539945e-05, + "loss": 0.6029, + "step": 2106 + }, + { + "epoch": 0.02, + "learning_rate": 4.535314264556142e-05, + "loss": 0.9368, + "step": 2108 + }, + { + "epoch": 0.02, + "learning_rate": 4.5348514301582896e-05, + "loss": 2.6047, + "step": 2110 + }, + { + "epoch": 0.02, + "learning_rate": 4.534388595760437e-05, + "loss": 0.0311, + "step": 2112 + }, + { + "epoch": 0.02, + "learning_rate": 4.5339257613625846e-05, + "loss": 8.9976, + "step": 2114 + }, + { + "epoch": 0.02, + "learning_rate": 4.533462926964732e-05, + "loss": 4.9767, + "step": 2116 + }, + { + "epoch": 0.02, + "learning_rate": 4.53300009256688e-05, + "loss": 2.0942, + "step": 2118 + }, + { + "epoch": 0.02, + "learning_rate": 4.5325372581690276e-05, + "loss": 5.3527, + "step": 2120 + }, + { + "epoch": 0.02, + "learning_rate": 4.532074423771175e-05, + "loss": 3.7434, + "step": 2122 + }, + { + "epoch": 0.02, + "learning_rate": 4.531611589373323e-05, + "loss": 4.6058, + "step": 2124 + }, + { + "epoch": 0.02, + "learning_rate": 4.53114875497547e-05, + "loss": 3.7413, + "step": 2126 + }, + { + "epoch": 0.02, + "learning_rate": 4.530685920577618e-05, + "loss": 2.5465, + "step": 2128 + }, + { + "epoch": 0.02, + "learning_rate": 4.530223086179765e-05, + "loss": 1.6612, + "step": 2130 + }, + { + "epoch": 0.02, + "learning_rate": 4.529760251781913e-05, + "loss": 1.9171, + "step": 2132 + }, + { + "epoch": 0.02, + "learning_rate": 4.52929741738406e-05, + "loss": 1.7376, + "step": 2134 + }, + { + "epoch": 0.02, + "learning_rate": 4.528834582986208e-05, + "loss": 1.8551, + "step": 2136 + }, + { + "epoch": 0.02, + "learning_rate": 4.528371748588355e-05, + "loss": 2.7661, + "step": 2138 + }, + { + "epoch": 0.02, + "learning_rate": 4.527908914190503e-05, + "loss": 1.8831, + "step": 2140 + }, + { + "epoch": 0.02, + "learning_rate": 4.52744607979265e-05, + "loss": 1.327, + "step": 2142 + }, + { + "epoch": 0.02, + "learning_rate": 4.526983245394798e-05, + "loss": 0.8619, + "step": 2144 + }, + { + "epoch": 0.02, + "learning_rate": 4.526520410996945e-05, + "loss": 0.3338, + "step": 2146 + }, + { + "epoch": 0.02, + "learning_rate": 4.526057576599093e-05, + "loss": 5.1896, + "step": 2148 + }, + { + "epoch": 0.02, + "learning_rate": 4.5255947422012404e-05, + "loss": 0.0308, + "step": 2150 + }, + { + "epoch": 0.02, + "learning_rate": 4.525131907803388e-05, + "loss": 3.9201, + "step": 2152 + }, + { + "epoch": 0.02, + "learning_rate": 4.5246690734055355e-05, + "loss": 10.1902, + "step": 2154 + }, + { + "epoch": 0.02, + "learning_rate": 4.5242062390076834e-05, + "loss": 2.434, + "step": 2156 + }, + { + "epoch": 0.02, + "learning_rate": 4.523743404609831e-05, + "loss": 1.6084, + "step": 2158 + }, + { + "epoch": 0.02, + "learning_rate": 4.5232805702119785e-05, + "loss": 2.6577, + "step": 2160 + }, + { + "epoch": 0.02, + "learning_rate": 4.522817735814126e-05, + "loss": 0.3432, + "step": 2162 + }, + { + "epoch": 0.02, + "learning_rate": 4.5223549014162735e-05, + "loss": 3.3654, + "step": 2164 + }, + { + "epoch": 0.02, + "learning_rate": 4.5218920670184214e-05, + "loss": 1.7249, + "step": 2166 + }, + { + "epoch": 0.02, + "learning_rate": 4.5214292326205686e-05, + "loss": 4.2702, + "step": 2168 + }, + { + "epoch": 0.02, + "learning_rate": 4.5209663982227165e-05, + "loss": 0.0735, + "step": 2170 + }, + { + "epoch": 0.03, + "learning_rate": 4.520503563824864e-05, + "loss": 1.9043, + "step": 2172 + }, + { + "epoch": 0.03, + "learning_rate": 4.5200407294270116e-05, + "loss": 3.6567, + "step": 2174 + }, + { + "epoch": 0.03, + "learning_rate": 4.519577895029159e-05, + "loss": 3.6733, + "step": 2176 + }, + { + "epoch": 0.03, + "learning_rate": 4.519115060631307e-05, + "loss": 5.7071, + "step": 2178 + }, + { + "epoch": 0.03, + "learning_rate": 4.518652226233454e-05, + "loss": 1.0858, + "step": 2180 + }, + { + "epoch": 0.03, + "learning_rate": 4.518189391835602e-05, + "loss": 1.3381, + "step": 2182 + }, + { + "epoch": 0.03, + "learning_rate": 4.517726557437749e-05, + "loss": 4.0219, + "step": 2184 + }, + { + "epoch": 0.03, + "learning_rate": 4.517263723039897e-05, + "loss": 1.66, + "step": 2186 + }, + { + "epoch": 0.03, + "learning_rate": 4.516800888642044e-05, + "loss": 2.7369, + "step": 2188 + }, + { + "epoch": 0.03, + "learning_rate": 4.516338054244192e-05, + "loss": 1.1413, + "step": 2190 + }, + { + "epoch": 0.03, + "learning_rate": 4.515875219846339e-05, + "loss": 3.1562, + "step": 2192 + }, + { + "epoch": 0.03, + "learning_rate": 4.515412385448487e-05, + "loss": 2.3145, + "step": 2194 + }, + { + "epoch": 0.03, + "learning_rate": 4.514949551050634e-05, + "loss": 2.579, + "step": 2196 + }, + { + "epoch": 0.03, + "learning_rate": 4.514486716652782e-05, + "loss": 1.3117, + "step": 2198 + }, + { + "epoch": 0.03, + "learning_rate": 4.51402388225493e-05, + "loss": 0.0994, + "step": 2200 + }, + { + "epoch": 0.03, + "learning_rate": 4.513561047857077e-05, + "loss": 4.6938, + "step": 2202 + }, + { + "epoch": 0.03, + "learning_rate": 4.5130982134592244e-05, + "loss": 1.992, + "step": 2204 + }, + { + "epoch": 0.03, + "learning_rate": 4.5126353790613716e-05, + "loss": 4.6339, + "step": 2206 + }, + { + "epoch": 0.03, + "learning_rate": 4.5121725446635195e-05, + "loss": 0.0708, + "step": 2208 + }, + { + "epoch": 0.03, + "learning_rate": 4.511709710265667e-05, + "loss": 1.9962, + "step": 2210 + }, + { + "epoch": 0.03, + "learning_rate": 4.5112468758678146e-05, + "loss": 3.7955, + "step": 2212 + }, + { + "epoch": 0.03, + "learning_rate": 4.510784041469962e-05, + "loss": 5.114, + "step": 2214 + }, + { + "epoch": 0.03, + "learning_rate": 4.5103212070721096e-05, + "loss": 0.6122, + "step": 2216 + }, + { + "epoch": 0.03, + "learning_rate": 4.509858372674257e-05, + "loss": 1.6119, + "step": 2218 + }, + { + "epoch": 0.03, + "learning_rate": 4.509395538276405e-05, + "loss": 1.6431, + "step": 2220 + }, + { + "epoch": 0.03, + "learning_rate": 4.508932703878552e-05, + "loss": 2.0628, + "step": 2222 + }, + { + "epoch": 0.03, + "learning_rate": 4.5084698694807e-05, + "loss": 5.4893, + "step": 2224 + }, + { + "epoch": 0.03, + "learning_rate": 4.508007035082848e-05, + "loss": 1.8499, + "step": 2226 + }, + { + "epoch": 0.03, + "learning_rate": 4.507544200684995e-05, + "loss": 0.0013, + "step": 2228 + }, + { + "epoch": 0.03, + "learning_rate": 4.507081366287143e-05, + "loss": 4.5373, + "step": 2230 + }, + { + "epoch": 0.03, + "learning_rate": 4.50661853188929e-05, + "loss": 2.4563, + "step": 2232 + }, + { + "epoch": 0.03, + "learning_rate": 4.506155697491438e-05, + "loss": 2.305, + "step": 2234 + }, + { + "epoch": 0.03, + "learning_rate": 4.505692863093585e-05, + "loss": 0.4411, + "step": 2236 + }, + { + "epoch": 0.03, + "learning_rate": 4.505230028695733e-05, + "loss": 7.7629, + "step": 2238 + }, + { + "epoch": 0.03, + "learning_rate": 4.50476719429788e-05, + "loss": 3.6271, + "step": 2240 + }, + { + "epoch": 0.03, + "learning_rate": 4.504304359900028e-05, + "loss": 3.2062, + "step": 2242 + }, + { + "epoch": 0.03, + "learning_rate": 4.503841525502175e-05, + "loss": 2.6683, + "step": 2244 + }, + { + "epoch": 0.03, + "learning_rate": 4.503378691104323e-05, + "loss": 2.5465, + "step": 2246 + }, + { + "epoch": 0.03, + "learning_rate": 4.50291585670647e-05, + "loss": 2.3814, + "step": 2248 + }, + { + "epoch": 0.03, + "learning_rate": 4.502453022308618e-05, + "loss": 1.9564, + "step": 2250 + }, + { + "epoch": 0.03, + "learning_rate": 4.5019901879107654e-05, + "loss": 0.1662, + "step": 2252 + }, + { + "epoch": 0.03, + "learning_rate": 4.501527353512913e-05, + "loss": 2.2642, + "step": 2254 + }, + { + "epoch": 0.03, + "learning_rate": 4.5010645191150605e-05, + "loss": 1.3017, + "step": 2256 + }, + { + "epoch": 0.03, + "learning_rate": 4.5006016847172084e-05, + "loss": 1.9808, + "step": 2258 + }, + { + "epoch": 0.03, + "learning_rate": 4.5001388503193556e-05, + "loss": 0.8798, + "step": 2260 + }, + { + "epoch": 0.03, + "learning_rate": 4.4996760159215035e-05, + "loss": 1.9536, + "step": 2262 + }, + { + "epoch": 0.03, + "learning_rate": 4.4992131815236513e-05, + "loss": 6.3355, + "step": 2264 + }, + { + "epoch": 0.03, + "learning_rate": 4.4987503471257985e-05, + "loss": 1.8066, + "step": 2266 + }, + { + "epoch": 0.03, + "learning_rate": 4.4982875127279464e-05, + "loss": 2.4471, + "step": 2268 + }, + { + "epoch": 0.03, + "learning_rate": 4.4978246783300936e-05, + "loss": 0.8584, + "step": 2270 + }, + { + "epoch": 0.03, + "learning_rate": 4.4973618439322415e-05, + "loss": 1.5624, + "step": 2272 + }, + { + "epoch": 0.03, + "learning_rate": 4.496899009534389e-05, + "loss": 6.2858, + "step": 2274 + }, + { + "epoch": 0.03, + "learning_rate": 4.4964361751365366e-05, + "loss": 1.1941, + "step": 2276 + }, + { + "epoch": 0.03, + "learning_rate": 4.495973340738684e-05, + "loss": 2.4854, + "step": 2278 + }, + { + "epoch": 0.03, + "learning_rate": 4.495510506340832e-05, + "loss": 4.911, + "step": 2280 + }, + { + "epoch": 0.03, + "learning_rate": 4.495047671942979e-05, + "loss": 2.1383, + "step": 2282 + }, + { + "epoch": 0.03, + "learning_rate": 4.494584837545127e-05, + "loss": 0.6882, + "step": 2284 + }, + { + "epoch": 0.03, + "learning_rate": 4.494122003147274e-05, + "loss": 1.499, + "step": 2286 + }, + { + "epoch": 0.03, + "learning_rate": 4.493659168749422e-05, + "loss": 4.5602, + "step": 2288 + }, + { + "epoch": 0.03, + "learning_rate": 4.493196334351569e-05, + "loss": 1.0174, + "step": 2290 + }, + { + "epoch": 0.03, + "learning_rate": 4.492733499953717e-05, + "loss": 2.5959, + "step": 2292 + }, + { + "epoch": 0.03, + "learning_rate": 4.492270665555864e-05, + "loss": 4.0921, + "step": 2294 + }, + { + "epoch": 0.03, + "learning_rate": 4.491807831158012e-05, + "loss": 3.4905, + "step": 2296 + }, + { + "epoch": 0.03, + "learning_rate": 4.491344996760159e-05, + "loss": 2.3414, + "step": 2298 + }, + { + "epoch": 0.03, + "learning_rate": 4.490882162362307e-05, + "loss": 0.6486, + "step": 2300 + }, + { + "epoch": 0.03, + "learning_rate": 4.490419327964454e-05, + "loss": 3.1082, + "step": 2302 + }, + { + "epoch": 0.03, + "learning_rate": 4.489956493566602e-05, + "loss": 3.1168, + "step": 2304 + }, + { + "epoch": 0.03, + "learning_rate": 4.48949365916875e-05, + "loss": 4.6532, + "step": 2306 + }, + { + "epoch": 0.03, + "learning_rate": 4.489030824770897e-05, + "loss": 1.9955, + "step": 2308 + }, + { + "epoch": 0.03, + "learning_rate": 4.488567990373045e-05, + "loss": 5.8696, + "step": 2310 + }, + { + "epoch": 0.03, + "learning_rate": 4.4881051559751924e-05, + "loss": 4.8305, + "step": 2312 + }, + { + "epoch": 0.03, + "learning_rate": 4.48764232157734e-05, + "loss": 1.1393, + "step": 2314 + }, + { + "epoch": 0.03, + "learning_rate": 4.4871794871794874e-05, + "loss": 1.3943, + "step": 2316 + }, + { + "epoch": 0.03, + "learning_rate": 4.486716652781635e-05, + "loss": 0.8162, + "step": 2318 + }, + { + "epoch": 0.03, + "learning_rate": 4.4862538183837825e-05, + "loss": 2.7841, + "step": 2320 + }, + { + "epoch": 0.03, + "learning_rate": 4.4857909839859304e-05, + "loss": 5.2569, + "step": 2322 + }, + { + "epoch": 0.03, + "learning_rate": 4.4853281495880776e-05, + "loss": 3.4046, + "step": 2324 + }, + { + "epoch": 0.03, + "learning_rate": 4.4848653151902255e-05, + "loss": 1.3027, + "step": 2326 + }, + { + "epoch": 0.03, + "learning_rate": 4.484402480792373e-05, + "loss": 0.866, + "step": 2328 + }, + { + "epoch": 0.03, + "learning_rate": 4.4839396463945206e-05, + "loss": 2.8791, + "step": 2330 + }, + { + "epoch": 0.03, + "learning_rate": 4.483476811996668e-05, + "loss": 0.1113, + "step": 2332 + }, + { + "epoch": 0.03, + "learning_rate": 4.483013977598816e-05, + "loss": 2.381, + "step": 2334 + }, + { + "epoch": 0.03, + "learning_rate": 4.482551143200963e-05, + "loss": 3.0403, + "step": 2336 + }, + { + "epoch": 0.03, + "learning_rate": 4.482088308803111e-05, + "loss": 1.4951, + "step": 2338 + }, + { + "epoch": 0.03, + "learning_rate": 4.481625474405258e-05, + "loss": 2.0979, + "step": 2340 + }, + { + "epoch": 0.03, + "learning_rate": 4.481162640007406e-05, + "loss": 2.0601, + "step": 2342 + }, + { + "epoch": 0.03, + "learning_rate": 4.480699805609554e-05, + "loss": 0.2744, + "step": 2344 + }, + { + "epoch": 0.03, + "learning_rate": 4.480236971211701e-05, + "loss": 7.8202, + "step": 2346 + }, + { + "epoch": 0.03, + "learning_rate": 4.479774136813848e-05, + "loss": 1.4182, + "step": 2348 + }, + { + "epoch": 0.03, + "learning_rate": 4.479311302415995e-05, + "loss": 5.8921, + "step": 2350 + }, + { + "epoch": 0.03, + "learning_rate": 4.478848468018143e-05, + "loss": 3.8961, + "step": 2352 + }, + { + "epoch": 0.03, + "learning_rate": 4.4783856336202904e-05, + "loss": 2.173, + "step": 2354 + }, + { + "epoch": 0.03, + "learning_rate": 4.477922799222438e-05, + "loss": 1.2978, + "step": 2356 + }, + { + "epoch": 0.03, + "learning_rate": 4.4774599648245855e-05, + "loss": 2.4788, + "step": 2358 + }, + { + "epoch": 0.03, + "learning_rate": 4.4769971304267334e-05, + "loss": 3.9309, + "step": 2360 + }, + { + "epoch": 0.03, + "learning_rate": 4.4765342960288806e-05, + "loss": 3.1497, + "step": 2362 + }, + { + "epoch": 0.03, + "learning_rate": 4.4760714616310285e-05, + "loss": 1.5971, + "step": 2364 + }, + { + "epoch": 0.03, + "learning_rate": 4.475608627233176e-05, + "loss": 1.2024, + "step": 2366 + }, + { + "epoch": 0.03, + "learning_rate": 4.4751457928353236e-05, + "loss": 0.681, + "step": 2368 + }, + { + "epoch": 0.03, + "learning_rate": 4.4746829584374714e-05, + "loss": 4.8842, + "step": 2370 + }, + { + "epoch": 0.03, + "learning_rate": 4.4742201240396186e-05, + "loss": 5.752, + "step": 2372 + }, + { + "epoch": 0.03, + "learning_rate": 4.4737572896417665e-05, + "loss": 6.2372, + "step": 2374 + }, + { + "epoch": 0.03, + "learning_rate": 4.473294455243914e-05, + "loss": 4.3843, + "step": 2376 + }, + { + "epoch": 0.03, + "learning_rate": 4.4728316208460616e-05, + "loss": 4.17, + "step": 2378 + }, + { + "epoch": 0.03, + "learning_rate": 4.472368786448209e-05, + "loss": 5.429, + "step": 2380 + }, + { + "epoch": 0.03, + "learning_rate": 4.471905952050357e-05, + "loss": 1.4702, + "step": 2382 + }, + { + "epoch": 0.03, + "learning_rate": 4.471443117652504e-05, + "loss": 4.9791, + "step": 2384 + }, + { + "epoch": 0.03, + "learning_rate": 4.470980283254652e-05, + "loss": 3.6112, + "step": 2386 + }, + { + "epoch": 0.03, + "learning_rate": 4.470517448856799e-05, + "loss": 2.094, + "step": 2388 + }, + { + "epoch": 0.03, + "learning_rate": 4.470054614458947e-05, + "loss": 3.3698, + "step": 2390 + }, + { + "epoch": 0.03, + "learning_rate": 4.469591780061094e-05, + "loss": 1.9098, + "step": 2392 + }, + { + "epoch": 0.03, + "learning_rate": 4.469128945663242e-05, + "loss": 2.7917, + "step": 2394 + }, + { + "epoch": 0.03, + "learning_rate": 4.468666111265389e-05, + "loss": 0.3426, + "step": 2396 + }, + { + "epoch": 0.03, + "learning_rate": 4.468203276867537e-05, + "loss": 3.0689, + "step": 2398 + }, + { + "epoch": 0.03, + "learning_rate": 4.467740442469684e-05, + "loss": 1.8702, + "step": 2400 + }, + { + "epoch": 0.03, + "learning_rate": 4.467277608071832e-05, + "loss": 0.0791, + "step": 2402 + }, + { + "epoch": 0.03, + "learning_rate": 4.466814773673979e-05, + "loss": 1.2237, + "step": 2404 + }, + { + "epoch": 0.03, + "learning_rate": 4.466351939276127e-05, + "loss": 1.5561, + "step": 2406 + }, + { + "epoch": 0.03, + "learning_rate": 4.4658891048782744e-05, + "loss": 2.407, + "step": 2408 + }, + { + "epoch": 0.03, + "learning_rate": 4.465426270480422e-05, + "loss": 0.0041, + "step": 2410 + }, + { + "epoch": 0.03, + "learning_rate": 4.46496343608257e-05, + "loss": 2.7705, + "step": 2412 + }, + { + "epoch": 0.03, + "learning_rate": 4.4645006016847174e-05, + "loss": 1.0757, + "step": 2414 + }, + { + "epoch": 0.03, + "learning_rate": 4.464037767286865e-05, + "loss": 3.2402, + "step": 2416 + }, + { + "epoch": 0.03, + "learning_rate": 4.4635749328890125e-05, + "loss": 1.8926, + "step": 2418 + }, + { + "epoch": 0.03, + "learning_rate": 4.46311209849116e-05, + "loss": 2.0275, + "step": 2420 + }, + { + "epoch": 0.03, + "learning_rate": 4.4626492640933075e-05, + "loss": 1.0583, + "step": 2422 + }, + { + "epoch": 0.03, + "learning_rate": 4.4621864296954554e-05, + "loss": 0.325, + "step": 2424 + }, + { + "epoch": 0.03, + "learning_rate": 4.4617235952976026e-05, + "loss": 1.194, + "step": 2426 + }, + { + "epoch": 0.03, + "learning_rate": 4.4612607608997505e-05, + "loss": 1.2236, + "step": 2428 + }, + { + "epoch": 0.03, + "learning_rate": 4.460797926501898e-05, + "loss": 2.2921, + "step": 2430 + }, + { + "epoch": 0.03, + "learning_rate": 4.4603350921040456e-05, + "loss": 0.1005, + "step": 2432 + }, + { + "epoch": 0.03, + "learning_rate": 4.459872257706193e-05, + "loss": 0.2327, + "step": 2434 + }, + { + "epoch": 0.03, + "learning_rate": 4.459409423308341e-05, + "loss": 5.9419, + "step": 2436 + }, + { + "epoch": 0.03, + "learning_rate": 4.458946588910488e-05, + "loss": 0.0061, + "step": 2438 + }, + { + "epoch": 0.03, + "learning_rate": 4.458483754512636e-05, + "loss": 0.2648, + "step": 2440 + }, + { + "epoch": 0.03, + "learning_rate": 4.458020920114783e-05, + "loss": 3.4569, + "step": 2442 + }, + { + "epoch": 0.03, + "learning_rate": 4.457558085716931e-05, + "loss": 6.1861, + "step": 2444 + }, + { + "epoch": 0.03, + "learning_rate": 4.457095251319078e-05, + "loss": 5.4836, + "step": 2446 + }, + { + "epoch": 0.03, + "learning_rate": 4.456632416921226e-05, + "loss": 1.8945, + "step": 2448 + }, + { + "epoch": 0.03, + "learning_rate": 4.456169582523373e-05, + "loss": 1.9144, + "step": 2450 + }, + { + "epoch": 0.03, + "learning_rate": 4.455706748125521e-05, + "loss": 3.5867, + "step": 2452 + }, + { + "epoch": 0.03, + "learning_rate": 4.455243913727669e-05, + "loss": 3.9415, + "step": 2454 + }, + { + "epoch": 0.03, + "learning_rate": 4.454781079329816e-05, + "loss": 3.6372, + "step": 2456 + }, + { + "epoch": 0.03, + "learning_rate": 4.454318244931964e-05, + "loss": 1.1826, + "step": 2458 + }, + { + "epoch": 0.03, + "learning_rate": 4.453855410534111e-05, + "loss": 1.5229, + "step": 2460 + }, + { + "epoch": 0.03, + "learning_rate": 4.453392576136259e-05, + "loss": 2.71, + "step": 2462 + }, + { + "epoch": 0.03, + "learning_rate": 4.452929741738406e-05, + "loss": 1.2933, + "step": 2464 + }, + { + "epoch": 0.03, + "learning_rate": 4.452466907340554e-05, + "loss": 3.0796, + "step": 2466 + }, + { + "epoch": 0.03, + "learning_rate": 4.4520040729427014e-05, + "loss": 4.9143, + "step": 2468 + }, + { + "epoch": 0.03, + "learning_rate": 4.451541238544849e-05, + "loss": 0.6296, + "step": 2470 + }, + { + "epoch": 0.03, + "learning_rate": 4.4510784041469964e-05, + "loss": 4.4411, + "step": 2472 + }, + { + "epoch": 0.03, + "learning_rate": 4.450615569749144e-05, + "loss": 0.9448, + "step": 2474 + }, + { + "epoch": 0.03, + "learning_rate": 4.4501527353512915e-05, + "loss": 6.1768, + "step": 2476 + }, + { + "epoch": 0.03, + "learning_rate": 4.4496899009534394e-05, + "loss": 0.514, + "step": 2478 + }, + { + "epoch": 0.03, + "learning_rate": 4.4492270665555866e-05, + "loss": 0.9086, + "step": 2480 + }, + { + "epoch": 0.03, + "learning_rate": 4.4487642321577345e-05, + "loss": 5.5642, + "step": 2482 + }, + { + "epoch": 0.03, + "learning_rate": 4.448301397759882e-05, + "loss": 1.4092, + "step": 2484 + }, + { + "epoch": 0.03, + "learning_rate": 4.4478385633620296e-05, + "loss": 1.4034, + "step": 2486 + }, + { + "epoch": 0.03, + "learning_rate": 4.447375728964177e-05, + "loss": 7.0888, + "step": 2488 + }, + { + "epoch": 0.03, + "learning_rate": 4.4469128945663247e-05, + "loss": 5.1851, + "step": 2490 + }, + { + "epoch": 0.03, + "learning_rate": 4.4464500601684725e-05, + "loss": 4.5908, + "step": 2492 + }, + { + "epoch": 0.03, + "learning_rate": 4.445987225770619e-05, + "loss": 1.0254, + "step": 2494 + }, + { + "epoch": 0.03, + "learning_rate": 4.445524391372767e-05, + "loss": 3.7667, + "step": 2496 + }, + { + "epoch": 0.03, + "learning_rate": 4.445061556974914e-05, + "loss": 2.3717, + "step": 2498 + }, + { + "epoch": 0.03, + "learning_rate": 4.444598722577062e-05, + "loss": 0.2581, + "step": 2500 + }, + { + "epoch": 0.03, + "learning_rate": 4.444135888179209e-05, + "loss": 1.4121, + "step": 2502 + }, + { + "epoch": 0.03, + "learning_rate": 4.443673053781357e-05, + "loss": 6.1679, + "step": 2504 + }, + { + "epoch": 0.03, + "learning_rate": 4.443210219383504e-05, + "loss": 2.0511, + "step": 2506 + }, + { + "epoch": 0.03, + "learning_rate": 4.442747384985652e-05, + "loss": 1.4458, + "step": 2508 + }, + { + "epoch": 0.03, + "learning_rate": 4.4422845505877994e-05, + "loss": 1.7507, + "step": 2510 + }, + { + "epoch": 0.03, + "learning_rate": 4.441821716189947e-05, + "loss": 1.6248, + "step": 2512 + }, + { + "epoch": 0.03, + "learning_rate": 4.4413588817920945e-05, + "loss": 0.303, + "step": 2514 + }, + { + "epoch": 0.03, + "learning_rate": 4.4408960473942424e-05, + "loss": 2.1568, + "step": 2516 + }, + { + "epoch": 0.03, + "learning_rate": 4.44043321299639e-05, + "loss": 2.6031, + "step": 2518 + }, + { + "epoch": 0.03, + "learning_rate": 4.4399703785985375e-05, + "loss": 0.0373, + "step": 2520 + }, + { + "epoch": 0.03, + "learning_rate": 4.439507544200685e-05, + "loss": 0.6339, + "step": 2522 + }, + { + "epoch": 0.03, + "learning_rate": 4.4390447098028325e-05, + "loss": 1.3996, + "step": 2524 + }, + { + "epoch": 0.03, + "learning_rate": 4.4385818754049804e-05, + "loss": 2.5112, + "step": 2526 + }, + { + "epoch": 0.03, + "learning_rate": 4.4381190410071276e-05, + "loss": 5.8007, + "step": 2528 + }, + { + "epoch": 0.03, + "learning_rate": 4.4376562066092755e-05, + "loss": 5.9392, + "step": 2530 + }, + { + "epoch": 0.03, + "learning_rate": 4.437193372211423e-05, + "loss": 1.1424, + "step": 2532 + }, + { + "epoch": 0.03, + "learning_rate": 4.4367305378135706e-05, + "loss": 5.5354, + "step": 2534 + }, + { + "epoch": 0.03, + "learning_rate": 4.436267703415718e-05, + "loss": 0.9002, + "step": 2536 + }, + { + "epoch": 0.03, + "learning_rate": 4.435804869017866e-05, + "loss": 1.965, + "step": 2538 + }, + { + "epoch": 0.03, + "learning_rate": 4.435342034620013e-05, + "loss": 1.4898, + "step": 2540 + }, + { + "epoch": 0.03, + "learning_rate": 4.434879200222161e-05, + "loss": 6.6592, + "step": 2542 + }, + { + "epoch": 0.03, + "learning_rate": 4.434416365824308e-05, + "loss": 0.9809, + "step": 2544 + }, + { + "epoch": 0.03, + "learning_rate": 4.433953531426456e-05, + "loss": 2.5261, + "step": 2546 + }, + { + "epoch": 0.03, + "learning_rate": 4.433490697028603e-05, + "loss": 2.4459, + "step": 2548 + }, + { + "epoch": 0.03, + "learning_rate": 4.433027862630751e-05, + "loss": 1.0395, + "step": 2550 + }, + { + "epoch": 0.03, + "learning_rate": 4.432565028232898e-05, + "loss": 0.3818, + "step": 2552 + }, + { + "epoch": 0.03, + "learning_rate": 4.432102193835046e-05, + "loss": 3.5728, + "step": 2554 + }, + { + "epoch": 0.03, + "learning_rate": 4.431639359437193e-05, + "loss": 3.5446, + "step": 2556 + }, + { + "epoch": 0.03, + "learning_rate": 4.431176525039341e-05, + "loss": 3.4712, + "step": 2558 + }, + { + "epoch": 0.03, + "learning_rate": 4.430713690641489e-05, + "loss": 0.6659, + "step": 2560 + }, + { + "epoch": 0.03, + "learning_rate": 4.430250856243636e-05, + "loss": 4.5963, + "step": 2562 + }, + { + "epoch": 0.03, + "learning_rate": 4.429788021845784e-05, + "loss": 0.9314, + "step": 2564 + }, + { + "epoch": 0.03, + "learning_rate": 4.429325187447931e-05, + "loss": 2.7721, + "step": 2566 + }, + { + "epoch": 0.03, + "learning_rate": 4.428862353050079e-05, + "loss": 0.2912, + "step": 2568 + }, + { + "epoch": 0.03, + "learning_rate": 4.4283995186522264e-05, + "loss": 3.0583, + "step": 2570 + }, + { + "epoch": 0.03, + "learning_rate": 4.427936684254374e-05, + "loss": 2.545, + "step": 2572 + }, + { + "epoch": 0.03, + "learning_rate": 4.4274738498565214e-05, + "loss": 3.8111, + "step": 2574 + }, + { + "epoch": 0.03, + "learning_rate": 4.427011015458669e-05, + "loss": 3.7708, + "step": 2576 + }, + { + "epoch": 0.03, + "learning_rate": 4.4265481810608165e-05, + "loss": 5.1702, + "step": 2578 + }, + { + "epoch": 0.03, + "learning_rate": 4.4260853466629644e-05, + "loss": 0.6992, + "step": 2580 + }, + { + "epoch": 0.03, + "learning_rate": 4.4256225122651116e-05, + "loss": 2.4681, + "step": 2582 + }, + { + "epoch": 0.03, + "learning_rate": 4.4251596778672595e-05, + "loss": 4.0294, + "step": 2584 + }, + { + "epoch": 0.03, + "learning_rate": 4.424696843469407e-05, + "loss": 2.8139, + "step": 2586 + }, + { + "epoch": 0.03, + "learning_rate": 4.4242340090715546e-05, + "loss": 1.4004, + "step": 2588 + }, + { + "epoch": 0.03, + "learning_rate": 4.423771174673702e-05, + "loss": 3.3789, + "step": 2590 + }, + { + "epoch": 0.03, + "learning_rate": 4.4233083402758497e-05, + "loss": 2.332, + "step": 2592 + }, + { + "epoch": 0.03, + "learning_rate": 4.422845505877997e-05, + "loss": 1.0721, + "step": 2594 + }, + { + "epoch": 0.03, + "learning_rate": 4.422382671480145e-05, + "loss": 0.4702, + "step": 2596 + }, + { + "epoch": 0.03, + "learning_rate": 4.4219198370822926e-05, + "loss": 3.6235, + "step": 2598 + }, + { + "epoch": 0.03, + "learning_rate": 4.42145700268444e-05, + "loss": 2.7316, + "step": 2600 + }, + { + "epoch": 0.03, + "learning_rate": 4.420994168286588e-05, + "loss": 0.0648, + "step": 2602 + }, + { + "epoch": 0.03, + "learning_rate": 4.420531333888735e-05, + "loss": 3.0792, + "step": 2604 + }, + { + "epoch": 0.03, + "learning_rate": 4.420068499490883e-05, + "loss": 0.207, + "step": 2606 + }, + { + "epoch": 0.03, + "learning_rate": 4.41960566509303e-05, + "loss": 0.0044, + "step": 2608 + }, + { + "epoch": 0.03, + "learning_rate": 4.419142830695178e-05, + "loss": 7.9926, + "step": 2610 + }, + { + "epoch": 0.03, + "learning_rate": 4.418679996297325e-05, + "loss": 3.725, + "step": 2612 + }, + { + "epoch": 0.03, + "learning_rate": 4.418217161899473e-05, + "loss": 0.0093, + "step": 2614 + }, + { + "epoch": 0.03, + "learning_rate": 4.41775432750162e-05, + "loss": 0.4651, + "step": 2616 + }, + { + "epoch": 0.03, + "learning_rate": 4.417291493103768e-05, + "loss": 2.9808, + "step": 2618 + }, + { + "epoch": 0.03, + "learning_rate": 4.416828658705915e-05, + "loss": 0.3609, + "step": 2620 + }, + { + "epoch": 0.03, + "learning_rate": 4.416365824308063e-05, + "loss": 0.5582, + "step": 2622 + }, + { + "epoch": 0.03, + "learning_rate": 4.4159029899102103e-05, + "loss": 0.2891, + "step": 2624 + }, + { + "epoch": 0.03, + "learning_rate": 4.415440155512358e-05, + "loss": 3.892, + "step": 2626 + }, + { + "epoch": 0.03, + "learning_rate": 4.4149773211145054e-05, + "loss": 0.775, + "step": 2628 + }, + { + "epoch": 0.03, + "learning_rate": 4.414514486716653e-05, + "loss": 2.5052, + "step": 2630 + }, + { + "epoch": 0.03, + "learning_rate": 4.4140516523188005e-05, + "loss": 3.1075, + "step": 2632 + }, + { + "epoch": 0.03, + "learning_rate": 4.4135888179209484e-05, + "loss": 4.7003, + "step": 2634 + }, + { + "epoch": 0.03, + "learning_rate": 4.4131259835230956e-05, + "loss": 2.4242, + "step": 2636 + }, + { + "epoch": 0.03, + "learning_rate": 4.412663149125243e-05, + "loss": 7.5006, + "step": 2638 + }, + { + "epoch": 0.03, + "learning_rate": 4.412200314727391e-05, + "loss": 4.1007, + "step": 2640 + }, + { + "epoch": 0.03, + "learning_rate": 4.411737480329538e-05, + "loss": 1.8945, + "step": 2642 + }, + { + "epoch": 0.03, + "learning_rate": 4.411274645931686e-05, + "loss": 2.61, + "step": 2644 + }, + { + "epoch": 0.03, + "learning_rate": 4.410811811533833e-05, + "loss": 4.0779, + "step": 2646 + }, + { + "epoch": 0.03, + "learning_rate": 4.410348977135981e-05, + "loss": 1.3684, + "step": 2648 + }, + { + "epoch": 0.03, + "learning_rate": 4.409886142738128e-05, + "loss": 2.0648, + "step": 2650 + }, + { + "epoch": 0.03, + "learning_rate": 4.409423308340276e-05, + "loss": 5.6303, + "step": 2652 + }, + { + "epoch": 0.03, + "learning_rate": 4.408960473942423e-05, + "loss": 4.1201, + "step": 2654 + }, + { + "epoch": 0.03, + "learning_rate": 4.408497639544571e-05, + "loss": 2.9721, + "step": 2656 + }, + { + "epoch": 0.03, + "learning_rate": 4.408034805146718e-05, + "loss": 3.1875, + "step": 2658 + }, + { + "epoch": 0.03, + "learning_rate": 4.407571970748866e-05, + "loss": 3.6593, + "step": 2660 + }, + { + "epoch": 0.03, + "learning_rate": 4.407109136351013e-05, + "loss": 2.4537, + "step": 2662 + }, + { + "epoch": 0.03, + "learning_rate": 4.406646301953161e-05, + "loss": 3.2957, + "step": 2664 + }, + { + "epoch": 0.03, + "learning_rate": 4.406183467555309e-05, + "loss": 3.8032, + "step": 2666 + }, + { + "epoch": 0.03, + "learning_rate": 4.405720633157456e-05, + "loss": 2.9254, + "step": 2668 + }, + { + "epoch": 0.03, + "learning_rate": 4.405257798759604e-05, + "loss": 2.7486, + "step": 2670 + }, + { + "epoch": 0.03, + "learning_rate": 4.4047949643617514e-05, + "loss": 1.1837, + "step": 2672 + }, + { + "epoch": 0.03, + "learning_rate": 4.404332129963899e-05, + "loss": 2.7018, + "step": 2674 + }, + { + "epoch": 0.03, + "learning_rate": 4.4038692955660464e-05, + "loss": 3.6432, + "step": 2676 + }, + { + "epoch": 0.03, + "learning_rate": 4.403406461168194e-05, + "loss": 3.5784, + "step": 2678 + }, + { + "epoch": 0.03, + "learning_rate": 4.4029436267703415e-05, + "loss": 1.4645, + "step": 2680 + }, + { + "epoch": 0.03, + "learning_rate": 4.4024807923724894e-05, + "loss": 2.8162, + "step": 2682 + }, + { + "epoch": 0.03, + "learning_rate": 4.4020179579746366e-05, + "loss": 5.4228, + "step": 2684 + }, + { + "epoch": 0.03, + "learning_rate": 4.4015551235767845e-05, + "loss": 4.3235, + "step": 2686 + }, + { + "epoch": 0.03, + "learning_rate": 4.401092289178932e-05, + "loss": 3.1405, + "step": 2688 + }, + { + "epoch": 0.03, + "learning_rate": 4.4006294547810796e-05, + "loss": 3.6612, + "step": 2690 + }, + { + "epoch": 0.03, + "learning_rate": 4.400166620383227e-05, + "loss": 1.943, + "step": 2692 + }, + { + "epoch": 0.03, + "learning_rate": 4.399703785985375e-05, + "loss": 2.3171, + "step": 2694 + }, + { + "epoch": 0.03, + "learning_rate": 4.399240951587522e-05, + "loss": 2.9479, + "step": 2696 + }, + { + "epoch": 0.03, + "learning_rate": 4.39877811718967e-05, + "loss": 5.2004, + "step": 2698 + }, + { + "epoch": 0.03, + "learning_rate": 4.398315282791817e-05, + "loss": 2.3372, + "step": 2700 + }, + { + "epoch": 0.03, + "learning_rate": 4.397852448393965e-05, + "loss": 1.9641, + "step": 2702 + }, + { + "epoch": 0.03, + "learning_rate": 4.397389613996113e-05, + "loss": 5.2737, + "step": 2704 + }, + { + "epoch": 0.03, + "learning_rate": 4.39692677959826e-05, + "loss": 0.7862, + "step": 2706 + }, + { + "epoch": 0.03, + "learning_rate": 4.396463945200408e-05, + "loss": 3.2986, + "step": 2708 + }, + { + "epoch": 0.03, + "learning_rate": 4.396001110802555e-05, + "loss": 4.3655, + "step": 2710 + }, + { + "epoch": 0.03, + "learning_rate": 4.395538276404703e-05, + "loss": 1.5156, + "step": 2712 + }, + { + "epoch": 0.03, + "learning_rate": 4.39507544200685e-05, + "loss": 3.8107, + "step": 2714 + }, + { + "epoch": 0.03, + "learning_rate": 4.394612607608998e-05, + "loss": 3.4354, + "step": 2716 + }, + { + "epoch": 0.03, + "learning_rate": 4.394149773211145e-05, + "loss": 2.8509, + "step": 2718 + }, + { + "epoch": 0.03, + "learning_rate": 4.393686938813293e-05, + "loss": 4.3572, + "step": 2720 + }, + { + "epoch": 0.03, + "learning_rate": 4.39322410441544e-05, + "loss": 0.0784, + "step": 2722 + }, + { + "epoch": 0.03, + "learning_rate": 4.392761270017588e-05, + "loss": 1.6573, + "step": 2724 + }, + { + "epoch": 0.03, + "learning_rate": 4.3922984356197353e-05, + "loss": 0.7686, + "step": 2726 + }, + { + "epoch": 0.03, + "learning_rate": 4.391835601221883e-05, + "loss": 6.2967, + "step": 2728 + }, + { + "epoch": 0.03, + "learning_rate": 4.3913727668240304e-05, + "loss": 4.4603, + "step": 2730 + }, + { + "epoch": 0.03, + "learning_rate": 4.390909932426178e-05, + "loss": 1.0802, + "step": 2732 + }, + { + "epoch": 0.03, + "learning_rate": 4.3904470980283255e-05, + "loss": 4.3052, + "step": 2734 + }, + { + "epoch": 0.03, + "learning_rate": 4.3899842636304734e-05, + "loss": 2.7789, + "step": 2736 + }, + { + "epoch": 0.03, + "learning_rate": 4.3895214292326206e-05, + "loss": 0.0174, + "step": 2738 + }, + { + "epoch": 0.03, + "learning_rate": 4.3890585948347685e-05, + "loss": 0.5841, + "step": 2740 + }, + { + "epoch": 0.03, + "learning_rate": 4.388595760436916e-05, + "loss": 3.9594, + "step": 2742 + }, + { + "epoch": 0.03, + "learning_rate": 4.3881329260390636e-05, + "loss": 3.4283, + "step": 2744 + }, + { + "epoch": 0.03, + "learning_rate": 4.3876700916412114e-05, + "loss": 0.0334, + "step": 2746 + }, + { + "epoch": 0.03, + "learning_rate": 4.3872072572433587e-05, + "loss": 3.6736, + "step": 2748 + }, + { + "epoch": 0.03, + "learning_rate": 4.3867444228455065e-05, + "loss": 3.993, + "step": 2750 + }, + { + "epoch": 0.03, + "learning_rate": 4.386281588447654e-05, + "loss": 2.7755, + "step": 2752 + }, + { + "epoch": 0.03, + "learning_rate": 4.3858187540498016e-05, + "loss": 2.6507, + "step": 2754 + }, + { + "epoch": 0.03, + "learning_rate": 4.385355919651949e-05, + "loss": 2.7294, + "step": 2756 + }, + { + "epoch": 0.03, + "learning_rate": 4.384893085254097e-05, + "loss": 1.3126, + "step": 2758 + }, + { + "epoch": 0.03, + "learning_rate": 4.384430250856244e-05, + "loss": 2.0301, + "step": 2760 + }, + { + "epoch": 0.03, + "learning_rate": 4.383967416458392e-05, + "loss": 2.633, + "step": 2762 + }, + { + "epoch": 0.03, + "learning_rate": 4.383504582060539e-05, + "loss": 0.5288, + "step": 2764 + }, + { + "epoch": 0.03, + "learning_rate": 4.383041747662687e-05, + "loss": 1.1458, + "step": 2766 + }, + { + "epoch": 0.03, + "learning_rate": 4.382578913264834e-05, + "loss": 2.0533, + "step": 2768 + }, + { + "epoch": 0.03, + "learning_rate": 4.382116078866982e-05, + "loss": 1.4202, + "step": 2770 + }, + { + "epoch": 0.03, + "learning_rate": 4.381653244469129e-05, + "loss": 5.0797, + "step": 2772 + }, + { + "epoch": 0.03, + "learning_rate": 4.381190410071277e-05, + "loss": 1.7898, + "step": 2774 + }, + { + "epoch": 0.03, + "learning_rate": 4.380727575673424e-05, + "loss": 1.5671, + "step": 2776 + }, + { + "epoch": 0.03, + "learning_rate": 4.380264741275572e-05, + "loss": 2.1318, + "step": 2778 + }, + { + "epoch": 0.03, + "learning_rate": 4.379801906877719e-05, + "loss": 1.4095, + "step": 2780 + }, + { + "epoch": 0.03, + "learning_rate": 4.379339072479867e-05, + "loss": 0.0043, + "step": 2782 + }, + { + "epoch": 0.03, + "learning_rate": 4.3788762380820144e-05, + "loss": 1.1215, + "step": 2784 + }, + { + "epoch": 0.03, + "learning_rate": 4.3784134036841616e-05, + "loss": 1.2865, + "step": 2786 + }, + { + "epoch": 0.03, + "learning_rate": 4.3779505692863095e-05, + "loss": 5.3486, + "step": 2788 + }, + { + "epoch": 0.03, + "learning_rate": 4.377487734888457e-05, + "loss": 1.8235, + "step": 2790 + }, + { + "epoch": 0.03, + "learning_rate": 4.3770249004906046e-05, + "loss": 1.3124, + "step": 2792 + }, + { + "epoch": 0.03, + "learning_rate": 4.376562066092752e-05, + "loss": 4.5353, + "step": 2794 + }, + { + "epoch": 0.03, + "learning_rate": 4.3760992316949e-05, + "loss": 0.7145, + "step": 2796 + }, + { + "epoch": 0.03, + "learning_rate": 4.375636397297047e-05, + "loss": 3.7446, + "step": 2798 + }, + { + "epoch": 0.03, + "learning_rate": 4.375173562899195e-05, + "loss": 0.9431, + "step": 2800 + }, + { + "epoch": 0.03, + "learning_rate": 4.374710728501342e-05, + "loss": 1.3472, + "step": 2802 + }, + { + "epoch": 0.03, + "learning_rate": 4.37424789410349e-05, + "loss": 0.8382, + "step": 2804 + }, + { + "epoch": 0.03, + "learning_rate": 4.373785059705637e-05, + "loss": 3.288, + "step": 2806 + }, + { + "epoch": 0.03, + "learning_rate": 4.373322225307785e-05, + "loss": 2.5933, + "step": 2808 + }, + { + "epoch": 0.03, + "learning_rate": 4.372859390909932e-05, + "loss": 1.5494, + "step": 2810 + }, + { + "epoch": 0.03, + "learning_rate": 4.37239655651208e-05, + "loss": 0.9717, + "step": 2812 + }, + { + "epoch": 0.03, + "learning_rate": 4.371933722114228e-05, + "loss": 2.3439, + "step": 2814 + }, + { + "epoch": 0.03, + "learning_rate": 4.371470887716375e-05, + "loss": 2.4958, + "step": 2816 + }, + { + "epoch": 0.03, + "learning_rate": 4.371008053318523e-05, + "loss": 4.3141, + "step": 2818 + }, + { + "epoch": 0.03, + "learning_rate": 4.37054521892067e-05, + "loss": 2.8412, + "step": 2820 + }, + { + "epoch": 0.03, + "learning_rate": 4.370082384522818e-05, + "loss": 1.4193, + "step": 2822 + }, + { + "epoch": 0.03, + "learning_rate": 4.369619550124965e-05, + "loss": 3.127, + "step": 2824 + }, + { + "epoch": 0.03, + "learning_rate": 4.369156715727113e-05, + "loss": 3.2877, + "step": 2826 + }, + { + "epoch": 0.03, + "learning_rate": 4.3686938813292603e-05, + "loss": 0.975, + "step": 2828 + }, + { + "epoch": 0.03, + "learning_rate": 4.368231046931408e-05, + "loss": 1.605, + "step": 2830 + }, + { + "epoch": 0.03, + "learning_rate": 4.3677682125335554e-05, + "loss": 0.5697, + "step": 2832 + }, + { + "epoch": 0.03, + "learning_rate": 4.367305378135703e-05, + "loss": 4.5788, + "step": 2834 + }, + { + "epoch": 0.03, + "learning_rate": 4.3668425437378505e-05, + "loss": 0.7533, + "step": 2836 + }, + { + "epoch": 0.03, + "learning_rate": 4.3663797093399984e-05, + "loss": 2.2919, + "step": 2838 + }, + { + "epoch": 0.03, + "learning_rate": 4.3659168749421456e-05, + "loss": 0.5652, + "step": 2840 + }, + { + "epoch": 0.03, + "learning_rate": 4.3654540405442935e-05, + "loss": 6.8378, + "step": 2842 + }, + { + "epoch": 0.03, + "learning_rate": 4.364991206146441e-05, + "loss": 1.3532, + "step": 2844 + }, + { + "epoch": 0.03, + "learning_rate": 4.3645283717485886e-05, + "loss": 2.8194, + "step": 2846 + }, + { + "epoch": 0.03, + "learning_rate": 4.364065537350736e-05, + "loss": 5.205, + "step": 2848 + }, + { + "epoch": 0.03, + "learning_rate": 4.3636027029528837e-05, + "loss": 3.8505, + "step": 2850 + }, + { + "epoch": 0.03, + "learning_rate": 4.3631398685550315e-05, + "loss": 2.6481, + "step": 2852 + }, + { + "epoch": 0.03, + "learning_rate": 4.362677034157179e-05, + "loss": 2.8456, + "step": 2854 + }, + { + "epoch": 0.03, + "learning_rate": 4.3622141997593266e-05, + "loss": 4.1129, + "step": 2856 + }, + { + "epoch": 0.03, + "learning_rate": 4.361751365361474e-05, + "loss": 4.2937, + "step": 2858 + }, + { + "epoch": 0.03, + "learning_rate": 4.361288530963622e-05, + "loss": 2.4405, + "step": 2860 + }, + { + "epoch": 0.03, + "learning_rate": 4.360825696565769e-05, + "loss": 1.2435, + "step": 2862 + }, + { + "epoch": 0.03, + "learning_rate": 4.360362862167917e-05, + "loss": 2.9837, + "step": 2864 + }, + { + "epoch": 0.03, + "learning_rate": 4.359900027770064e-05, + "loss": 1.0911, + "step": 2866 + }, + { + "epoch": 0.03, + "learning_rate": 4.359437193372212e-05, + "loss": 1.2255, + "step": 2868 + }, + { + "epoch": 0.03, + "learning_rate": 4.358974358974359e-05, + "loss": 0.8968, + "step": 2870 + }, + { + "epoch": 0.03, + "learning_rate": 4.358511524576507e-05, + "loss": 4.6966, + "step": 2872 + }, + { + "epoch": 0.03, + "learning_rate": 4.358048690178654e-05, + "loss": 2.5355, + "step": 2874 + }, + { + "epoch": 0.03, + "learning_rate": 4.357585855780802e-05, + "loss": 2.2348, + "step": 2876 + }, + { + "epoch": 0.03, + "learning_rate": 4.357123021382949e-05, + "loss": 3.9803, + "step": 2878 + }, + { + "epoch": 0.03, + "learning_rate": 4.356660186985097e-05, + "loss": 1.894, + "step": 2880 + }, + { + "epoch": 0.03, + "learning_rate": 4.356197352587244e-05, + "loss": 1.7825, + "step": 2882 + }, + { + "epoch": 0.03, + "learning_rate": 4.355734518189392e-05, + "loss": 0.8478, + "step": 2884 + }, + { + "epoch": 0.03, + "learning_rate": 4.3552716837915394e-05, + "loss": 7.4051, + "step": 2886 + }, + { + "epoch": 0.03, + "learning_rate": 4.354808849393687e-05, + "loss": 3.6924, + "step": 2888 + }, + { + "epoch": 0.03, + "learning_rate": 4.3543460149958345e-05, + "loss": 2.776, + "step": 2890 + }, + { + "epoch": 0.03, + "learning_rate": 4.3538831805979824e-05, + "loss": 2.752, + "step": 2892 + }, + { + "epoch": 0.03, + "learning_rate": 4.35342034620013e-05, + "loss": 1.3285, + "step": 2894 + }, + { + "epoch": 0.03, + "learning_rate": 4.3529575118022775e-05, + "loss": 1.9698, + "step": 2896 + }, + { + "epoch": 0.03, + "learning_rate": 4.3524946774044254e-05, + "loss": 1.5089, + "step": 2898 + }, + { + "epoch": 0.03, + "learning_rate": 4.3520318430065726e-05, + "loss": 1.1621, + "step": 2900 + }, + { + "epoch": 0.03, + "learning_rate": 4.3515690086087204e-05, + "loss": 3.0624, + "step": 2902 + }, + { + "epoch": 0.03, + "learning_rate": 4.3511061742108676e-05, + "loss": 3.4497, + "step": 2904 + }, + { + "epoch": 0.03, + "learning_rate": 4.3506433398130155e-05, + "loss": 1.9959, + "step": 2906 + }, + { + "epoch": 0.03, + "learning_rate": 4.350180505415163e-05, + "loss": 6.8955, + "step": 2908 + }, + { + "epoch": 0.03, + "learning_rate": 4.3497176710173106e-05, + "loss": 2.1801, + "step": 2910 + }, + { + "epoch": 0.03, + "learning_rate": 4.349254836619458e-05, + "loss": 2.7468, + "step": 2912 + }, + { + "epoch": 0.03, + "learning_rate": 4.348792002221606e-05, + "loss": 2.1356, + "step": 2914 + }, + { + "epoch": 0.03, + "learning_rate": 4.348329167823753e-05, + "loss": 2.996, + "step": 2916 + }, + { + "epoch": 0.03, + "learning_rate": 4.347866333425901e-05, + "loss": 1.6563, + "step": 2918 + }, + { + "epoch": 0.03, + "learning_rate": 4.347403499028048e-05, + "loss": 2.8704, + "step": 2920 + }, + { + "epoch": 0.03, + "learning_rate": 4.346940664630196e-05, + "loss": 1.5192, + "step": 2922 + }, + { + "epoch": 0.03, + "learning_rate": 4.346477830232343e-05, + "loss": 1.6347, + "step": 2924 + }, + { + "epoch": 0.03, + "learning_rate": 4.346014995834491e-05, + "loss": 3.7213, + "step": 2926 + }, + { + "epoch": 0.03, + "learning_rate": 4.345552161436638e-05, + "loss": 2.5478, + "step": 2928 + }, + { + "epoch": 0.03, + "learning_rate": 4.3450893270387854e-05, + "loss": 1.9151, + "step": 2930 + }, + { + "epoch": 0.03, + "learning_rate": 4.344626492640933e-05, + "loss": 3.5758, + "step": 2932 + }, + { + "epoch": 0.03, + "learning_rate": 4.3441636582430804e-05, + "loss": 1.8049, + "step": 2934 + }, + { + "epoch": 0.03, + "learning_rate": 4.343700823845228e-05, + "loss": 3.7361, + "step": 2936 + }, + { + "epoch": 0.03, + "learning_rate": 4.3432379894473755e-05, + "loss": 1.9746, + "step": 2938 + }, + { + "epoch": 0.03, + "learning_rate": 4.3427751550495234e-05, + "loss": 1.2504, + "step": 2940 + }, + { + "epoch": 0.03, + "learning_rate": 4.3423123206516706e-05, + "loss": 2.9904, + "step": 2942 + }, + { + "epoch": 0.03, + "learning_rate": 4.3418494862538185e-05, + "loss": 4.0428, + "step": 2944 + }, + { + "epoch": 0.03, + "learning_rate": 4.341386651855966e-05, + "loss": 3.0578, + "step": 2946 + }, + { + "epoch": 0.03, + "learning_rate": 4.3409238174581136e-05, + "loss": 3.4413, + "step": 2948 + }, + { + "epoch": 0.03, + "learning_rate": 4.340460983060261e-05, + "loss": 2.0494, + "step": 2950 + }, + { + "epoch": 0.03, + "learning_rate": 4.3399981486624087e-05, + "loss": 1.621, + "step": 2952 + }, + { + "epoch": 0.03, + "learning_rate": 4.339535314264556e-05, + "loss": 2.9422, + "step": 2954 + }, + { + "epoch": 0.03, + "learning_rate": 4.339072479866704e-05, + "loss": 0.8094, + "step": 2956 + }, + { + "epoch": 0.03, + "learning_rate": 4.3386096454688516e-05, + "loss": 1.1235, + "step": 2958 + }, + { + "epoch": 0.03, + "learning_rate": 4.338146811070999e-05, + "loss": 0.8384, + "step": 2960 + }, + { + "epoch": 0.03, + "learning_rate": 4.337683976673147e-05, + "loss": 1.6409, + "step": 2962 + }, + { + "epoch": 0.03, + "learning_rate": 4.337221142275294e-05, + "loss": 1.2324, + "step": 2964 + }, + { + "epoch": 0.03, + "learning_rate": 4.336758307877442e-05, + "loss": 3.5691, + "step": 2966 + }, + { + "epoch": 0.03, + "learning_rate": 4.336295473479589e-05, + "loss": 5.449, + "step": 2968 + }, + { + "epoch": 0.03, + "learning_rate": 4.335832639081737e-05, + "loss": 1.2248, + "step": 2970 + }, + { + "epoch": 0.03, + "learning_rate": 4.335369804683884e-05, + "loss": 2.1359, + "step": 2972 + }, + { + "epoch": 0.03, + "learning_rate": 4.334906970286032e-05, + "loss": 2.8838, + "step": 2974 + }, + { + "epoch": 0.03, + "learning_rate": 4.334444135888179e-05, + "loss": 4.8468, + "step": 2976 + }, + { + "epoch": 0.03, + "learning_rate": 4.333981301490327e-05, + "loss": 3.0735, + "step": 2978 + }, + { + "epoch": 0.03, + "learning_rate": 4.333518467092474e-05, + "loss": 2.0088, + "step": 2980 + }, + { + "epoch": 0.03, + "learning_rate": 4.333055632694622e-05, + "loss": 6.8113, + "step": 2982 + }, + { + "epoch": 0.03, + "learning_rate": 4.332592798296769e-05, + "loss": 1.7866, + "step": 2984 + }, + { + "epoch": 0.03, + "learning_rate": 4.332129963898917e-05, + "loss": 1.5066, + "step": 2986 + }, + { + "epoch": 0.03, + "learning_rate": 4.3316671295010644e-05, + "loss": 3.4974, + "step": 2988 + }, + { + "epoch": 0.03, + "learning_rate": 4.331204295103212e-05, + "loss": 2.2421, + "step": 2990 + }, + { + "epoch": 0.03, + "learning_rate": 4.3307414607053595e-05, + "loss": 2.6757, + "step": 2992 + }, + { + "epoch": 0.03, + "learning_rate": 4.3302786263075074e-05, + "loss": 2.9015, + "step": 2994 + }, + { + "epoch": 0.03, + "learning_rate": 4.3298157919096546e-05, + "loss": 1.4698, + "step": 2996 + }, + { + "epoch": 0.03, + "learning_rate": 4.3293529575118025e-05, + "loss": 2.3329, + "step": 2998 + }, + { + "epoch": 0.03, + "learning_rate": 4.3288901231139504e-05, + "loss": 1.6533, + "step": 3000 + }, + { + "epoch": 0.03, + "learning_rate": 4.3284272887160976e-05, + "loss": 0.5187, + "step": 3002 + }, + { + "epoch": 0.03, + "learning_rate": 4.3279644543182454e-05, + "loss": 4.2235, + "step": 3004 + }, + { + "epoch": 0.03, + "learning_rate": 4.3275016199203926e-05, + "loss": 1.8987, + "step": 3006 + }, + { + "epoch": 0.03, + "learning_rate": 4.3270387855225405e-05, + "loss": 1.5289, + "step": 3008 + }, + { + "epoch": 0.03, + "learning_rate": 4.326575951124688e-05, + "loss": 0.0567, + "step": 3010 + }, + { + "epoch": 0.03, + "learning_rate": 4.3261131167268356e-05, + "loss": 9.8389, + "step": 3012 + }, + { + "epoch": 0.03, + "learning_rate": 4.325650282328983e-05, + "loss": 2.1317, + "step": 3014 + }, + { + "epoch": 0.03, + "learning_rate": 4.325187447931131e-05, + "loss": 2.8164, + "step": 3016 + }, + { + "epoch": 0.03, + "learning_rate": 4.324724613533278e-05, + "loss": 1.89, + "step": 3018 + }, + { + "epoch": 0.03, + "learning_rate": 4.324261779135426e-05, + "loss": 3.6251, + "step": 3020 + }, + { + "epoch": 0.03, + "learning_rate": 4.323798944737573e-05, + "loss": 2.2447, + "step": 3022 + }, + { + "epoch": 0.03, + "learning_rate": 4.323336110339721e-05, + "loss": 6.4708, + "step": 3024 + }, + { + "epoch": 0.03, + "learning_rate": 4.322873275941868e-05, + "loss": 2.7006, + "step": 3026 + }, + { + "epoch": 0.03, + "learning_rate": 4.322410441544016e-05, + "loss": 1.7226, + "step": 3028 + }, + { + "epoch": 0.03, + "learning_rate": 4.321947607146163e-05, + "loss": 0.0546, + "step": 3030 + }, + { + "epoch": 0.03, + "learning_rate": 4.321484772748311e-05, + "loss": 1.5409, + "step": 3032 + }, + { + "epoch": 0.03, + "learning_rate": 4.321021938350458e-05, + "loss": 7.4564, + "step": 3034 + }, + { + "epoch": 0.03, + "learning_rate": 4.320559103952606e-05, + "loss": 0.9203, + "step": 3036 + }, + { + "epoch": 0.03, + "learning_rate": 4.320096269554753e-05, + "loss": 0.6046, + "step": 3038 + }, + { + "epoch": 0.04, + "learning_rate": 4.319633435156901e-05, + "loss": 1.8553, + "step": 3040 + }, + { + "epoch": 0.04, + "learning_rate": 4.319170600759049e-05, + "loss": 0.7454, + "step": 3042 + }, + { + "epoch": 0.04, + "learning_rate": 4.318707766361196e-05, + "loss": 7.2183, + "step": 3044 + }, + { + "epoch": 0.04, + "learning_rate": 4.318244931963344e-05, + "loss": 3.8499, + "step": 3046 + }, + { + "epoch": 0.04, + "learning_rate": 4.3177820975654914e-05, + "loss": 1.0101, + "step": 3048 + }, + { + "epoch": 0.04, + "learning_rate": 4.317319263167639e-05, + "loss": 0.6399, + "step": 3050 + }, + { + "epoch": 0.04, + "learning_rate": 4.3168564287697865e-05, + "loss": 2.9095, + "step": 3052 + }, + { + "epoch": 0.04, + "learning_rate": 4.3163935943719343e-05, + "loss": 6.8418, + "step": 3054 + }, + { + "epoch": 0.04, + "learning_rate": 4.3159307599740815e-05, + "loss": 3.5289, + "step": 3056 + }, + { + "epoch": 0.04, + "learning_rate": 4.3154679255762294e-05, + "loss": 2.098, + "step": 3058 + }, + { + "epoch": 0.04, + "learning_rate": 4.3150050911783766e-05, + "loss": 2.8032, + "step": 3060 + }, + { + "epoch": 0.04, + "learning_rate": 4.3145422567805245e-05, + "loss": 2.3119, + "step": 3062 + }, + { + "epoch": 0.04, + "learning_rate": 4.314079422382672e-05, + "loss": 1.8421, + "step": 3064 + }, + { + "epoch": 0.04, + "learning_rate": 4.3136165879848196e-05, + "loss": 3.1715, + "step": 3066 + }, + { + "epoch": 0.04, + "learning_rate": 4.313153753586967e-05, + "loss": 3.8869, + "step": 3068 + }, + { + "epoch": 0.04, + "learning_rate": 4.312690919189115e-05, + "loss": 3.4876, + "step": 3070 + }, + { + "epoch": 0.04, + "learning_rate": 4.312228084791262e-05, + "loss": 0.4605, + "step": 3072 + }, + { + "epoch": 0.04, + "learning_rate": 4.311765250393409e-05, + "loss": 4.0718, + "step": 3074 + }, + { + "epoch": 0.04, + "learning_rate": 4.311302415995557e-05, + "loss": 1.8317, + "step": 3076 + }, + { + "epoch": 0.04, + "learning_rate": 4.310839581597704e-05, + "loss": 0.2, + "step": 3078 + }, + { + "epoch": 0.04, + "learning_rate": 4.310376747199852e-05, + "loss": 1.9115, + "step": 3080 + }, + { + "epoch": 0.04, + "learning_rate": 4.309913912801999e-05, + "loss": 2.8866, + "step": 3082 + }, + { + "epoch": 0.04, + "learning_rate": 4.309451078404147e-05, + "loss": 0.0458, + "step": 3084 + }, + { + "epoch": 0.04, + "learning_rate": 4.3089882440062943e-05, + "loss": 5.833, + "step": 3086 + }, + { + "epoch": 0.04, + "learning_rate": 4.308525409608442e-05, + "loss": 1.1995, + "step": 3088 + }, + { + "epoch": 0.04, + "learning_rate": 4.3080625752105894e-05, + "loss": 0.9039, + "step": 3090 + }, + { + "epoch": 0.04, + "learning_rate": 4.307599740812737e-05, + "loss": 5.8866, + "step": 3092 + }, + { + "epoch": 0.04, + "learning_rate": 4.3071369064148845e-05, + "loss": 0.7266, + "step": 3094 + }, + { + "epoch": 0.04, + "learning_rate": 4.3066740720170324e-05, + "loss": 0.6991, + "step": 3096 + }, + { + "epoch": 0.04, + "learning_rate": 4.3062112376191796e-05, + "loss": 1.9344, + "step": 3098 + }, + { + "epoch": 0.04, + "learning_rate": 4.3057484032213275e-05, + "loss": 1.4056, + "step": 3100 + }, + { + "epoch": 0.04, + "learning_rate": 4.305285568823475e-05, + "loss": 1.3197, + "step": 3102 + }, + { + "epoch": 0.04, + "learning_rate": 4.3048227344256226e-05, + "loss": 2.046, + "step": 3104 + }, + { + "epoch": 0.04, + "learning_rate": 4.3043599000277704e-05, + "loss": 0.0402, + "step": 3106 + }, + { + "epoch": 0.04, + "learning_rate": 4.3038970656299176e-05, + "loss": 0.374, + "step": 3108 + }, + { + "epoch": 0.04, + "learning_rate": 4.3034342312320655e-05, + "loss": 3.9081, + "step": 3110 + }, + { + "epoch": 0.04, + "learning_rate": 4.302971396834213e-05, + "loss": 7.0488, + "step": 3112 + }, + { + "epoch": 0.04, + "learning_rate": 4.3025085624363606e-05, + "loss": 7.4802, + "step": 3114 + }, + { + "epoch": 0.04, + "learning_rate": 4.302045728038508e-05, + "loss": 2.5635, + "step": 3116 + }, + { + "epoch": 0.04, + "learning_rate": 4.301582893640656e-05, + "loss": 0.0342, + "step": 3118 + }, + { + "epoch": 0.04, + "learning_rate": 4.301120059242803e-05, + "loss": 1.1016, + "step": 3120 + }, + { + "epoch": 0.04, + "learning_rate": 4.300657224844951e-05, + "loss": 1.1263, + "step": 3122 + }, + { + "epoch": 0.04, + "learning_rate": 4.300194390447098e-05, + "loss": 4.8935, + "step": 3124 + }, + { + "epoch": 0.04, + "learning_rate": 4.299731556049246e-05, + "loss": 2.4525, + "step": 3126 + }, + { + "epoch": 0.04, + "learning_rate": 4.299268721651393e-05, + "loss": 2.3383, + "step": 3128 + }, + { + "epoch": 0.04, + "learning_rate": 4.298805887253541e-05, + "loss": 1.8077, + "step": 3130 + }, + { + "epoch": 0.04, + "learning_rate": 4.298343052855688e-05, + "loss": 0.4661, + "step": 3132 + }, + { + "epoch": 0.04, + "learning_rate": 4.297880218457836e-05, + "loss": 7.0783, + "step": 3134 + }, + { + "epoch": 0.04, + "learning_rate": 4.297417384059983e-05, + "loss": 3.5726, + "step": 3136 + }, + { + "epoch": 0.04, + "learning_rate": 4.296954549662131e-05, + "loss": 1.1502, + "step": 3138 + }, + { + "epoch": 0.04, + "learning_rate": 4.296491715264278e-05, + "loss": 2.4616, + "step": 3140 + }, + { + "epoch": 0.04, + "learning_rate": 4.296028880866426e-05, + "loss": 4.9733, + "step": 3142 + }, + { + "epoch": 0.04, + "learning_rate": 4.2955660464685734e-05, + "loss": 2.754, + "step": 3144 + }, + { + "epoch": 0.04, + "learning_rate": 4.295103212070721e-05, + "loss": 3.1002, + "step": 3146 + }, + { + "epoch": 0.04, + "learning_rate": 4.294640377672869e-05, + "loss": 6.7086, + "step": 3148 + }, + { + "epoch": 0.04, + "learning_rate": 4.2941775432750164e-05, + "loss": 3.1922, + "step": 3150 + }, + { + "epoch": 0.04, + "learning_rate": 4.293714708877164e-05, + "loss": 4.0187, + "step": 3152 + }, + { + "epoch": 0.04, + "learning_rate": 4.2932518744793115e-05, + "loss": 2.4182, + "step": 3154 + }, + { + "epoch": 0.04, + "learning_rate": 4.2927890400814593e-05, + "loss": 3.0794, + "step": 3156 + }, + { + "epoch": 0.04, + "learning_rate": 4.2923262056836065e-05, + "loss": 1.2806, + "step": 3158 + }, + { + "epoch": 0.04, + "learning_rate": 4.2918633712857544e-05, + "loss": 1.2764, + "step": 3160 + }, + { + "epoch": 0.04, + "learning_rate": 4.2914005368879016e-05, + "loss": 4.7056, + "step": 3162 + }, + { + "epoch": 0.04, + "learning_rate": 4.2909377024900495e-05, + "loss": 2.0243, + "step": 3164 + }, + { + "epoch": 0.04, + "learning_rate": 4.290474868092197e-05, + "loss": 4.0551, + "step": 3166 + }, + { + "epoch": 0.04, + "learning_rate": 4.2900120336943446e-05, + "loss": 0.6186, + "step": 3168 + }, + { + "epoch": 0.04, + "learning_rate": 4.289549199296492e-05, + "loss": 2.2275, + "step": 3170 + }, + { + "epoch": 0.04, + "learning_rate": 4.28908636489864e-05, + "loss": 2.5112, + "step": 3172 + }, + { + "epoch": 0.04, + "learning_rate": 4.288623530500787e-05, + "loss": 1.2749, + "step": 3174 + }, + { + "epoch": 0.04, + "learning_rate": 4.288160696102935e-05, + "loss": 3.9905, + "step": 3176 + }, + { + "epoch": 0.04, + "learning_rate": 4.287697861705082e-05, + "loss": 3.2244, + "step": 3178 + }, + { + "epoch": 0.04, + "learning_rate": 4.28723502730723e-05, + "loss": 6.6179, + "step": 3180 + }, + { + "epoch": 0.04, + "learning_rate": 4.286772192909377e-05, + "loss": 1.2725, + "step": 3182 + }, + { + "epoch": 0.04, + "learning_rate": 4.286309358511525e-05, + "loss": 0.0003, + "step": 3184 + }, + { + "epoch": 0.04, + "learning_rate": 4.285846524113673e-05, + "loss": 1.1506, + "step": 3186 + }, + { + "epoch": 0.04, + "learning_rate": 4.28538368971582e-05, + "loss": 2.4248, + "step": 3188 + }, + { + "epoch": 0.04, + "learning_rate": 4.284920855317968e-05, + "loss": 1.6929, + "step": 3190 + }, + { + "epoch": 0.04, + "learning_rate": 4.284458020920115e-05, + "loss": 2.8815, + "step": 3192 + }, + { + "epoch": 0.04, + "learning_rate": 4.283995186522263e-05, + "loss": 4.4427, + "step": 3194 + }, + { + "epoch": 0.04, + "learning_rate": 4.28353235212441e-05, + "loss": 3.1305, + "step": 3196 + }, + { + "epoch": 0.04, + "learning_rate": 4.283069517726558e-05, + "loss": 0.8681, + "step": 3198 + }, + { + "epoch": 0.04, + "learning_rate": 4.282606683328705e-05, + "loss": 1.4018, + "step": 3200 + }, + { + "epoch": 0.04, + "learning_rate": 4.282143848930853e-05, + "loss": 1.5007, + "step": 3202 + }, + { + "epoch": 0.04, + "learning_rate": 4.2816810145330004e-05, + "loss": 4.7174, + "step": 3204 + }, + { + "epoch": 0.04, + "learning_rate": 4.281218180135148e-05, + "loss": 3.5461, + "step": 3206 + }, + { + "epoch": 0.04, + "learning_rate": 4.2807553457372954e-05, + "loss": 3.2717, + "step": 3208 + }, + { + "epoch": 0.04, + "learning_rate": 4.280292511339443e-05, + "loss": 0.0314, + "step": 3210 + }, + { + "epoch": 0.04, + "learning_rate": 4.2798296769415905e-05, + "loss": 0.5393, + "step": 3212 + }, + { + "epoch": 0.04, + "learning_rate": 4.2793668425437384e-05, + "loss": 0.0365, + "step": 3214 + }, + { + "epoch": 0.04, + "learning_rate": 4.2789040081458856e-05, + "loss": 2.0317, + "step": 3216 + }, + { + "epoch": 0.04, + "learning_rate": 4.278441173748033e-05, + "loss": 0.8583, + "step": 3218 + }, + { + "epoch": 0.04, + "learning_rate": 4.277978339350181e-05, + "loss": 6.0489, + "step": 3220 + }, + { + "epoch": 0.04, + "learning_rate": 4.277515504952328e-05, + "loss": 2.9371, + "step": 3222 + }, + { + "epoch": 0.04, + "learning_rate": 4.277052670554476e-05, + "loss": 1.0969, + "step": 3224 + }, + { + "epoch": 0.04, + "learning_rate": 4.276589836156623e-05, + "loss": 0.9496, + "step": 3226 + }, + { + "epoch": 0.04, + "learning_rate": 4.276127001758771e-05, + "loss": 2.5164, + "step": 3228 + }, + { + "epoch": 0.04, + "learning_rate": 4.275664167360918e-05, + "loss": 8.0292, + "step": 3230 + }, + { + "epoch": 0.04, + "learning_rate": 4.275201332963066e-05, + "loss": 2.4253, + "step": 3232 + }, + { + "epoch": 0.04, + "learning_rate": 4.274738498565213e-05, + "loss": 3.5435, + "step": 3234 + }, + { + "epoch": 0.04, + "learning_rate": 4.274275664167361e-05, + "loss": 4.0552, + "step": 3236 + }, + { + "epoch": 0.04, + "learning_rate": 4.273812829769508e-05, + "loss": 3.2006, + "step": 3238 + }, + { + "epoch": 0.04, + "learning_rate": 4.273349995371656e-05, + "loss": 3.2304, + "step": 3240 + }, + { + "epoch": 0.04, + "learning_rate": 4.272887160973803e-05, + "loss": 1.9212, + "step": 3242 + }, + { + "epoch": 0.04, + "learning_rate": 4.272424326575951e-05, + "loss": 2.2574, + "step": 3244 + }, + { + "epoch": 0.04, + "learning_rate": 4.2719614921780984e-05, + "loss": 5.3556, + "step": 3246 + }, + { + "epoch": 0.04, + "learning_rate": 4.271498657780246e-05, + "loss": 3.1678, + "step": 3248 + }, + { + "epoch": 0.04, + "learning_rate": 4.2710358233823935e-05, + "loss": 2.3383, + "step": 3250 + }, + { + "epoch": 0.04, + "learning_rate": 4.2705729889845414e-05, + "loss": 2.4842, + "step": 3252 + }, + { + "epoch": 0.04, + "learning_rate": 4.270110154586689e-05, + "loss": 0.0118, + "step": 3254 + }, + { + "epoch": 0.04, + "learning_rate": 4.2696473201888365e-05, + "loss": 4.3121, + "step": 3256 + }, + { + "epoch": 0.04, + "learning_rate": 4.2691844857909843e-05, + "loss": 1.8563, + "step": 3258 + }, + { + "epoch": 0.04, + "learning_rate": 4.2687216513931316e-05, + "loss": 0.1075, + "step": 3260 + }, + { + "epoch": 0.04, + "learning_rate": 4.2682588169952794e-05, + "loss": 1.7577, + "step": 3262 + }, + { + "epoch": 0.04, + "learning_rate": 4.2677959825974266e-05, + "loss": 2.1716, + "step": 3264 + }, + { + "epoch": 0.04, + "learning_rate": 4.2673331481995745e-05, + "loss": 3.6116, + "step": 3266 + }, + { + "epoch": 0.04, + "learning_rate": 4.266870313801722e-05, + "loss": 0.2518, + "step": 3268 + }, + { + "epoch": 0.04, + "learning_rate": 4.2664074794038696e-05, + "loss": 0.7862, + "step": 3270 + }, + { + "epoch": 0.04, + "learning_rate": 4.265944645006017e-05, + "loss": 0.2902, + "step": 3272 + }, + { + "epoch": 0.04, + "learning_rate": 4.265481810608165e-05, + "loss": 1.3783, + "step": 3274 + }, + { + "epoch": 0.04, + "learning_rate": 4.265018976210312e-05, + "loss": 4.187, + "step": 3276 + }, + { + "epoch": 0.04, + "learning_rate": 4.26455614181246e-05, + "loss": 2.8048, + "step": 3278 + }, + { + "epoch": 0.04, + "learning_rate": 4.264093307414607e-05, + "loss": 1.9152, + "step": 3280 + }, + { + "epoch": 0.04, + "learning_rate": 4.263630473016755e-05, + "loss": 1.3466, + "step": 3282 + }, + { + "epoch": 0.04, + "learning_rate": 4.263167638618902e-05, + "loss": 6.0907, + "step": 3284 + }, + { + "epoch": 0.04, + "learning_rate": 4.26270480422105e-05, + "loss": 1.5592, + "step": 3286 + }, + { + "epoch": 0.04, + "learning_rate": 4.262241969823197e-05, + "loss": 0.7997, + "step": 3288 + }, + { + "epoch": 0.04, + "learning_rate": 4.261779135425345e-05, + "loss": 5.2901, + "step": 3290 + }, + { + "epoch": 0.04, + "learning_rate": 4.261316301027493e-05, + "loss": 1.6352, + "step": 3292 + }, + { + "epoch": 0.04, + "learning_rate": 4.26085346662964e-05, + "loss": 3.447, + "step": 3294 + }, + { + "epoch": 0.04, + "learning_rate": 4.260390632231788e-05, + "loss": 2.47, + "step": 3296 + }, + { + "epoch": 0.04, + "learning_rate": 4.259927797833935e-05, + "loss": 1.4011, + "step": 3298 + }, + { + "epoch": 0.04, + "learning_rate": 4.259464963436083e-05, + "loss": 0.186, + "step": 3300 + }, + { + "epoch": 0.04, + "learning_rate": 4.25900212903823e-05, + "loss": 2.8851, + "step": 3302 + }, + { + "epoch": 0.04, + "learning_rate": 4.258539294640378e-05, + "loss": 0.9522, + "step": 3304 + }, + { + "epoch": 0.04, + "learning_rate": 4.2580764602425254e-05, + "loss": 1.1656, + "step": 3306 + }, + { + "epoch": 0.04, + "learning_rate": 4.257613625844673e-05, + "loss": 6.3406, + "step": 3308 + }, + { + "epoch": 0.04, + "learning_rate": 4.2571507914468205e-05, + "loss": 3.9567, + "step": 3310 + }, + { + "epoch": 0.04, + "learning_rate": 4.256687957048968e-05, + "loss": 1.6652, + "step": 3312 + }, + { + "epoch": 0.04, + "learning_rate": 4.2562251226511155e-05, + "loss": 1.4155, + "step": 3314 + }, + { + "epoch": 0.04, + "learning_rate": 4.2557622882532634e-05, + "loss": 4.0032, + "step": 3316 + }, + { + "epoch": 0.04, + "learning_rate": 4.2552994538554106e-05, + "loss": 1.365, + "step": 3318 + }, + { + "epoch": 0.04, + "learning_rate": 4.2548366194575585e-05, + "loss": 2.2116, + "step": 3320 + }, + { + "epoch": 0.04, + "learning_rate": 4.254373785059706e-05, + "loss": 3.6597, + "step": 3322 + }, + { + "epoch": 0.04, + "learning_rate": 4.2539109506618536e-05, + "loss": 2.9412, + "step": 3324 + }, + { + "epoch": 0.04, + "learning_rate": 4.253448116264001e-05, + "loss": 4.1019, + "step": 3326 + }, + { + "epoch": 0.04, + "learning_rate": 4.252985281866149e-05, + "loss": 3.5006, + "step": 3328 + }, + { + "epoch": 0.04, + "learning_rate": 4.252522447468296e-05, + "loss": 0.6264, + "step": 3330 + }, + { + "epoch": 0.04, + "learning_rate": 4.252059613070444e-05, + "loss": 1.0123, + "step": 3332 + }, + { + "epoch": 0.04, + "learning_rate": 4.2515967786725916e-05, + "loss": 2.6124, + "step": 3334 + }, + { + "epoch": 0.04, + "learning_rate": 4.251133944274739e-05, + "loss": 2.4115, + "step": 3336 + }, + { + "epoch": 0.04, + "learning_rate": 4.250671109876887e-05, + "loss": 2.7729, + "step": 3338 + }, + { + "epoch": 0.04, + "learning_rate": 4.250208275479034e-05, + "loss": 1.8189, + "step": 3340 + }, + { + "epoch": 0.04, + "learning_rate": 4.249745441081182e-05, + "loss": 2.9102, + "step": 3342 + }, + { + "epoch": 0.04, + "learning_rate": 4.249282606683329e-05, + "loss": 0.4764, + "step": 3344 + }, + { + "epoch": 0.04, + "learning_rate": 4.248819772285477e-05, + "loss": 5.0997, + "step": 3346 + }, + { + "epoch": 0.04, + "learning_rate": 4.248356937887624e-05, + "loss": 0.793, + "step": 3348 + }, + { + "epoch": 0.04, + "learning_rate": 4.247894103489772e-05, + "loss": 0.7547, + "step": 3350 + }, + { + "epoch": 0.04, + "learning_rate": 4.247431269091919e-05, + "loss": 0.959, + "step": 3352 + }, + { + "epoch": 0.04, + "learning_rate": 4.246968434694067e-05, + "loss": 1.7352, + "step": 3354 + }, + { + "epoch": 0.04, + "learning_rate": 4.246505600296214e-05, + "loss": 0.0378, + "step": 3356 + }, + { + "epoch": 0.04, + "learning_rate": 4.246042765898362e-05, + "loss": 0.1231, + "step": 3358 + }, + { + "epoch": 0.04, + "learning_rate": 4.2455799315005094e-05, + "loss": 1.2512, + "step": 3360 + }, + { + "epoch": 0.04, + "learning_rate": 4.245117097102657e-05, + "loss": 2.6686, + "step": 3362 + }, + { + "epoch": 0.04, + "learning_rate": 4.2446542627048044e-05, + "loss": 4.1809, + "step": 3364 + }, + { + "epoch": 0.04, + "learning_rate": 4.2441914283069516e-05, + "loss": 1.5655, + "step": 3366 + }, + { + "epoch": 0.04, + "learning_rate": 4.2437285939090995e-05, + "loss": 5.4001, + "step": 3368 + }, + { + "epoch": 0.04, + "learning_rate": 4.243265759511247e-05, + "loss": 3.8123, + "step": 3370 + }, + { + "epoch": 0.04, + "learning_rate": 4.2428029251133946e-05, + "loss": 1.4871, + "step": 3372 + }, + { + "epoch": 0.04, + "learning_rate": 4.242340090715542e-05, + "loss": 0.3414, + "step": 3374 + }, + { + "epoch": 0.04, + "learning_rate": 4.24187725631769e-05, + "loss": 2.6262, + "step": 3376 + }, + { + "epoch": 0.04, + "learning_rate": 4.241414421919837e-05, + "loss": 0.0363, + "step": 3378 + }, + { + "epoch": 0.04, + "learning_rate": 4.240951587521985e-05, + "loss": 0.1372, + "step": 3380 + }, + { + "epoch": 0.04, + "learning_rate": 4.240488753124132e-05, + "loss": 0.8853, + "step": 3382 + }, + { + "epoch": 0.04, + "learning_rate": 4.24002591872628e-05, + "loss": 0.0597, + "step": 3384 + }, + { + "epoch": 0.04, + "learning_rate": 4.239563084328427e-05, + "loss": 1.7297, + "step": 3386 + }, + { + "epoch": 0.04, + "learning_rate": 4.239100249930575e-05, + "loss": 4.9774, + "step": 3388 + }, + { + "epoch": 0.04, + "learning_rate": 4.238637415532722e-05, + "loss": 1.7487, + "step": 3390 + }, + { + "epoch": 0.04, + "learning_rate": 4.23817458113487e-05, + "loss": 0.005, + "step": 3392 + }, + { + "epoch": 0.04, + "learning_rate": 4.237711746737017e-05, + "loss": 6.1751, + "step": 3394 + }, + { + "epoch": 0.04, + "learning_rate": 4.237248912339165e-05, + "loss": 4.0365, + "step": 3396 + }, + { + "epoch": 0.04, + "learning_rate": 4.236786077941312e-05, + "loss": 2.4866, + "step": 3398 + }, + { + "epoch": 0.04, + "learning_rate": 4.23632324354346e-05, + "loss": 3.9682, + "step": 3400 + }, + { + "epoch": 0.04, + "learning_rate": 4.235860409145608e-05, + "loss": 0.2623, + "step": 3402 + }, + { + "epoch": 0.04, + "learning_rate": 4.235397574747755e-05, + "loss": 2.031, + "step": 3404 + }, + { + "epoch": 0.04, + "learning_rate": 4.234934740349903e-05, + "loss": 0.2057, + "step": 3406 + }, + { + "epoch": 0.04, + "learning_rate": 4.2344719059520504e-05, + "loss": 1.7295, + "step": 3408 + }, + { + "epoch": 0.04, + "learning_rate": 4.234009071554198e-05, + "loss": 4.2343, + "step": 3410 + }, + { + "epoch": 0.04, + "learning_rate": 4.2335462371563455e-05, + "loss": 2.9266, + "step": 3412 + }, + { + "epoch": 0.04, + "learning_rate": 4.233083402758493e-05, + "loss": 1.5224, + "step": 3414 + }, + { + "epoch": 0.04, + "learning_rate": 4.2326205683606405e-05, + "loss": 0.0123, + "step": 3416 + }, + { + "epoch": 0.04, + "learning_rate": 4.2321577339627884e-05, + "loss": 1.7313, + "step": 3418 + }, + { + "epoch": 0.04, + "learning_rate": 4.2316948995649356e-05, + "loss": 0.7637, + "step": 3420 + }, + { + "epoch": 0.04, + "learning_rate": 4.2312320651670835e-05, + "loss": 4.1393, + "step": 3422 + }, + { + "epoch": 0.04, + "learning_rate": 4.230769230769231e-05, + "loss": 2.2686, + "step": 3424 + }, + { + "epoch": 0.04, + "learning_rate": 4.2303063963713786e-05, + "loss": 0.0369, + "step": 3426 + }, + { + "epoch": 0.04, + "learning_rate": 4.229843561973526e-05, + "loss": 3.6006, + "step": 3428 + }, + { + "epoch": 0.04, + "learning_rate": 4.229380727575674e-05, + "loss": 0.602, + "step": 3430 + }, + { + "epoch": 0.04, + "learning_rate": 4.228917893177821e-05, + "loss": 1.6638, + "step": 3432 + }, + { + "epoch": 0.04, + "learning_rate": 4.228455058779969e-05, + "loss": 1.8726, + "step": 3434 + }, + { + "epoch": 0.04, + "learning_rate": 4.227992224382116e-05, + "loss": 0.0104, + "step": 3436 + }, + { + "epoch": 0.04, + "learning_rate": 4.227529389984264e-05, + "loss": 0.6932, + "step": 3438 + }, + { + "epoch": 0.04, + "learning_rate": 4.227066555586412e-05, + "loss": 4.7155, + "step": 3440 + }, + { + "epoch": 0.04, + "learning_rate": 4.226603721188559e-05, + "loss": 3.8274, + "step": 3442 + }, + { + "epoch": 0.04, + "learning_rate": 4.226140886790707e-05, + "loss": 4.0874, + "step": 3444 + }, + { + "epoch": 0.04, + "learning_rate": 4.225678052392854e-05, + "loss": 7.9174, + "step": 3446 + }, + { + "epoch": 0.04, + "learning_rate": 4.225215217995002e-05, + "loss": 7.3444, + "step": 3448 + }, + { + "epoch": 0.04, + "learning_rate": 4.224752383597149e-05, + "loss": 3.3625, + "step": 3450 + }, + { + "epoch": 0.04, + "learning_rate": 4.224289549199297e-05, + "loss": 3.9635, + "step": 3452 + }, + { + "epoch": 0.04, + "learning_rate": 4.223826714801444e-05, + "loss": 5.6324, + "step": 3454 + }, + { + "epoch": 0.04, + "learning_rate": 4.223363880403592e-05, + "loss": 2.6101, + "step": 3456 + }, + { + "epoch": 0.04, + "learning_rate": 4.222901046005739e-05, + "loss": 2.7896, + "step": 3458 + }, + { + "epoch": 0.04, + "learning_rate": 4.222438211607887e-05, + "loss": 0.6936, + "step": 3460 + }, + { + "epoch": 0.04, + "learning_rate": 4.2219753772100344e-05, + "loss": 1.3427, + "step": 3462 + }, + { + "epoch": 0.04, + "learning_rate": 4.221512542812182e-05, + "loss": 1.1458, + "step": 3464 + }, + { + "epoch": 0.04, + "learning_rate": 4.2210497084143294e-05, + "loss": 1.0104, + "step": 3466 + }, + { + "epoch": 0.04, + "learning_rate": 4.220586874016477e-05, + "loss": 2.0883, + "step": 3468 + }, + { + "epoch": 0.04, + "learning_rate": 4.2201240396186245e-05, + "loss": 4.5542, + "step": 3470 + }, + { + "epoch": 0.04, + "learning_rate": 4.2196612052207724e-05, + "loss": 0.8578, + "step": 3472 + }, + { + "epoch": 0.04, + "learning_rate": 4.2191983708229196e-05, + "loss": 1.1, + "step": 3474 + }, + { + "epoch": 0.04, + "learning_rate": 4.2187355364250675e-05, + "loss": 2.0326, + "step": 3476 + }, + { + "epoch": 0.04, + "learning_rate": 4.218272702027215e-05, + "loss": 2.0999, + "step": 3478 + }, + { + "epoch": 0.04, + "learning_rate": 4.2178098676293626e-05, + "loss": 1.3926, + "step": 3480 + }, + { + "epoch": 0.04, + "learning_rate": 4.2173470332315105e-05, + "loss": 0.5853, + "step": 3482 + }, + { + "epoch": 0.04, + "learning_rate": 4.216884198833658e-05, + "loss": 0.0059, + "step": 3484 + }, + { + "epoch": 0.04, + "learning_rate": 4.2164213644358055e-05, + "loss": 3.2007, + "step": 3486 + }, + { + "epoch": 0.04, + "learning_rate": 4.215958530037953e-05, + "loss": 2.5404, + "step": 3488 + }, + { + "epoch": 0.04, + "learning_rate": 4.2154956956401006e-05, + "loss": 5.7034, + "step": 3490 + }, + { + "epoch": 0.04, + "learning_rate": 4.215032861242248e-05, + "loss": 0.0066, + "step": 3492 + }, + { + "epoch": 0.04, + "learning_rate": 4.214570026844396e-05, + "loss": 2.2369, + "step": 3494 + }, + { + "epoch": 0.04, + "learning_rate": 4.214107192446543e-05, + "loss": 1.7382, + "step": 3496 + }, + { + "epoch": 0.04, + "learning_rate": 4.213644358048691e-05, + "loss": 1.1092, + "step": 3498 + }, + { + "epoch": 0.04, + "learning_rate": 4.213181523650838e-05, + "loss": 0.7164, + "step": 3500 + }, + { + "epoch": 0.04, + "learning_rate": 4.212718689252986e-05, + "loss": 4.5722, + "step": 3502 + }, + { + "epoch": 0.04, + "learning_rate": 4.212255854855133e-05, + "loss": 2.5489, + "step": 3504 + }, + { + "epoch": 0.04, + "learning_rate": 4.211793020457281e-05, + "loss": 7.1832, + "step": 3506 + }, + { + "epoch": 0.04, + "learning_rate": 4.211330186059428e-05, + "loss": 4.1472, + "step": 3508 + }, + { + "epoch": 0.04, + "learning_rate": 4.2108673516615754e-05, + "loss": 1.4145, + "step": 3510 + }, + { + "epoch": 0.04, + "learning_rate": 4.210404517263723e-05, + "loss": 2.9268, + "step": 3512 + }, + { + "epoch": 0.04, + "learning_rate": 4.2099416828658705e-05, + "loss": 7.2677, + "step": 3514 + }, + { + "epoch": 0.04, + "learning_rate": 4.2094788484680183e-05, + "loss": 1.4109, + "step": 3516 + }, + { + "epoch": 0.04, + "learning_rate": 4.2090160140701655e-05, + "loss": 4.2786, + "step": 3518 + }, + { + "epoch": 0.04, + "learning_rate": 4.2085531796723134e-05, + "loss": 5.4463, + "step": 3520 + }, + { + "epoch": 0.04, + "learning_rate": 4.2080903452744606e-05, + "loss": 3.4037, + "step": 3522 + }, + { + "epoch": 0.04, + "learning_rate": 4.2076275108766085e-05, + "loss": 1.6205, + "step": 3524 + }, + { + "epoch": 0.04, + "learning_rate": 4.207164676478756e-05, + "loss": 0.8215, + "step": 3526 + }, + { + "epoch": 0.04, + "learning_rate": 4.2067018420809036e-05, + "loss": 1.1193, + "step": 3528 + }, + { + "epoch": 0.04, + "learning_rate": 4.206239007683051e-05, + "loss": 0.602, + "step": 3530 + }, + { + "epoch": 0.04, + "learning_rate": 4.205776173285199e-05, + "loss": 0.1034, + "step": 3532 + }, + { + "epoch": 0.04, + "learning_rate": 4.205313338887346e-05, + "loss": 6.2331, + "step": 3534 + }, + { + "epoch": 0.04, + "learning_rate": 4.204850504489494e-05, + "loss": 3.7723, + "step": 3536 + }, + { + "epoch": 0.04, + "learning_rate": 4.204387670091641e-05, + "loss": 0.1112, + "step": 3538 + }, + { + "epoch": 0.04, + "learning_rate": 4.203924835693789e-05, + "loss": 1.4574, + "step": 3540 + }, + { + "epoch": 0.04, + "learning_rate": 4.203462001295936e-05, + "loss": 8.817, + "step": 3542 + }, + { + "epoch": 0.04, + "learning_rate": 4.202999166898084e-05, + "loss": 2.4523, + "step": 3544 + }, + { + "epoch": 0.04, + "learning_rate": 4.202536332500232e-05, + "loss": 1.8239, + "step": 3546 + }, + { + "epoch": 0.04, + "learning_rate": 4.202073498102379e-05, + "loss": 2.7102, + "step": 3548 + }, + { + "epoch": 0.04, + "learning_rate": 4.201610663704527e-05, + "loss": 1.357, + "step": 3550 + }, + { + "epoch": 0.04, + "learning_rate": 4.201147829306674e-05, + "loss": 2.3576, + "step": 3552 + }, + { + "epoch": 0.04, + "learning_rate": 4.200684994908822e-05, + "loss": 1.0694, + "step": 3554 + }, + { + "epoch": 0.04, + "learning_rate": 4.200222160510969e-05, + "loss": 6.537, + "step": 3556 + }, + { + "epoch": 0.04, + "learning_rate": 4.199759326113117e-05, + "loss": 1.9346, + "step": 3558 + }, + { + "epoch": 0.04, + "learning_rate": 4.199296491715264e-05, + "loss": 0.714, + "step": 3560 + }, + { + "epoch": 0.04, + "learning_rate": 4.198833657317412e-05, + "loss": 2.4289, + "step": 3562 + }, + { + "epoch": 0.04, + "learning_rate": 4.1983708229195594e-05, + "loss": 1.2738, + "step": 3564 + }, + { + "epoch": 0.04, + "learning_rate": 4.197907988521707e-05, + "loss": 2.4684, + "step": 3566 + }, + { + "epoch": 0.04, + "learning_rate": 4.1974451541238544e-05, + "loss": 2.5662, + "step": 3568 + }, + { + "epoch": 0.04, + "learning_rate": 4.196982319726002e-05, + "loss": 2.4683, + "step": 3570 + }, + { + "epoch": 0.04, + "learning_rate": 4.1965194853281495e-05, + "loss": 3.6421, + "step": 3572 + }, + { + "epoch": 0.04, + "learning_rate": 4.1960566509302974e-05, + "loss": 0.7933, + "step": 3574 + }, + { + "epoch": 0.04, + "learning_rate": 4.1955938165324446e-05, + "loss": 2.0745, + "step": 3576 + }, + { + "epoch": 0.04, + "learning_rate": 4.1951309821345925e-05, + "loss": 0.6784, + "step": 3578 + }, + { + "epoch": 0.04, + "learning_rate": 4.19466814773674e-05, + "loss": 0.868, + "step": 3580 + }, + { + "epoch": 0.04, + "learning_rate": 4.1942053133388876e-05, + "loss": 4.4735, + "step": 3582 + }, + { + "epoch": 0.04, + "learning_rate": 4.193742478941035e-05, + "loss": 3.4782, + "step": 3584 + }, + { + "epoch": 0.04, + "learning_rate": 4.193279644543183e-05, + "loss": 2.4448, + "step": 3586 + }, + { + "epoch": 0.04, + "learning_rate": 4.1928168101453305e-05, + "loss": 0.8041, + "step": 3588 + }, + { + "epoch": 0.04, + "learning_rate": 4.192353975747478e-05, + "loss": 3.2576, + "step": 3590 + }, + { + "epoch": 0.04, + "learning_rate": 4.1918911413496256e-05, + "loss": 1.747, + "step": 3592 + }, + { + "epoch": 0.04, + "learning_rate": 4.191428306951773e-05, + "loss": 1.2863, + "step": 3594 + }, + { + "epoch": 0.04, + "learning_rate": 4.190965472553921e-05, + "loss": 4.2613, + "step": 3596 + }, + { + "epoch": 0.04, + "learning_rate": 4.190502638156068e-05, + "loss": 0.8291, + "step": 3598 + }, + { + "epoch": 0.04, + "learning_rate": 4.190039803758216e-05, + "loss": 0.0146, + "step": 3600 + }, + { + "epoch": 0.04, + "learning_rate": 4.189576969360363e-05, + "loss": 2.3546, + "step": 3602 + }, + { + "epoch": 0.04, + "learning_rate": 4.189114134962511e-05, + "loss": 1.4018, + "step": 3604 + }, + { + "epoch": 0.04, + "learning_rate": 4.188651300564658e-05, + "loss": 2.0871, + "step": 3606 + }, + { + "epoch": 0.04, + "learning_rate": 4.188188466166806e-05, + "loss": 4.8774, + "step": 3608 + }, + { + "epoch": 0.04, + "learning_rate": 4.187725631768953e-05, + "loss": 0.0071, + "step": 3610 + }, + { + "epoch": 0.04, + "learning_rate": 4.187262797371101e-05, + "loss": 1.1932, + "step": 3612 + }, + { + "epoch": 0.04, + "learning_rate": 4.186799962973248e-05, + "loss": 3.895, + "step": 3614 + }, + { + "epoch": 0.04, + "learning_rate": 4.186337128575396e-05, + "loss": 6.5203, + "step": 3616 + }, + { + "epoch": 0.04, + "learning_rate": 4.1858742941775433e-05, + "loss": 2.5641, + "step": 3618 + }, + { + "epoch": 0.04, + "learning_rate": 4.185411459779691e-05, + "loss": 3.6787, + "step": 3620 + }, + { + "epoch": 0.04, + "learning_rate": 4.1849486253818384e-05, + "loss": 1.2298, + "step": 3622 + }, + { + "epoch": 0.04, + "learning_rate": 4.184485790983986e-05, + "loss": 2.7474, + "step": 3624 + }, + { + "epoch": 0.04, + "learning_rate": 4.184022956586134e-05, + "loss": 0.588, + "step": 3626 + }, + { + "epoch": 0.04, + "learning_rate": 4.1835601221882814e-05, + "loss": 1.941, + "step": 3628 + }, + { + "epoch": 0.04, + "learning_rate": 4.183097287790429e-05, + "loss": 0.2898, + "step": 3630 + }, + { + "epoch": 0.04, + "learning_rate": 4.1826344533925765e-05, + "loss": 2.6456, + "step": 3632 + }, + { + "epoch": 0.04, + "learning_rate": 4.1821716189947244e-05, + "loss": 0.1782, + "step": 3634 + }, + { + "epoch": 0.04, + "learning_rate": 4.1817087845968716e-05, + "loss": 3.256, + "step": 3636 + }, + { + "epoch": 0.04, + "learning_rate": 4.1812459501990194e-05, + "loss": 5.2607, + "step": 3638 + }, + { + "epoch": 0.04, + "learning_rate": 4.1807831158011667e-05, + "loss": 0.0001, + "step": 3640 + }, + { + "epoch": 0.04, + "learning_rate": 4.1803202814033145e-05, + "loss": 5.2252, + "step": 3642 + }, + { + "epoch": 0.04, + "learning_rate": 4.179857447005462e-05, + "loss": 2.5432, + "step": 3644 + }, + { + "epoch": 0.04, + "learning_rate": 4.1793946126076096e-05, + "loss": 1.1664, + "step": 3646 + }, + { + "epoch": 0.04, + "learning_rate": 4.178931778209757e-05, + "loss": 0.8417, + "step": 3648 + }, + { + "epoch": 0.04, + "learning_rate": 4.178468943811905e-05, + "loss": 0.7782, + "step": 3650 + }, + { + "epoch": 0.04, + "learning_rate": 4.178006109414052e-05, + "loss": 1.3184, + "step": 3652 + }, + { + "epoch": 0.04, + "learning_rate": 4.177543275016199e-05, + "loss": 3.1738, + "step": 3654 + }, + { + "epoch": 0.04, + "learning_rate": 4.177080440618347e-05, + "loss": 3.2281, + "step": 3656 + }, + { + "epoch": 0.04, + "learning_rate": 4.176617606220494e-05, + "loss": 0.5099, + "step": 3658 + }, + { + "epoch": 0.04, + "learning_rate": 4.176154771822642e-05, + "loss": 1.7614, + "step": 3660 + }, + { + "epoch": 0.04, + "learning_rate": 4.175691937424789e-05, + "loss": 1.164, + "step": 3662 + }, + { + "epoch": 0.04, + "learning_rate": 4.175229103026937e-05, + "loss": 0.1932, + "step": 3664 + }, + { + "epoch": 0.04, + "learning_rate": 4.1747662686290844e-05, + "loss": 0.1416, + "step": 3666 + }, + { + "epoch": 0.04, + "learning_rate": 4.174303434231232e-05, + "loss": 0.8173, + "step": 3668 + }, + { + "epoch": 0.04, + "learning_rate": 4.1738405998333795e-05, + "loss": 2.5322, + "step": 3670 + }, + { + "epoch": 0.04, + "learning_rate": 4.173377765435527e-05, + "loss": 0.6402, + "step": 3672 + }, + { + "epoch": 0.04, + "learning_rate": 4.1729149310376745e-05, + "loss": 0.0021, + "step": 3674 + }, + { + "epoch": 0.04, + "learning_rate": 4.1724520966398224e-05, + "loss": 0.5181, + "step": 3676 + }, + { + "epoch": 0.04, + "learning_rate": 4.1719892622419696e-05, + "loss": 1.6885, + "step": 3678 + }, + { + "epoch": 0.04, + "learning_rate": 4.1715264278441175e-05, + "loss": 0.3627, + "step": 3680 + }, + { + "epoch": 0.04, + "learning_rate": 4.171063593446265e-05, + "loss": 2.0161, + "step": 3682 + }, + { + "epoch": 0.04, + "learning_rate": 4.1706007590484126e-05, + "loss": 5.0827, + "step": 3684 + }, + { + "epoch": 0.04, + "learning_rate": 4.17013792465056e-05, + "loss": 5.7841, + "step": 3686 + }, + { + "epoch": 0.04, + "learning_rate": 4.169675090252708e-05, + "loss": 6.1674, + "step": 3688 + }, + { + "epoch": 0.04, + "learning_rate": 4.169212255854855e-05, + "loss": 4.6215, + "step": 3690 + }, + { + "epoch": 0.04, + "learning_rate": 4.168749421457003e-05, + "loss": 2.2734, + "step": 3692 + }, + { + "epoch": 0.04, + "learning_rate": 4.1682865870591506e-05, + "loss": 2.8671, + "step": 3694 + }, + { + "epoch": 0.04, + "learning_rate": 4.167823752661298e-05, + "loss": 0.0236, + "step": 3696 + }, + { + "epoch": 0.04, + "learning_rate": 4.167360918263446e-05, + "loss": 0.0064, + "step": 3698 + }, + { + "epoch": 0.04, + "learning_rate": 4.166898083865593e-05, + "loss": 5.0424, + "step": 3700 + }, + { + "epoch": 0.04, + "learning_rate": 4.166435249467741e-05, + "loss": 7.3294, + "step": 3702 + }, + { + "epoch": 0.04, + "learning_rate": 4.165972415069888e-05, + "loss": 1.7349, + "step": 3704 + }, + { + "epoch": 0.04, + "learning_rate": 4.165509580672036e-05, + "loss": 2.1063, + "step": 3706 + }, + { + "epoch": 0.04, + "learning_rate": 4.165046746274183e-05, + "loss": 0.4167, + "step": 3708 + }, + { + "epoch": 0.04, + "learning_rate": 4.164583911876331e-05, + "loss": 3.1664, + "step": 3710 + }, + { + "epoch": 0.04, + "learning_rate": 4.164121077478478e-05, + "loss": 1.1138, + "step": 3712 + }, + { + "epoch": 0.04, + "learning_rate": 4.163658243080626e-05, + "loss": 0.4404, + "step": 3714 + }, + { + "epoch": 0.04, + "learning_rate": 4.163195408682773e-05, + "loss": 1.8831, + "step": 3716 + }, + { + "epoch": 0.04, + "learning_rate": 4.162732574284921e-05, + "loss": 1.718, + "step": 3718 + }, + { + "epoch": 0.04, + "learning_rate": 4.1622697398870684e-05, + "loss": 0.0035, + "step": 3720 + }, + { + "epoch": 0.04, + "learning_rate": 4.161806905489216e-05, + "loss": 0.1341, + "step": 3722 + }, + { + "epoch": 0.04, + "learning_rate": 4.1613440710913634e-05, + "loss": 4.1329, + "step": 3724 + }, + { + "epoch": 0.04, + "learning_rate": 4.160881236693511e-05, + "loss": 3.58, + "step": 3726 + }, + { + "epoch": 0.04, + "learning_rate": 4.1604184022956585e-05, + "loss": 3.091, + "step": 3728 + }, + { + "epoch": 0.04, + "learning_rate": 4.1599555678978064e-05, + "loss": 1.8734, + "step": 3730 + }, + { + "epoch": 0.04, + "learning_rate": 4.1594927334999536e-05, + "loss": 2.215, + "step": 3732 + }, + { + "epoch": 0.04, + "learning_rate": 4.1590298991021015e-05, + "loss": 3.293, + "step": 3734 + }, + { + "epoch": 0.04, + "learning_rate": 4.1585670647042494e-05, + "loss": 1.3867, + "step": 3736 + }, + { + "epoch": 0.04, + "learning_rate": 4.1581042303063966e-05, + "loss": 2.058, + "step": 3738 + }, + { + "epoch": 0.04, + "learning_rate": 4.1576413959085445e-05, + "loss": 4.2126, + "step": 3740 + }, + { + "epoch": 0.04, + "learning_rate": 4.1571785615106917e-05, + "loss": 2.0042, + "step": 3742 + }, + { + "epoch": 0.04, + "learning_rate": 4.1567157271128395e-05, + "loss": 2.607, + "step": 3744 + }, + { + "epoch": 0.04, + "learning_rate": 4.156252892714987e-05, + "loss": 2.3221, + "step": 3746 + }, + { + "epoch": 0.04, + "learning_rate": 4.1557900583171346e-05, + "loss": 3.3707, + "step": 3748 + }, + { + "epoch": 0.04, + "learning_rate": 4.155327223919282e-05, + "loss": 3.2625, + "step": 3750 + }, + { + "epoch": 0.04, + "learning_rate": 4.15486438952143e-05, + "loss": 2.3737, + "step": 3752 + }, + { + "epoch": 0.04, + "learning_rate": 4.154401555123577e-05, + "loss": 0.385, + "step": 3754 + }, + { + "epoch": 0.04, + "learning_rate": 4.153938720725725e-05, + "loss": 1.3596, + "step": 3756 + }, + { + "epoch": 0.04, + "learning_rate": 4.153475886327872e-05, + "loss": 2.6154, + "step": 3758 + }, + { + "epoch": 0.04, + "learning_rate": 4.15301305193002e-05, + "loss": 0.7361, + "step": 3760 + }, + { + "epoch": 0.04, + "learning_rate": 4.152550217532167e-05, + "loss": 1.0208, + "step": 3762 + }, + { + "epoch": 0.04, + "learning_rate": 4.152087383134315e-05, + "loss": 3.1949, + "step": 3764 + }, + { + "epoch": 0.04, + "learning_rate": 4.151624548736462e-05, + "loss": 0.0055, + "step": 3766 + }, + { + "epoch": 0.04, + "learning_rate": 4.15116171433861e-05, + "loss": 4.3333, + "step": 3768 + }, + { + "epoch": 0.04, + "learning_rate": 4.150698879940757e-05, + "loss": 3.7332, + "step": 3770 + }, + { + "epoch": 0.04, + "learning_rate": 4.150236045542905e-05, + "loss": 1.3496, + "step": 3772 + }, + { + "epoch": 0.04, + "learning_rate": 4.149773211145053e-05, + "loss": 2.2948, + "step": 3774 + }, + { + "epoch": 0.04, + "learning_rate": 4.1493103767472e-05, + "loss": 2.3364, + "step": 3776 + }, + { + "epoch": 0.04, + "learning_rate": 4.148847542349348e-05, + "loss": 0.7023, + "step": 3778 + }, + { + "epoch": 0.04, + "learning_rate": 4.148384707951495e-05, + "loss": 0.9714, + "step": 3780 + }, + { + "epoch": 0.04, + "learning_rate": 4.147921873553643e-05, + "loss": 1.1263, + "step": 3782 + }, + { + "epoch": 0.04, + "learning_rate": 4.1474590391557904e-05, + "loss": 3.7788, + "step": 3784 + }, + { + "epoch": 0.04, + "learning_rate": 4.146996204757938e-05, + "loss": 4.536, + "step": 3786 + }, + { + "epoch": 0.04, + "learning_rate": 4.1465333703600855e-05, + "loss": 0.3711, + "step": 3788 + }, + { + "epoch": 0.04, + "learning_rate": 4.1460705359622334e-05, + "loss": 3.9316, + "step": 3790 + }, + { + "epoch": 0.04, + "learning_rate": 4.1456077015643806e-05, + "loss": 0.9738, + "step": 3792 + }, + { + "epoch": 0.04, + "learning_rate": 4.1451448671665284e-05, + "loss": 0.0512, + "step": 3794 + }, + { + "epoch": 0.04, + "learning_rate": 4.1446820327686756e-05, + "loss": 0.3861, + "step": 3796 + }, + { + "epoch": 0.04, + "learning_rate": 4.144219198370823e-05, + "loss": 2.7784, + "step": 3798 + }, + { + "epoch": 0.04, + "learning_rate": 4.143756363972971e-05, + "loss": 2.0859, + "step": 3800 + }, + { + "epoch": 0.04, + "learning_rate": 4.143293529575118e-05, + "loss": 3.0655, + "step": 3802 + }, + { + "epoch": 0.04, + "learning_rate": 4.142830695177266e-05, + "loss": 5.0659, + "step": 3804 + }, + { + "epoch": 0.04, + "learning_rate": 4.142367860779413e-05, + "loss": 0.1243, + "step": 3806 + }, + { + "epoch": 0.04, + "learning_rate": 4.141905026381561e-05, + "loss": 1.4189, + "step": 3808 + }, + { + "epoch": 0.04, + "learning_rate": 4.141442191983708e-05, + "loss": 2.347, + "step": 3810 + }, + { + "epoch": 0.04, + "learning_rate": 4.140979357585856e-05, + "loss": 2.277, + "step": 3812 + }, + { + "epoch": 0.04, + "learning_rate": 4.140516523188003e-05, + "loss": 3.4847, + "step": 3814 + }, + { + "epoch": 0.04, + "learning_rate": 4.140053688790151e-05, + "loss": 0.0679, + "step": 3816 + }, + { + "epoch": 0.04, + "learning_rate": 4.139590854392298e-05, + "loss": 1.6648, + "step": 3818 + }, + { + "epoch": 0.04, + "learning_rate": 4.139128019994446e-05, + "loss": 4.5147, + "step": 3820 + }, + { + "epoch": 0.04, + "learning_rate": 4.1386651855965934e-05, + "loss": 1.7526, + "step": 3822 + }, + { + "epoch": 0.04, + "learning_rate": 4.138202351198741e-05, + "loss": 1.8708, + "step": 3824 + }, + { + "epoch": 0.04, + "learning_rate": 4.1377395168008884e-05, + "loss": 3.5462, + "step": 3826 + }, + { + "epoch": 0.04, + "learning_rate": 4.137276682403036e-05, + "loss": 2.906, + "step": 3828 + }, + { + "epoch": 0.04, + "learning_rate": 4.1368138480051835e-05, + "loss": 6.6052, + "step": 3830 + }, + { + "epoch": 0.04, + "learning_rate": 4.1363510136073314e-05, + "loss": 3.0927, + "step": 3832 + }, + { + "epoch": 0.04, + "learning_rate": 4.1358881792094786e-05, + "loss": 0.5694, + "step": 3834 + }, + { + "epoch": 0.04, + "learning_rate": 4.1354253448116265e-05, + "loss": 2.3637, + "step": 3836 + }, + { + "epoch": 0.04, + "learning_rate": 4.134962510413774e-05, + "loss": 0.0285, + "step": 3838 + }, + { + "epoch": 0.04, + "learning_rate": 4.1344996760159216e-05, + "loss": 2.2627, + "step": 3840 + }, + { + "epoch": 0.04, + "learning_rate": 4.1340368416180695e-05, + "loss": 7.042, + "step": 3842 + }, + { + "epoch": 0.04, + "learning_rate": 4.1335740072202167e-05, + "loss": 0.7975, + "step": 3844 + }, + { + "epoch": 0.04, + "learning_rate": 4.1331111728223645e-05, + "loss": 4.4457, + "step": 3846 + }, + { + "epoch": 0.04, + "learning_rate": 4.132648338424512e-05, + "loss": 0.1089, + "step": 3848 + }, + { + "epoch": 0.04, + "learning_rate": 4.1321855040266596e-05, + "loss": 4.7459, + "step": 3850 + }, + { + "epoch": 0.04, + "learning_rate": 4.131722669628807e-05, + "loss": 3.1986, + "step": 3852 + }, + { + "epoch": 0.04, + "learning_rate": 4.131259835230955e-05, + "loss": 0.0668, + "step": 3854 + }, + { + "epoch": 0.04, + "learning_rate": 4.130797000833102e-05, + "loss": 1.4696, + "step": 3856 + }, + { + "epoch": 0.04, + "learning_rate": 4.13033416643525e-05, + "loss": 0.0704, + "step": 3858 + }, + { + "epoch": 0.04, + "learning_rate": 4.129871332037397e-05, + "loss": 1.3157, + "step": 3860 + }, + { + "epoch": 0.04, + "learning_rate": 4.129408497639545e-05, + "loss": 3.2024, + "step": 3862 + }, + { + "epoch": 0.04, + "learning_rate": 4.128945663241692e-05, + "loss": 5.3763, + "step": 3864 + }, + { + "epoch": 0.04, + "learning_rate": 4.12848282884384e-05, + "loss": 3.1467, + "step": 3866 + }, + { + "epoch": 0.04, + "learning_rate": 4.128019994445987e-05, + "loss": 1.8381, + "step": 3868 + }, + { + "epoch": 0.04, + "learning_rate": 4.127557160048135e-05, + "loss": 5.4926, + "step": 3870 + }, + { + "epoch": 0.04, + "learning_rate": 4.127094325650282e-05, + "loss": 1.4235, + "step": 3872 + }, + { + "epoch": 0.04, + "learning_rate": 4.12663149125243e-05, + "loss": 2.2867, + "step": 3874 + }, + { + "epoch": 0.04, + "learning_rate": 4.1261686568545773e-05, + "loss": 2.2757, + "step": 3876 + }, + { + "epoch": 0.04, + "learning_rate": 4.125705822456725e-05, + "loss": 0.3781, + "step": 3878 + }, + { + "epoch": 0.04, + "learning_rate": 4.125242988058873e-05, + "loss": 2.9354, + "step": 3880 + }, + { + "epoch": 0.04, + "learning_rate": 4.12478015366102e-05, + "loss": 5.6954, + "step": 3882 + }, + { + "epoch": 0.04, + "learning_rate": 4.124317319263168e-05, + "loss": 1.1827, + "step": 3884 + }, + { + "epoch": 0.04, + "learning_rate": 4.1238544848653154e-05, + "loss": 2.0504, + "step": 3886 + }, + { + "epoch": 0.04, + "learning_rate": 4.123391650467463e-05, + "loss": 3.7537, + "step": 3888 + }, + { + "epoch": 0.04, + "learning_rate": 4.1229288160696105e-05, + "loss": 2.0449, + "step": 3890 + }, + { + "epoch": 0.04, + "learning_rate": 4.1224659816717584e-05, + "loss": 2.7653, + "step": 3892 + }, + { + "epoch": 0.04, + "learning_rate": 4.1220031472739056e-05, + "loss": 1.9298, + "step": 3894 + }, + { + "epoch": 0.04, + "learning_rate": 4.1215403128760534e-05, + "loss": 0.1034, + "step": 3896 + }, + { + "epoch": 0.04, + "learning_rate": 4.1210774784782006e-05, + "loss": 1.6294, + "step": 3898 + }, + { + "epoch": 0.04, + "learning_rate": 4.1206146440803485e-05, + "loss": 5.753, + "step": 3900 + }, + { + "epoch": 0.04, + "learning_rate": 4.120151809682496e-05, + "loss": 4.9546, + "step": 3902 + }, + { + "epoch": 0.04, + "learning_rate": 4.1196889752846436e-05, + "loss": 0.4539, + "step": 3904 + }, + { + "epoch": 0.04, + "learning_rate": 4.119226140886791e-05, + "loss": 3.1163, + "step": 3906 + }, + { + "epoch": 0.05, + "learning_rate": 4.118763306488939e-05, + "loss": 2.8255, + "step": 3908 + }, + { + "epoch": 0.05, + "learning_rate": 4.118300472091086e-05, + "loss": 0.5978, + "step": 3910 + }, + { + "epoch": 0.05, + "learning_rate": 4.117837637693234e-05, + "loss": 2.3276, + "step": 3912 + }, + { + "epoch": 0.05, + "learning_rate": 4.117374803295381e-05, + "loss": 0.1532, + "step": 3914 + }, + { + "epoch": 0.05, + "learning_rate": 4.116911968897529e-05, + "loss": 1.2292, + "step": 3916 + }, + { + "epoch": 0.05, + "learning_rate": 4.116449134499676e-05, + "loss": 7.4231, + "step": 3918 + }, + { + "epoch": 0.05, + "learning_rate": 4.115986300101824e-05, + "loss": 0.3868, + "step": 3920 + }, + { + "epoch": 0.05, + "learning_rate": 4.115523465703972e-05, + "loss": 2.4503, + "step": 3922 + }, + { + "epoch": 0.05, + "learning_rate": 4.115060631306119e-05, + "loss": 1.8351, + "step": 3924 + }, + { + "epoch": 0.05, + "learning_rate": 4.114597796908267e-05, + "loss": 1.9912, + "step": 3926 + }, + { + "epoch": 0.05, + "learning_rate": 4.114134962510414e-05, + "loss": 4.1159, + "step": 3928 + }, + { + "epoch": 0.05, + "learning_rate": 4.113672128112562e-05, + "loss": 2.1446, + "step": 3930 + }, + { + "epoch": 0.05, + "learning_rate": 4.113209293714709e-05, + "loss": 3.7215, + "step": 3932 + }, + { + "epoch": 0.05, + "learning_rate": 4.112746459316857e-05, + "loss": 2.3043, + "step": 3934 + }, + { + "epoch": 0.05, + "learning_rate": 4.112283624919004e-05, + "loss": 1.7064, + "step": 3936 + }, + { + "epoch": 0.05, + "learning_rate": 4.111820790521152e-05, + "loss": 0.8269, + "step": 3938 + }, + { + "epoch": 0.05, + "learning_rate": 4.1113579561232994e-05, + "loss": 0.3153, + "step": 3940 + }, + { + "epoch": 0.05, + "learning_rate": 4.110895121725447e-05, + "loss": 3.5597, + "step": 3942 + }, + { + "epoch": 0.05, + "learning_rate": 4.110432287327594e-05, + "loss": 1.2079, + "step": 3944 + }, + { + "epoch": 0.05, + "learning_rate": 4.109969452929742e-05, + "loss": 0.5997, + "step": 3946 + }, + { + "epoch": 0.05, + "learning_rate": 4.1095066185318895e-05, + "loss": 0.0103, + "step": 3948 + }, + { + "epoch": 0.05, + "learning_rate": 4.109043784134037e-05, + "loss": 12.0473, + "step": 3950 + }, + { + "epoch": 0.05, + "learning_rate": 4.1085809497361846e-05, + "loss": 3.8525, + "step": 3952 + }, + { + "epoch": 0.05, + "learning_rate": 4.108118115338332e-05, + "loss": 7.9891, + "step": 3954 + }, + { + "epoch": 0.05, + "learning_rate": 4.10765528094048e-05, + "loss": 5.579, + "step": 3956 + }, + { + "epoch": 0.05, + "learning_rate": 4.107192446542627e-05, + "loss": 7.2447, + "step": 3958 + }, + { + "epoch": 0.05, + "learning_rate": 4.106729612144775e-05, + "loss": 1.3527, + "step": 3960 + }, + { + "epoch": 0.05, + "learning_rate": 4.106266777746922e-05, + "loss": 5.7355, + "step": 3962 + }, + { + "epoch": 0.05, + "learning_rate": 4.10580394334907e-05, + "loss": 1.3518, + "step": 3964 + }, + { + "epoch": 0.05, + "learning_rate": 4.105341108951217e-05, + "loss": 5.4268, + "step": 3966 + }, + { + "epoch": 0.05, + "learning_rate": 4.104878274553365e-05, + "loss": 1.2727, + "step": 3968 + }, + { + "epoch": 0.05, + "learning_rate": 4.104415440155512e-05, + "loss": 3.2651, + "step": 3970 + }, + { + "epoch": 0.05, + "learning_rate": 4.10395260575766e-05, + "loss": 2.4137, + "step": 3972 + }, + { + "epoch": 0.05, + "learning_rate": 4.103489771359807e-05, + "loss": 2.5234, + "step": 3974 + }, + { + "epoch": 0.05, + "learning_rate": 4.103026936961955e-05, + "loss": 1.0836, + "step": 3976 + }, + { + "epoch": 0.05, + "learning_rate": 4.1025641025641023e-05, + "loss": 3.1837, + "step": 3978 + }, + { + "epoch": 0.05, + "learning_rate": 4.10210126816625e-05, + "loss": 3.6531, + "step": 3980 + }, + { + "epoch": 0.05, + "learning_rate": 4.1016384337683974e-05, + "loss": 2.0475, + "step": 3982 + }, + { + "epoch": 0.05, + "learning_rate": 4.101175599370545e-05, + "loss": 2.2145, + "step": 3984 + }, + { + "epoch": 0.05, + "learning_rate": 4.100712764972693e-05, + "loss": 1.5748, + "step": 3986 + }, + { + "epoch": 0.05, + "learning_rate": 4.1002499305748404e-05, + "loss": 1.3289, + "step": 3988 + }, + { + "epoch": 0.05, + "learning_rate": 4.099787096176988e-05, + "loss": 2.0359, + "step": 3990 + }, + { + "epoch": 0.05, + "learning_rate": 4.0993242617791355e-05, + "loss": 6.8152, + "step": 3992 + }, + { + "epoch": 0.05, + "learning_rate": 4.0988614273812834e-05, + "loss": 0.3586, + "step": 3994 + }, + { + "epoch": 0.05, + "learning_rate": 4.0983985929834306e-05, + "loss": 3.4035, + "step": 3996 + }, + { + "epoch": 0.05, + "learning_rate": 4.0979357585855784e-05, + "loss": 0.1823, + "step": 3998 + }, + { + "epoch": 0.05, + "learning_rate": 4.0974729241877256e-05, + "loss": 4.0488, + "step": 4000 + }, + { + "epoch": 0.05, + "learning_rate": 4.0970100897898735e-05, + "loss": 1.9893, + "step": 4002 + }, + { + "epoch": 0.05, + "learning_rate": 4.096547255392021e-05, + "loss": 1.4194, + "step": 4004 + }, + { + "epoch": 0.05, + "learning_rate": 4.0960844209941686e-05, + "loss": 1.3266, + "step": 4006 + }, + { + "epoch": 0.05, + "learning_rate": 4.095621586596316e-05, + "loss": 1.321, + "step": 4008 + }, + { + "epoch": 0.05, + "learning_rate": 4.095158752198464e-05, + "loss": 1.6558, + "step": 4010 + }, + { + "epoch": 0.05, + "learning_rate": 4.094695917800611e-05, + "loss": 2.8843, + "step": 4012 + }, + { + "epoch": 0.05, + "learning_rate": 4.094233083402759e-05, + "loss": 5.0278, + "step": 4014 + }, + { + "epoch": 0.05, + "learning_rate": 4.093770249004906e-05, + "loss": 0.0043, + "step": 4016 + }, + { + "epoch": 0.05, + "learning_rate": 4.093307414607054e-05, + "loss": 0.0009, + "step": 4018 + }, + { + "epoch": 0.05, + "learning_rate": 4.092844580209201e-05, + "loss": 3.0047, + "step": 4020 + }, + { + "epoch": 0.05, + "learning_rate": 4.092381745811349e-05, + "loss": 4.3861, + "step": 4022 + }, + { + "epoch": 0.05, + "learning_rate": 4.091918911413496e-05, + "loss": 1.0815, + "step": 4024 + }, + { + "epoch": 0.05, + "learning_rate": 4.091456077015644e-05, + "loss": 0.9404, + "step": 4026 + }, + { + "epoch": 0.05, + "learning_rate": 4.090993242617792e-05, + "loss": 3.5903, + "step": 4028 + }, + { + "epoch": 0.05, + "learning_rate": 4.090530408219939e-05, + "loss": 4.6203, + "step": 4030 + }, + { + "epoch": 0.05, + "learning_rate": 4.090067573822087e-05, + "loss": 2.2168, + "step": 4032 + }, + { + "epoch": 0.05, + "learning_rate": 4.089604739424234e-05, + "loss": 1.1086, + "step": 4034 + }, + { + "epoch": 0.05, + "learning_rate": 4.089141905026382e-05, + "loss": 1.3785, + "step": 4036 + }, + { + "epoch": 0.05, + "learning_rate": 4.088679070628529e-05, + "loss": 1.8327, + "step": 4038 + }, + { + "epoch": 0.05, + "learning_rate": 4.088216236230677e-05, + "loss": 5.0805, + "step": 4040 + }, + { + "epoch": 0.05, + "learning_rate": 4.0877534018328244e-05, + "loss": 1.8962, + "step": 4042 + }, + { + "epoch": 0.05, + "learning_rate": 4.087290567434972e-05, + "loss": 3.5338, + "step": 4044 + }, + { + "epoch": 0.05, + "learning_rate": 4.0868277330371195e-05, + "loss": 2.0095, + "step": 4046 + }, + { + "epoch": 0.05, + "learning_rate": 4.0863648986392673e-05, + "loss": 0.5762, + "step": 4048 + }, + { + "epoch": 0.05, + "learning_rate": 4.0859020642414145e-05, + "loss": 2.23, + "step": 4050 + }, + { + "epoch": 0.05, + "learning_rate": 4.0854392298435624e-05, + "loss": 0.0408, + "step": 4052 + }, + { + "epoch": 0.05, + "learning_rate": 4.0849763954457096e-05, + "loss": 4.9383, + "step": 4054 + }, + { + "epoch": 0.05, + "learning_rate": 4.0845135610478575e-05, + "loss": 4.579, + "step": 4056 + }, + { + "epoch": 0.05, + "learning_rate": 4.084050726650005e-05, + "loss": 0.2584, + "step": 4058 + }, + { + "epoch": 0.05, + "learning_rate": 4.0835878922521526e-05, + "loss": 3.4475, + "step": 4060 + }, + { + "epoch": 0.05, + "learning_rate": 4.0831250578543e-05, + "loss": 1.942, + "step": 4062 + }, + { + "epoch": 0.05, + "learning_rate": 4.082662223456448e-05, + "loss": 1.4312, + "step": 4064 + }, + { + "epoch": 0.05, + "learning_rate": 4.082199389058595e-05, + "loss": 1.1294, + "step": 4066 + }, + { + "epoch": 0.05, + "learning_rate": 4.081736554660743e-05, + "loss": 4.6716, + "step": 4068 + }, + { + "epoch": 0.05, + "learning_rate": 4.0812737202628907e-05, + "loss": 0.9898, + "step": 4070 + }, + { + "epoch": 0.05, + "learning_rate": 4.080810885865038e-05, + "loss": 4.0005, + "step": 4072 + }, + { + "epoch": 0.05, + "learning_rate": 4.080348051467186e-05, + "loss": 2.9498, + "step": 4074 + }, + { + "epoch": 0.05, + "learning_rate": 4.079885217069333e-05, + "loss": 1.8626, + "step": 4076 + }, + { + "epoch": 0.05, + "learning_rate": 4.079422382671481e-05, + "loss": 2.0634, + "step": 4078 + }, + { + "epoch": 0.05, + "learning_rate": 4.078959548273628e-05, + "loss": 2.7744, + "step": 4080 + }, + { + "epoch": 0.05, + "learning_rate": 4.078496713875776e-05, + "loss": 0.2012, + "step": 4082 + }, + { + "epoch": 0.05, + "learning_rate": 4.078033879477923e-05, + "loss": 0.6234, + "step": 4084 + }, + { + "epoch": 0.05, + "learning_rate": 4.077571045080071e-05, + "loss": 2.7512, + "step": 4086 + }, + { + "epoch": 0.05, + "learning_rate": 4.0771082106822175e-05, + "loss": 1.6342, + "step": 4088 + }, + { + "epoch": 0.05, + "learning_rate": 4.0766453762843654e-05, + "loss": 0.0013, + "step": 4090 + }, + { + "epoch": 0.05, + "learning_rate": 4.0761825418865126e-05, + "loss": 1.6898, + "step": 4092 + }, + { + "epoch": 0.05, + "learning_rate": 4.0757197074886605e-05, + "loss": 1.4912, + "step": 4094 + }, + { + "epoch": 0.05, + "learning_rate": 4.0752568730908084e-05, + "loss": 0.1067, + "step": 4096 + }, + { + "epoch": 0.05, + "learning_rate": 4.0747940386929556e-05, + "loss": 3.2861, + "step": 4098 + }, + { + "epoch": 0.05, + "learning_rate": 4.0743312042951035e-05, + "loss": 0.8231, + "step": 4100 + }, + { + "epoch": 0.05, + "learning_rate": 4.0738683698972507e-05, + "loss": 1.166, + "step": 4102 + }, + { + "epoch": 0.05, + "learning_rate": 4.0734055354993985e-05, + "loss": 6.3418, + "step": 4104 + }, + { + "epoch": 0.05, + "learning_rate": 4.072942701101546e-05, + "loss": 1.4024, + "step": 4106 + }, + { + "epoch": 0.05, + "learning_rate": 4.0724798667036936e-05, + "loss": 0.1601, + "step": 4108 + }, + { + "epoch": 0.05, + "learning_rate": 4.072017032305841e-05, + "loss": 0.3026, + "step": 4110 + }, + { + "epoch": 0.05, + "learning_rate": 4.071554197907989e-05, + "loss": 3.2877, + "step": 4112 + }, + { + "epoch": 0.05, + "learning_rate": 4.071091363510136e-05, + "loss": 1.5339, + "step": 4114 + }, + { + "epoch": 0.05, + "learning_rate": 4.070628529112284e-05, + "loss": 2.8374, + "step": 4116 + }, + { + "epoch": 0.05, + "learning_rate": 4.070165694714431e-05, + "loss": 1.7023, + "step": 4118 + }, + { + "epoch": 0.05, + "learning_rate": 4.069702860316579e-05, + "loss": 0.3327, + "step": 4120 + }, + { + "epoch": 0.05, + "learning_rate": 4.069240025918726e-05, + "loss": 3.2016, + "step": 4122 + }, + { + "epoch": 0.05, + "learning_rate": 4.068777191520874e-05, + "loss": 1.9094, + "step": 4124 + }, + { + "epoch": 0.05, + "learning_rate": 4.068314357123021e-05, + "loss": 7.8456, + "step": 4126 + }, + { + "epoch": 0.05, + "learning_rate": 4.067851522725169e-05, + "loss": 1.5327, + "step": 4128 + }, + { + "epoch": 0.05, + "learning_rate": 4.067388688327316e-05, + "loss": 0.7614, + "step": 4130 + }, + { + "epoch": 0.05, + "learning_rate": 4.066925853929464e-05, + "loss": 0.0004, + "step": 4132 + }, + { + "epoch": 0.05, + "learning_rate": 4.066463019531612e-05, + "loss": 5.4614, + "step": 4134 + }, + { + "epoch": 0.05, + "learning_rate": 4.066000185133759e-05, + "loss": 6.818, + "step": 4136 + }, + { + "epoch": 0.05, + "learning_rate": 4.065537350735907e-05, + "loss": 4.6532, + "step": 4138 + }, + { + "epoch": 0.05, + "learning_rate": 4.065074516338054e-05, + "loss": 0.4316, + "step": 4140 + }, + { + "epoch": 0.05, + "learning_rate": 4.064611681940202e-05, + "loss": 1.396, + "step": 4142 + }, + { + "epoch": 0.05, + "learning_rate": 4.0641488475423494e-05, + "loss": 5.4068, + "step": 4144 + }, + { + "epoch": 0.05, + "learning_rate": 4.063686013144497e-05, + "loss": 0.9648, + "step": 4146 + }, + { + "epoch": 0.05, + "learning_rate": 4.0632231787466445e-05, + "loss": 2.1129, + "step": 4148 + }, + { + "epoch": 0.05, + "learning_rate": 4.0627603443487924e-05, + "loss": 7.7229, + "step": 4150 + }, + { + "epoch": 0.05, + "learning_rate": 4.0622975099509396e-05, + "loss": 1.7722, + "step": 4152 + }, + { + "epoch": 0.05, + "learning_rate": 4.0618346755530874e-05, + "loss": 1.5117, + "step": 4154 + }, + { + "epoch": 0.05, + "learning_rate": 4.0613718411552346e-05, + "loss": 0.3093, + "step": 4156 + }, + { + "epoch": 0.05, + "learning_rate": 4.0609090067573825e-05, + "loss": 3.8922, + "step": 4158 + }, + { + "epoch": 0.05, + "learning_rate": 4.06044617235953e-05, + "loss": 3.3537, + "step": 4160 + }, + { + "epoch": 0.05, + "learning_rate": 4.0599833379616776e-05, + "loss": 0.8515, + "step": 4162 + }, + { + "epoch": 0.05, + "learning_rate": 4.059520503563825e-05, + "loss": 1.952, + "step": 4164 + }, + { + "epoch": 0.05, + "learning_rate": 4.059057669165973e-05, + "loss": 0.4, + "step": 4166 + }, + { + "epoch": 0.05, + "learning_rate": 4.05859483476812e-05, + "loss": 0.0022, + "step": 4168 + }, + { + "epoch": 0.05, + "learning_rate": 4.058132000370268e-05, + "loss": 0.5044, + "step": 4170 + }, + { + "epoch": 0.05, + "learning_rate": 4.057669165972415e-05, + "loss": 1.5347, + "step": 4172 + }, + { + "epoch": 0.05, + "learning_rate": 4.057206331574563e-05, + "loss": 1.1235, + "step": 4174 + }, + { + "epoch": 0.05, + "learning_rate": 4.056743497176711e-05, + "loss": 2.0561, + "step": 4176 + }, + { + "epoch": 0.05, + "learning_rate": 4.056280662778858e-05, + "loss": 0.4242, + "step": 4178 + }, + { + "epoch": 0.05, + "learning_rate": 4.055817828381006e-05, + "loss": 1.3928, + "step": 4180 + }, + { + "epoch": 0.05, + "learning_rate": 4.055354993983153e-05, + "loss": 0.4701, + "step": 4182 + }, + { + "epoch": 0.05, + "learning_rate": 4.054892159585301e-05, + "loss": 3.8047, + "step": 4184 + }, + { + "epoch": 0.05, + "learning_rate": 4.054429325187448e-05, + "loss": 4.6918, + "step": 4186 + }, + { + "epoch": 0.05, + "learning_rate": 4.053966490789596e-05, + "loss": 6.9842, + "step": 4188 + }, + { + "epoch": 0.05, + "learning_rate": 4.053503656391743e-05, + "loss": 1.2547, + "step": 4190 + }, + { + "epoch": 0.05, + "learning_rate": 4.053040821993891e-05, + "loss": 4.3598, + "step": 4192 + }, + { + "epoch": 0.05, + "learning_rate": 4.052577987596038e-05, + "loss": 0.0019, + "step": 4194 + }, + { + "epoch": 0.05, + "learning_rate": 4.052115153198186e-05, + "loss": 2.3273, + "step": 4196 + }, + { + "epoch": 0.05, + "learning_rate": 4.0516523188003334e-05, + "loss": 2.5301, + "step": 4198 + }, + { + "epoch": 0.05, + "learning_rate": 4.051189484402481e-05, + "loss": 0.1585, + "step": 4200 + }, + { + "epoch": 0.05, + "learning_rate": 4.0507266500046285e-05, + "loss": 4.3762, + "step": 4202 + }, + { + "epoch": 0.05, + "learning_rate": 4.050263815606776e-05, + "loss": 3.0751, + "step": 4204 + }, + { + "epoch": 0.05, + "learning_rate": 4.0498009812089235e-05, + "loss": 0.5771, + "step": 4206 + }, + { + "epoch": 0.05, + "learning_rate": 4.0493381468110714e-05, + "loss": 2.4967, + "step": 4208 + }, + { + "epoch": 0.05, + "learning_rate": 4.0488753124132186e-05, + "loss": 1.4627, + "step": 4210 + }, + { + "epoch": 0.05, + "learning_rate": 4.0484124780153665e-05, + "loss": 3.2215, + "step": 4212 + }, + { + "epoch": 0.05, + "learning_rate": 4.0479496436175144e-05, + "loss": 2.5603, + "step": 4214 + }, + { + "epoch": 0.05, + "learning_rate": 4.0474868092196616e-05, + "loss": 0.2096, + "step": 4216 + }, + { + "epoch": 0.05, + "learning_rate": 4.0470239748218095e-05, + "loss": 1.0174, + "step": 4218 + }, + { + "epoch": 0.05, + "learning_rate": 4.046561140423957e-05, + "loss": 1.8722, + "step": 4220 + }, + { + "epoch": 0.05, + "learning_rate": 4.0460983060261046e-05, + "loss": 0.7786, + "step": 4222 + }, + { + "epoch": 0.05, + "learning_rate": 4.045635471628252e-05, + "loss": 2.2448, + "step": 4224 + }, + { + "epoch": 0.05, + "learning_rate": 4.0451726372303996e-05, + "loss": 4.8662, + "step": 4226 + }, + { + "epoch": 0.05, + "learning_rate": 4.044709802832547e-05, + "loss": 4.2928, + "step": 4228 + }, + { + "epoch": 0.05, + "learning_rate": 4.044246968434695e-05, + "loss": 3.1654, + "step": 4230 + }, + { + "epoch": 0.05, + "learning_rate": 4.043784134036842e-05, + "loss": 1.1447, + "step": 4232 + }, + { + "epoch": 0.05, + "learning_rate": 4.043321299638989e-05, + "loss": 6.4044, + "step": 4234 + }, + { + "epoch": 0.05, + "learning_rate": 4.042858465241136e-05, + "loss": 0.8323, + "step": 4236 + }, + { + "epoch": 0.05, + "learning_rate": 4.042395630843284e-05, + "loss": 2.3208, + "step": 4238 + }, + { + "epoch": 0.05, + "learning_rate": 4.041932796445432e-05, + "loss": 1.4892, + "step": 4240 + }, + { + "epoch": 0.05, + "learning_rate": 4.041469962047579e-05, + "loss": 1.4713, + "step": 4242 + }, + { + "epoch": 0.05, + "learning_rate": 4.041007127649727e-05, + "loss": 0.5545, + "step": 4244 + }, + { + "epoch": 0.05, + "learning_rate": 4.0405442932518744e-05, + "loss": 0.7224, + "step": 4246 + }, + { + "epoch": 0.05, + "learning_rate": 4.040081458854022e-05, + "loss": 3.7163, + "step": 4248 + }, + { + "epoch": 0.05, + "learning_rate": 4.0396186244561695e-05, + "loss": 3.0758, + "step": 4250 + }, + { + "epoch": 0.05, + "learning_rate": 4.0391557900583174e-05, + "loss": 5.6511, + "step": 4252 + }, + { + "epoch": 0.05, + "learning_rate": 4.0386929556604646e-05, + "loss": 3.1082, + "step": 4254 + }, + { + "epoch": 0.05, + "learning_rate": 4.0382301212626124e-05, + "loss": 0.7766, + "step": 4256 + }, + { + "epoch": 0.05, + "learning_rate": 4.0377672868647596e-05, + "loss": 4.6442, + "step": 4258 + }, + { + "epoch": 0.05, + "learning_rate": 4.0373044524669075e-05, + "loss": 0.3585, + "step": 4260 + }, + { + "epoch": 0.05, + "learning_rate": 4.036841618069055e-05, + "loss": 6.637, + "step": 4262 + }, + { + "epoch": 0.05, + "learning_rate": 4.0363787836712026e-05, + "loss": 2.9893, + "step": 4264 + }, + { + "epoch": 0.05, + "learning_rate": 4.03591594927335e-05, + "loss": 5.3417, + "step": 4266 + }, + { + "epoch": 0.05, + "learning_rate": 4.035453114875498e-05, + "loss": 1.3353, + "step": 4268 + }, + { + "epoch": 0.05, + "learning_rate": 4.034990280477645e-05, + "loss": 0.0177, + "step": 4270 + }, + { + "epoch": 0.05, + "learning_rate": 4.034527446079793e-05, + "loss": 2.2657, + "step": 4272 + }, + { + "epoch": 0.05, + "learning_rate": 4.03406461168194e-05, + "loss": 1.5579, + "step": 4274 + }, + { + "epoch": 0.05, + "learning_rate": 4.033601777284088e-05, + "loss": 2.3915, + "step": 4276 + }, + { + "epoch": 0.05, + "learning_rate": 4.033138942886235e-05, + "loss": 7.0624, + "step": 4278 + }, + { + "epoch": 0.05, + "learning_rate": 4.032676108488383e-05, + "loss": 8.9266, + "step": 4280 + }, + { + "epoch": 0.05, + "learning_rate": 4.032213274090531e-05, + "loss": 2.5701, + "step": 4282 + }, + { + "epoch": 0.05, + "learning_rate": 4.031750439692678e-05, + "loss": 3.201, + "step": 4284 + }, + { + "epoch": 0.05, + "learning_rate": 4.031287605294826e-05, + "loss": 4.4094, + "step": 4286 + }, + { + "epoch": 0.05, + "learning_rate": 4.030824770896973e-05, + "loss": 0.5134, + "step": 4288 + }, + { + "epoch": 0.05, + "learning_rate": 4.030361936499121e-05, + "loss": 1.792, + "step": 4290 + }, + { + "epoch": 0.05, + "learning_rate": 4.029899102101268e-05, + "loss": 0.7249, + "step": 4292 + }, + { + "epoch": 0.05, + "learning_rate": 4.029436267703416e-05, + "loss": 3.0788, + "step": 4294 + }, + { + "epoch": 0.05, + "learning_rate": 4.028973433305563e-05, + "loss": 2.8909, + "step": 4296 + }, + { + "epoch": 0.05, + "learning_rate": 4.028510598907711e-05, + "loss": 2.28, + "step": 4298 + }, + { + "epoch": 0.05, + "learning_rate": 4.0280477645098584e-05, + "loss": 0.9718, + "step": 4300 + }, + { + "epoch": 0.05, + "learning_rate": 4.027584930112006e-05, + "loss": 4.3157, + "step": 4302 + }, + { + "epoch": 0.05, + "learning_rate": 4.0271220957141535e-05, + "loss": 1.9616, + "step": 4304 + }, + { + "epoch": 0.05, + "learning_rate": 4.0266592613163013e-05, + "loss": 2.5443, + "step": 4306 + }, + { + "epoch": 0.05, + "learning_rate": 4.0261964269184485e-05, + "loss": 0.1318, + "step": 4308 + }, + { + "epoch": 0.05, + "learning_rate": 4.0257335925205964e-05, + "loss": 0.0122, + "step": 4310 + }, + { + "epoch": 0.05, + "learning_rate": 4.0252707581227436e-05, + "loss": 3.1132, + "step": 4312 + }, + { + "epoch": 0.05, + "learning_rate": 4.0248079237248915e-05, + "loss": 0.0094, + "step": 4314 + }, + { + "epoch": 0.05, + "learning_rate": 4.024345089327039e-05, + "loss": 6.276, + "step": 4316 + }, + { + "epoch": 0.05, + "learning_rate": 4.0238822549291866e-05, + "loss": 0.0023, + "step": 4318 + }, + { + "epoch": 0.05, + "learning_rate": 4.0234194205313345e-05, + "loss": 3.5058, + "step": 4320 + }, + { + "epoch": 0.05, + "learning_rate": 4.022956586133482e-05, + "loss": 5.4863, + "step": 4322 + }, + { + "epoch": 0.05, + "learning_rate": 4.0224937517356296e-05, + "loss": 1.3456, + "step": 4324 + }, + { + "epoch": 0.05, + "learning_rate": 4.022030917337777e-05, + "loss": 1.2159, + "step": 4326 + }, + { + "epoch": 0.05, + "learning_rate": 4.0215680829399246e-05, + "loss": 6.2764, + "step": 4328 + }, + { + "epoch": 0.05, + "learning_rate": 4.021105248542072e-05, + "loss": 1.7363, + "step": 4330 + }, + { + "epoch": 0.05, + "learning_rate": 4.02064241414422e-05, + "loss": 2.4261, + "step": 4332 + }, + { + "epoch": 0.05, + "learning_rate": 4.020179579746367e-05, + "loss": 0.3455, + "step": 4334 + }, + { + "epoch": 0.05, + "learning_rate": 4.019716745348515e-05, + "loss": 1.1766, + "step": 4336 + }, + { + "epoch": 0.05, + "learning_rate": 4.019253910950662e-05, + "loss": 0.0485, + "step": 4338 + }, + { + "epoch": 0.05, + "learning_rate": 4.01879107655281e-05, + "loss": 2.3596, + "step": 4340 + }, + { + "epoch": 0.05, + "learning_rate": 4.018328242154957e-05, + "loss": 0.0007, + "step": 4342 + }, + { + "epoch": 0.05, + "learning_rate": 4.017865407757105e-05, + "loss": 2.3314, + "step": 4344 + }, + { + "epoch": 0.05, + "learning_rate": 4.017402573359252e-05, + "loss": 2.0275, + "step": 4346 + }, + { + "epoch": 0.05, + "learning_rate": 4.0169397389614e-05, + "loss": 2.057, + "step": 4348 + }, + { + "epoch": 0.05, + "learning_rate": 4.016476904563547e-05, + "loss": 0.787, + "step": 4350 + }, + { + "epoch": 0.05, + "learning_rate": 4.016014070165695e-05, + "loss": 2.3226, + "step": 4352 + }, + { + "epoch": 0.05, + "learning_rate": 4.0155512357678424e-05, + "loss": 0.3899, + "step": 4354 + }, + { + "epoch": 0.05, + "learning_rate": 4.01508840136999e-05, + "loss": 1.1081, + "step": 4356 + }, + { + "epoch": 0.05, + "learning_rate": 4.0146255669721374e-05, + "loss": 1.9573, + "step": 4358 + }, + { + "epoch": 0.05, + "learning_rate": 4.014162732574285e-05, + "loss": 3.5918, + "step": 4360 + }, + { + "epoch": 0.05, + "learning_rate": 4.013699898176433e-05, + "loss": 2.4175, + "step": 4362 + }, + { + "epoch": 0.05, + "learning_rate": 4.0132370637785804e-05, + "loss": 1.1265, + "step": 4364 + }, + { + "epoch": 0.05, + "learning_rate": 4.012774229380728e-05, + "loss": 1.9814, + "step": 4366 + }, + { + "epoch": 0.05, + "learning_rate": 4.0123113949828755e-05, + "loss": 3.7133, + "step": 4368 + }, + { + "epoch": 0.05, + "learning_rate": 4.0118485605850234e-05, + "loss": 2.6646, + "step": 4370 + }, + { + "epoch": 0.05, + "learning_rate": 4.0113857261871706e-05, + "loss": 4.5453, + "step": 4372 + }, + { + "epoch": 0.05, + "learning_rate": 4.0109228917893185e-05, + "loss": 2.2176, + "step": 4374 + }, + { + "epoch": 0.05, + "learning_rate": 4.010460057391466e-05, + "loss": 2.1201, + "step": 4376 + }, + { + "epoch": 0.05, + "learning_rate": 4.009997222993613e-05, + "loss": 0.6756, + "step": 4378 + }, + { + "epoch": 0.05, + "learning_rate": 4.00953438859576e-05, + "loss": 2.1069, + "step": 4380 + }, + { + "epoch": 0.05, + "learning_rate": 4.009071554197908e-05, + "loss": 0.4949, + "step": 4382 + }, + { + "epoch": 0.05, + "learning_rate": 4.008608719800055e-05, + "loss": 0.2945, + "step": 4384 + }, + { + "epoch": 0.05, + "learning_rate": 4.008145885402203e-05, + "loss": 0.0049, + "step": 4386 + }, + { + "epoch": 0.05, + "learning_rate": 4.007683051004351e-05, + "loss": 2.5287, + "step": 4388 + }, + { + "epoch": 0.05, + "learning_rate": 4.007220216606498e-05, + "loss": 1.9824, + "step": 4390 + }, + { + "epoch": 0.05, + "learning_rate": 4.006757382208646e-05, + "loss": 2.1965, + "step": 4392 + }, + { + "epoch": 0.05, + "learning_rate": 4.006294547810793e-05, + "loss": 5.2576, + "step": 4394 + }, + { + "epoch": 0.05, + "learning_rate": 4.005831713412941e-05, + "loss": 1.4359, + "step": 4396 + }, + { + "epoch": 0.05, + "learning_rate": 4.005368879015088e-05, + "loss": 0.7869, + "step": 4398 + }, + { + "epoch": 0.05, + "learning_rate": 4.004906044617236e-05, + "loss": 1.6542, + "step": 4400 + }, + { + "epoch": 0.05, + "learning_rate": 4.0044432102193834e-05, + "loss": 0.0343, + "step": 4402 + }, + { + "epoch": 0.05, + "learning_rate": 4.003980375821531e-05, + "loss": 8.9779, + "step": 4404 + }, + { + "epoch": 0.05, + "learning_rate": 4.0035175414236785e-05, + "loss": 2.3943, + "step": 4406 + }, + { + "epoch": 0.05, + "learning_rate": 4.0030547070258263e-05, + "loss": 0.5783, + "step": 4408 + }, + { + "epoch": 0.05, + "learning_rate": 4.0025918726279735e-05, + "loss": 6.561, + "step": 4410 + }, + { + "epoch": 0.05, + "learning_rate": 4.0021290382301214e-05, + "loss": 0.0152, + "step": 4412 + }, + { + "epoch": 0.05, + "learning_rate": 4.0016662038322686e-05, + "loss": 0.7434, + "step": 4414 + }, + { + "epoch": 0.05, + "learning_rate": 4.0012033694344165e-05, + "loss": 4.2037, + "step": 4416 + }, + { + "epoch": 0.05, + "learning_rate": 4.000740535036564e-05, + "loss": 2.2369, + "step": 4418 + }, + { + "epoch": 0.05, + "learning_rate": 4.0002777006387116e-05, + "loss": 5.2959, + "step": 4420 + }, + { + "epoch": 0.05, + "learning_rate": 3.999814866240859e-05, + "loss": 1.5798, + "step": 4422 + }, + { + "epoch": 0.05, + "learning_rate": 3.999352031843007e-05, + "loss": 1.7281, + "step": 4424 + }, + { + "epoch": 0.05, + "learning_rate": 3.998889197445154e-05, + "loss": 2.5788, + "step": 4426 + }, + { + "epoch": 0.05, + "learning_rate": 3.998426363047302e-05, + "loss": 0.0967, + "step": 4428 + }, + { + "epoch": 0.05, + "learning_rate": 3.9979635286494496e-05, + "loss": 2.2562, + "step": 4430 + }, + { + "epoch": 0.05, + "learning_rate": 3.997500694251597e-05, + "loss": 1.3388, + "step": 4432 + }, + { + "epoch": 0.05, + "learning_rate": 3.997037859853745e-05, + "loss": 5.109, + "step": 4434 + }, + { + "epoch": 0.05, + "learning_rate": 3.996575025455892e-05, + "loss": 0.4022, + "step": 4436 + }, + { + "epoch": 0.05, + "learning_rate": 3.99611219105804e-05, + "loss": 0.348, + "step": 4438 + }, + { + "epoch": 0.05, + "learning_rate": 3.995649356660187e-05, + "loss": 1.5391, + "step": 4440 + }, + { + "epoch": 0.05, + "learning_rate": 3.995186522262335e-05, + "loss": 2.2461, + "step": 4442 + }, + { + "epoch": 0.05, + "learning_rate": 3.994723687864482e-05, + "loss": 1.9752, + "step": 4444 + }, + { + "epoch": 0.05, + "learning_rate": 3.99426085346663e-05, + "loss": 0.7827, + "step": 4446 + }, + { + "epoch": 0.05, + "learning_rate": 3.993798019068777e-05, + "loss": 0.4375, + "step": 4448 + }, + { + "epoch": 0.05, + "learning_rate": 3.993335184670925e-05, + "loss": 2.1314, + "step": 4450 + }, + { + "epoch": 0.05, + "learning_rate": 3.992872350273072e-05, + "loss": 1.9471, + "step": 4452 + }, + { + "epoch": 0.05, + "learning_rate": 3.99240951587522e-05, + "loss": 1.1289, + "step": 4454 + }, + { + "epoch": 0.05, + "learning_rate": 3.9919466814773674e-05, + "loss": 0.2362, + "step": 4456 + }, + { + "epoch": 0.05, + "learning_rate": 3.991483847079515e-05, + "loss": 1.528, + "step": 4458 + }, + { + "epoch": 0.05, + "learning_rate": 3.9910210126816624e-05, + "loss": 6.2223, + "step": 4460 + }, + { + "epoch": 0.05, + "learning_rate": 3.99055817828381e-05, + "loss": 3.5753, + "step": 4462 + }, + { + "epoch": 0.05, + "learning_rate": 3.9900953438859575e-05, + "loss": 0.8897, + "step": 4464 + }, + { + "epoch": 0.05, + "learning_rate": 3.9896325094881054e-05, + "loss": 3.3783, + "step": 4466 + }, + { + "epoch": 0.05, + "learning_rate": 3.989169675090253e-05, + "loss": 3.46, + "step": 4468 + }, + { + "epoch": 0.05, + "learning_rate": 3.9887068406924005e-05, + "loss": 1.3344, + "step": 4470 + }, + { + "epoch": 0.05, + "learning_rate": 3.9882440062945484e-05, + "loss": 3.425, + "step": 4472 + }, + { + "epoch": 0.05, + "learning_rate": 3.9877811718966956e-05, + "loss": 1.7753, + "step": 4474 + }, + { + "epoch": 0.05, + "learning_rate": 3.9873183374988435e-05, + "loss": 1.6073, + "step": 4476 + }, + { + "epoch": 0.05, + "learning_rate": 3.986855503100991e-05, + "loss": 7.4978, + "step": 4478 + }, + { + "epoch": 0.05, + "learning_rate": 3.9863926687031385e-05, + "loss": 2.2612, + "step": 4480 + }, + { + "epoch": 0.05, + "learning_rate": 3.985929834305286e-05, + "loss": 4.1757, + "step": 4482 + }, + { + "epoch": 0.05, + "learning_rate": 3.9854669999074336e-05, + "loss": 1.2904, + "step": 4484 + }, + { + "epoch": 0.05, + "learning_rate": 3.985004165509581e-05, + "loss": 1.5816, + "step": 4486 + }, + { + "epoch": 0.05, + "learning_rate": 3.984541331111729e-05, + "loss": 2.2755, + "step": 4488 + }, + { + "epoch": 0.05, + "learning_rate": 3.984078496713876e-05, + "loss": 1.2274, + "step": 4490 + }, + { + "epoch": 0.05, + "learning_rate": 3.983615662316024e-05, + "loss": 2.6802, + "step": 4492 + }, + { + "epoch": 0.05, + "learning_rate": 3.983152827918171e-05, + "loss": 2.8658, + "step": 4494 + }, + { + "epoch": 0.05, + "learning_rate": 3.982689993520319e-05, + "loss": 4.0406, + "step": 4496 + }, + { + "epoch": 0.05, + "learning_rate": 3.982227159122466e-05, + "loss": 0.5544, + "step": 4498 + }, + { + "epoch": 0.05, + "learning_rate": 3.981764324724614e-05, + "loss": 4.6537, + "step": 4500 + }, + { + "epoch": 0.05, + "learning_rate": 3.981301490326761e-05, + "loss": 4.3703, + "step": 4502 + }, + { + "epoch": 0.05, + "learning_rate": 3.980838655928909e-05, + "loss": 1.4101, + "step": 4504 + }, + { + "epoch": 0.05, + "learning_rate": 3.980375821531056e-05, + "loss": 0.5175, + "step": 4506 + }, + { + "epoch": 0.05, + "learning_rate": 3.979912987133204e-05, + "loss": 1.8145, + "step": 4508 + }, + { + "epoch": 0.05, + "learning_rate": 3.979450152735352e-05, + "loss": 2.4829, + "step": 4510 + }, + { + "epoch": 0.05, + "learning_rate": 3.978987318337499e-05, + "loss": 0.9897, + "step": 4512 + }, + { + "epoch": 0.05, + "learning_rate": 3.978524483939647e-05, + "loss": 5.1008, + "step": 4514 + }, + { + "epoch": 0.05, + "learning_rate": 3.978061649541794e-05, + "loss": 5.1175, + "step": 4516 + }, + { + "epoch": 0.05, + "learning_rate": 3.977598815143942e-05, + "loss": 0.8235, + "step": 4518 + }, + { + "epoch": 0.05, + "learning_rate": 3.9771359807460894e-05, + "loss": 5.4076, + "step": 4520 + }, + { + "epoch": 0.05, + "learning_rate": 3.976673146348237e-05, + "loss": 1.9871, + "step": 4522 + }, + { + "epoch": 0.05, + "learning_rate": 3.976210311950384e-05, + "loss": 2.0262, + "step": 4524 + }, + { + "epoch": 0.05, + "learning_rate": 3.975747477552532e-05, + "loss": 4.065, + "step": 4526 + }, + { + "epoch": 0.05, + "learning_rate": 3.975284643154679e-05, + "loss": 0.0108, + "step": 4528 + }, + { + "epoch": 0.05, + "learning_rate": 3.974821808756827e-05, + "loss": 0.0038, + "step": 4530 + }, + { + "epoch": 0.05, + "learning_rate": 3.974358974358974e-05, + "loss": 4.4575, + "step": 4532 + }, + { + "epoch": 0.05, + "learning_rate": 3.973896139961122e-05, + "loss": 1.2768, + "step": 4534 + }, + { + "epoch": 0.05, + "learning_rate": 3.97343330556327e-05, + "loss": 2.3199, + "step": 4536 + }, + { + "epoch": 0.05, + "learning_rate": 3.972970471165417e-05, + "loss": 2.0048, + "step": 4538 + }, + { + "epoch": 0.05, + "learning_rate": 3.972507636767565e-05, + "loss": 1.9252, + "step": 4540 + }, + { + "epoch": 0.05, + "learning_rate": 3.972044802369712e-05, + "loss": 5.9763, + "step": 4542 + }, + { + "epoch": 0.05, + "learning_rate": 3.97158196797186e-05, + "loss": 3.3168, + "step": 4544 + }, + { + "epoch": 0.05, + "learning_rate": 3.971119133574007e-05, + "loss": 1.6326, + "step": 4546 + }, + { + "epoch": 0.05, + "learning_rate": 3.970656299176155e-05, + "loss": 2.0513, + "step": 4548 + }, + { + "epoch": 0.05, + "learning_rate": 3.970193464778302e-05, + "loss": 1.9088, + "step": 4550 + }, + { + "epoch": 0.05, + "learning_rate": 3.96973063038045e-05, + "loss": 5.2572, + "step": 4552 + }, + { + "epoch": 0.05, + "learning_rate": 3.969267795982597e-05, + "loss": 3.707, + "step": 4554 + }, + { + "epoch": 0.05, + "learning_rate": 3.968804961584745e-05, + "loss": 0.6561, + "step": 4556 + }, + { + "epoch": 0.05, + "learning_rate": 3.9683421271868924e-05, + "loss": 2.9832, + "step": 4558 + }, + { + "epoch": 0.05, + "learning_rate": 3.96787929278904e-05, + "loss": 5.836, + "step": 4560 + }, + { + "epoch": 0.05, + "learning_rate": 3.9674164583911875e-05, + "loss": 0.0449, + "step": 4562 + }, + { + "epoch": 0.05, + "learning_rate": 3.966953623993335e-05, + "loss": 2.8851, + "step": 4564 + }, + { + "epoch": 0.05, + "learning_rate": 3.9664907895954825e-05, + "loss": 2.8572, + "step": 4566 + }, + { + "epoch": 0.05, + "learning_rate": 3.9660279551976304e-05, + "loss": 1.932, + "step": 4568 + }, + { + "epoch": 0.05, + "learning_rate": 3.9655651207997776e-05, + "loss": 0.9523, + "step": 4570 + }, + { + "epoch": 0.05, + "learning_rate": 3.9651022864019255e-05, + "loss": 0.5522, + "step": 4572 + }, + { + "epoch": 0.05, + "learning_rate": 3.9646394520040734e-05, + "loss": 4.3791, + "step": 4574 + }, + { + "epoch": 0.05, + "learning_rate": 3.9641766176062206e-05, + "loss": 0.509, + "step": 4576 + }, + { + "epoch": 0.05, + "learning_rate": 3.9637137832083685e-05, + "loss": 4.6404, + "step": 4578 + }, + { + "epoch": 0.05, + "learning_rate": 3.963250948810516e-05, + "loss": 1.0693, + "step": 4580 + }, + { + "epoch": 0.05, + "learning_rate": 3.9627881144126636e-05, + "loss": 1.3206, + "step": 4582 + }, + { + "epoch": 0.05, + "learning_rate": 3.962325280014811e-05, + "loss": 0.8808, + "step": 4584 + }, + { + "epoch": 0.05, + "learning_rate": 3.9618624456169586e-05, + "loss": 2.1969, + "step": 4586 + }, + { + "epoch": 0.05, + "learning_rate": 3.961399611219106e-05, + "loss": 5.3653, + "step": 4588 + }, + { + "epoch": 0.05, + "learning_rate": 3.960936776821254e-05, + "loss": 3.7456, + "step": 4590 + }, + { + "epoch": 0.05, + "learning_rate": 3.960473942423401e-05, + "loss": 3.8856, + "step": 4592 + }, + { + "epoch": 0.05, + "learning_rate": 3.960011108025549e-05, + "loss": 0.429, + "step": 4594 + }, + { + "epoch": 0.05, + "learning_rate": 3.959548273627696e-05, + "loss": 0.7908, + "step": 4596 + }, + { + "epoch": 0.05, + "learning_rate": 3.959085439229844e-05, + "loss": 1.8853, + "step": 4598 + }, + { + "epoch": 0.05, + "learning_rate": 3.958622604831991e-05, + "loss": 2.6664, + "step": 4600 + }, + { + "epoch": 0.05, + "learning_rate": 3.958159770434139e-05, + "loss": 3.8257, + "step": 4602 + }, + { + "epoch": 0.05, + "learning_rate": 3.957696936036286e-05, + "loss": 1.4002, + "step": 4604 + }, + { + "epoch": 0.05, + "learning_rate": 3.957234101638434e-05, + "loss": 1.588, + "step": 4606 + }, + { + "epoch": 0.05, + "learning_rate": 3.956771267240581e-05, + "loss": 0.5521, + "step": 4608 + }, + { + "epoch": 0.05, + "learning_rate": 3.956308432842729e-05, + "loss": 1.0549, + "step": 4610 + }, + { + "epoch": 0.05, + "learning_rate": 3.9558455984448764e-05, + "loss": 0.1035, + "step": 4612 + }, + { + "epoch": 0.05, + "learning_rate": 3.955382764047024e-05, + "loss": 1.3683, + "step": 4614 + }, + { + "epoch": 0.05, + "learning_rate": 3.954919929649172e-05, + "loss": 2.3394, + "step": 4616 + }, + { + "epoch": 0.05, + "learning_rate": 3.954457095251319e-05, + "loss": 4.025, + "step": 4618 + }, + { + "epoch": 0.05, + "learning_rate": 3.953994260853467e-05, + "loss": 4.1844, + "step": 4620 + }, + { + "epoch": 0.05, + "learning_rate": 3.9535314264556144e-05, + "loss": 0.0213, + "step": 4622 + }, + { + "epoch": 0.05, + "learning_rate": 3.953068592057762e-05, + "loss": 1.2124, + "step": 4624 + }, + { + "epoch": 0.05, + "learning_rate": 3.9526057576599095e-05, + "loss": 1.8971, + "step": 4626 + }, + { + "epoch": 0.05, + "learning_rate": 3.9521429232620574e-05, + "loss": 2.398, + "step": 4628 + }, + { + "epoch": 0.05, + "learning_rate": 3.9516800888642046e-05, + "loss": 0.6788, + "step": 4630 + }, + { + "epoch": 0.05, + "learning_rate": 3.9512172544663525e-05, + "loss": 2.8854, + "step": 4632 + }, + { + "epoch": 0.05, + "learning_rate": 3.9507544200684997e-05, + "loss": 1.6213, + "step": 4634 + }, + { + "epoch": 0.05, + "learning_rate": 3.9502915856706475e-05, + "loss": 1.0189, + "step": 4636 + }, + { + "epoch": 0.05, + "learning_rate": 3.949828751272795e-05, + "loss": 0.0136, + "step": 4638 + }, + { + "epoch": 0.05, + "learning_rate": 3.9493659168749426e-05, + "loss": 1.4118, + "step": 4640 + }, + { + "epoch": 0.05, + "learning_rate": 3.94890308247709e-05, + "loss": 4.3905, + "step": 4642 + }, + { + "epoch": 0.05, + "learning_rate": 3.948440248079238e-05, + "loss": 1.4226, + "step": 4644 + }, + { + "epoch": 0.05, + "learning_rate": 3.947977413681385e-05, + "loss": 3.1797, + "step": 4646 + }, + { + "epoch": 0.05, + "learning_rate": 3.947514579283533e-05, + "loss": 0.8538, + "step": 4648 + }, + { + "epoch": 0.05, + "learning_rate": 3.94705174488568e-05, + "loss": 0.0128, + "step": 4650 + }, + { + "epoch": 0.05, + "learning_rate": 3.946588910487828e-05, + "loss": 2.024, + "step": 4652 + }, + { + "epoch": 0.05, + "learning_rate": 3.946126076089975e-05, + "loss": 2.9342, + "step": 4654 + }, + { + "epoch": 0.05, + "learning_rate": 3.945663241692123e-05, + "loss": 3.1793, + "step": 4656 + }, + { + "epoch": 0.05, + "learning_rate": 3.945200407294271e-05, + "loss": 1.088, + "step": 4658 + }, + { + "epoch": 0.05, + "learning_rate": 3.944737572896418e-05, + "loss": 1.5399, + "step": 4660 + }, + { + "epoch": 0.05, + "learning_rate": 3.944274738498566e-05, + "loss": 0.0477, + "step": 4662 + }, + { + "epoch": 0.05, + "learning_rate": 3.943811904100713e-05, + "loss": 2.2955, + "step": 4664 + }, + { + "epoch": 0.05, + "learning_rate": 3.943349069702861e-05, + "loss": 2.9965, + "step": 4666 + }, + { + "epoch": 0.05, + "learning_rate": 3.9428862353050075e-05, + "loss": 4.1841, + "step": 4668 + }, + { + "epoch": 0.05, + "learning_rate": 3.9424234009071554e-05, + "loss": 1.6437, + "step": 4670 + }, + { + "epoch": 0.05, + "learning_rate": 3.9419605665093026e-05, + "loss": 0.1889, + "step": 4672 + }, + { + "epoch": 0.05, + "learning_rate": 3.9414977321114505e-05, + "loss": 2.7675, + "step": 4674 + }, + { + "epoch": 0.05, + "learning_rate": 3.941034897713598e-05, + "loss": 2.3276, + "step": 4676 + }, + { + "epoch": 0.05, + "learning_rate": 3.9405720633157456e-05, + "loss": 3.8957, + "step": 4678 + }, + { + "epoch": 0.05, + "learning_rate": 3.9401092289178935e-05, + "loss": 3.2478, + "step": 4680 + }, + { + "epoch": 0.05, + "learning_rate": 3.939646394520041e-05, + "loss": 2.045, + "step": 4682 + }, + { + "epoch": 0.05, + "learning_rate": 3.9391835601221886e-05, + "loss": 0.0743, + "step": 4684 + }, + { + "epoch": 0.05, + "learning_rate": 3.938720725724336e-05, + "loss": 1.5927, + "step": 4686 + }, + { + "epoch": 0.05, + "learning_rate": 3.9382578913264836e-05, + "loss": 2.9151, + "step": 4688 + }, + { + "epoch": 0.05, + "learning_rate": 3.937795056928631e-05, + "loss": 1.62, + "step": 4690 + }, + { + "epoch": 0.05, + "learning_rate": 3.937332222530779e-05, + "loss": 0.1815, + "step": 4692 + }, + { + "epoch": 0.05, + "learning_rate": 3.936869388132926e-05, + "loss": 2.3717, + "step": 4694 + }, + { + "epoch": 0.05, + "learning_rate": 3.936406553735074e-05, + "loss": 5.7022, + "step": 4696 + }, + { + "epoch": 0.05, + "learning_rate": 3.935943719337221e-05, + "loss": 2.7208, + "step": 4698 + }, + { + "epoch": 0.05, + "learning_rate": 3.935480884939369e-05, + "loss": 3.6258, + "step": 4700 + }, + { + "epoch": 0.05, + "learning_rate": 3.935018050541516e-05, + "loss": 1.1701, + "step": 4702 + }, + { + "epoch": 0.05, + "learning_rate": 3.934555216143664e-05, + "loss": 4.1411, + "step": 4704 + }, + { + "epoch": 0.05, + "learning_rate": 3.934092381745811e-05, + "loss": 1.7656, + "step": 4706 + }, + { + "epoch": 0.05, + "learning_rate": 3.933629547347959e-05, + "loss": 4.7991, + "step": 4708 + }, + { + "epoch": 0.05, + "learning_rate": 3.933166712950106e-05, + "loss": 1.0016, + "step": 4710 + }, + { + "epoch": 0.05, + "learning_rate": 3.932703878552254e-05, + "loss": 2.9736, + "step": 4712 + }, + { + "epoch": 0.05, + "learning_rate": 3.9322410441544014e-05, + "loss": 1.3675, + "step": 4714 + }, + { + "epoch": 0.05, + "learning_rate": 3.931778209756549e-05, + "loss": 3.3842, + "step": 4716 + }, + { + "epoch": 0.05, + "learning_rate": 3.9313153753586964e-05, + "loss": 3.0452, + "step": 4718 + }, + { + "epoch": 0.05, + "learning_rate": 3.930852540960844e-05, + "loss": 1.6686, + "step": 4720 + }, + { + "epoch": 0.05, + "learning_rate": 3.930389706562992e-05, + "loss": 4.0743, + "step": 4722 + }, + { + "epoch": 0.05, + "learning_rate": 3.9299268721651394e-05, + "loss": 4.9611, + "step": 4724 + }, + { + "epoch": 0.05, + "learning_rate": 3.929464037767287e-05, + "loss": 0.0013, + "step": 4726 + }, + { + "epoch": 0.05, + "learning_rate": 3.9290012033694345e-05, + "loss": 3.8135, + "step": 4728 + }, + { + "epoch": 0.05, + "learning_rate": 3.9285383689715824e-05, + "loss": 2.6142, + "step": 4730 + }, + { + "epoch": 0.05, + "learning_rate": 3.9280755345737296e-05, + "loss": 1.6731, + "step": 4732 + }, + { + "epoch": 0.05, + "learning_rate": 3.9276127001758775e-05, + "loss": 5.0315, + "step": 4734 + }, + { + "epoch": 0.05, + "learning_rate": 3.927149865778025e-05, + "loss": 2.7396, + "step": 4736 + }, + { + "epoch": 0.05, + "learning_rate": 3.9266870313801725e-05, + "loss": 2.6589, + "step": 4738 + }, + { + "epoch": 0.05, + "learning_rate": 3.92622419698232e-05, + "loss": 1.0873, + "step": 4740 + }, + { + "epoch": 0.05, + "learning_rate": 3.9257613625844676e-05, + "loss": 3.6466, + "step": 4742 + }, + { + "epoch": 0.05, + "learning_rate": 3.925298528186615e-05, + "loss": 2.4287, + "step": 4744 + }, + { + "epoch": 0.05, + "learning_rate": 3.924835693788763e-05, + "loss": 3.077, + "step": 4746 + }, + { + "epoch": 0.05, + "learning_rate": 3.92437285939091e-05, + "loss": 1.052, + "step": 4748 + }, + { + "epoch": 0.05, + "learning_rate": 3.923910024993058e-05, + "loss": 0.0363, + "step": 4750 + }, + { + "epoch": 0.05, + "learning_rate": 3.923447190595205e-05, + "loss": 2.3535, + "step": 4752 + }, + { + "epoch": 0.05, + "learning_rate": 3.922984356197353e-05, + "loss": 2.7631, + "step": 4754 + }, + { + "epoch": 0.05, + "learning_rate": 3.9225215217995e-05, + "loss": 3.0131, + "step": 4756 + }, + { + "epoch": 0.05, + "learning_rate": 3.922058687401648e-05, + "loss": 7.7517, + "step": 4758 + }, + { + "epoch": 0.05, + "learning_rate": 3.921595853003795e-05, + "loss": 2.4111, + "step": 4760 + }, + { + "epoch": 0.05, + "learning_rate": 3.921133018605943e-05, + "loss": 2.5326, + "step": 4762 + }, + { + "epoch": 0.05, + "learning_rate": 3.920670184208091e-05, + "loss": 0.0182, + "step": 4764 + }, + { + "epoch": 0.05, + "learning_rate": 3.920207349810238e-05, + "loss": 5.6213, + "step": 4766 + }, + { + "epoch": 0.05, + "learning_rate": 3.919744515412386e-05, + "loss": 2.2223, + "step": 4768 + }, + { + "epoch": 0.05, + "learning_rate": 3.919281681014533e-05, + "loss": 2.5403, + "step": 4770 + }, + { + "epoch": 0.05, + "learning_rate": 3.918818846616681e-05, + "loss": 1.0237, + "step": 4772 + }, + { + "epoch": 0.05, + "learning_rate": 3.918356012218828e-05, + "loss": 3.13, + "step": 4774 + }, + { + "epoch": 0.06, + "learning_rate": 3.917893177820976e-05, + "loss": 6.999, + "step": 4776 + }, + { + "epoch": 0.06, + "learning_rate": 3.9174303434231234e-05, + "loss": 1.8554, + "step": 4778 + }, + { + "epoch": 0.06, + "learning_rate": 3.916967509025271e-05, + "loss": 1.3802, + "step": 4780 + }, + { + "epoch": 0.06, + "learning_rate": 3.9165046746274185e-05, + "loss": 0.3086, + "step": 4782 + }, + { + "epoch": 0.06, + "learning_rate": 3.9160418402295664e-05, + "loss": 1.6431, + "step": 4784 + }, + { + "epoch": 0.06, + "learning_rate": 3.9155790058317136e-05, + "loss": 3.5398, + "step": 4786 + }, + { + "epoch": 0.06, + "learning_rate": 3.9151161714338614e-05, + "loss": 0.8844, + "step": 4788 + }, + { + "epoch": 0.06, + "learning_rate": 3.9146533370360086e-05, + "loss": 2.0063, + "step": 4790 + }, + { + "epoch": 0.06, + "learning_rate": 3.9141905026381565e-05, + "loss": 5.8426, + "step": 4792 + }, + { + "epoch": 0.06, + "learning_rate": 3.913727668240304e-05, + "loss": 0.6475, + "step": 4794 + }, + { + "epoch": 0.06, + "learning_rate": 3.9132648338424516e-05, + "loss": 0.2759, + "step": 4796 + }, + { + "epoch": 0.06, + "learning_rate": 3.912801999444599e-05, + "loss": 2.8464, + "step": 4798 + }, + { + "epoch": 0.06, + "learning_rate": 3.912339165046747e-05, + "loss": 1.4457, + "step": 4800 + }, + { + "epoch": 0.06, + "learning_rate": 3.9118763306488946e-05, + "loss": 1.8669, + "step": 4802 + }, + { + "epoch": 0.06, + "learning_rate": 3.911413496251042e-05, + "loss": 0.2135, + "step": 4804 + }, + { + "epoch": 0.06, + "learning_rate": 3.91095066185319e-05, + "loss": 2.2403, + "step": 4806 + }, + { + "epoch": 0.06, + "learning_rate": 3.910487827455337e-05, + "loss": 0.6845, + "step": 4808 + }, + { + "epoch": 0.06, + "learning_rate": 3.910024993057485e-05, + "loss": 2.9653, + "step": 4810 + }, + { + "epoch": 0.06, + "learning_rate": 3.909562158659632e-05, + "loss": 0.7603, + "step": 4812 + }, + { + "epoch": 0.06, + "learning_rate": 3.909099324261779e-05, + "loss": 0.1151, + "step": 4814 + }, + { + "epoch": 0.06, + "learning_rate": 3.9086364898639264e-05, + "loss": 2.5371, + "step": 4816 + }, + { + "epoch": 0.06, + "learning_rate": 3.908173655466074e-05, + "loss": 2.3167, + "step": 4818 + }, + { + "epoch": 0.06, + "learning_rate": 3.9077108210682214e-05, + "loss": 3.2592, + "step": 4820 + }, + { + "epoch": 0.06, + "learning_rate": 3.907247986670369e-05, + "loss": 3.7942, + "step": 4822 + }, + { + "epoch": 0.06, + "learning_rate": 3.9067851522725165e-05, + "loss": 0.0519, + "step": 4824 + }, + { + "epoch": 0.06, + "learning_rate": 3.9063223178746644e-05, + "loss": 0.6033, + "step": 4826 + }, + { + "epoch": 0.06, + "learning_rate": 3.905859483476812e-05, + "loss": 1.6656, + "step": 4828 + }, + { + "epoch": 0.06, + "learning_rate": 3.9053966490789595e-05, + "loss": 0.8656, + "step": 4830 + }, + { + "epoch": 0.06, + "learning_rate": 3.9049338146811074e-05, + "loss": 2.1191, + "step": 4832 + }, + { + "epoch": 0.06, + "learning_rate": 3.9044709802832546e-05, + "loss": 5.6373, + "step": 4834 + }, + { + "epoch": 0.06, + "learning_rate": 3.9040081458854025e-05, + "loss": 0.8983, + "step": 4836 + }, + { + "epoch": 0.06, + "learning_rate": 3.90354531148755e-05, + "loss": 2.9937, + "step": 4838 + }, + { + "epoch": 0.06, + "learning_rate": 3.9030824770896975e-05, + "loss": 7.4537, + "step": 4840 + }, + { + "epoch": 0.06, + "learning_rate": 3.902619642691845e-05, + "loss": 0.1108, + "step": 4842 + }, + { + "epoch": 0.06, + "learning_rate": 3.9021568082939926e-05, + "loss": 0.4265, + "step": 4844 + }, + { + "epoch": 0.06, + "learning_rate": 3.90169397389614e-05, + "loss": 2.5046, + "step": 4846 + }, + { + "epoch": 0.06, + "learning_rate": 3.901231139498288e-05, + "loss": 2.3107, + "step": 4848 + }, + { + "epoch": 0.06, + "learning_rate": 3.900768305100435e-05, + "loss": 0.1253, + "step": 4850 + }, + { + "epoch": 0.06, + "learning_rate": 3.900305470702583e-05, + "loss": 0.2637, + "step": 4852 + }, + { + "epoch": 0.06, + "learning_rate": 3.89984263630473e-05, + "loss": 0.3914, + "step": 4854 + }, + { + "epoch": 0.06, + "learning_rate": 3.899379801906878e-05, + "loss": 0.037, + "step": 4856 + }, + { + "epoch": 0.06, + "learning_rate": 3.898916967509025e-05, + "loss": 3.1492, + "step": 4858 + }, + { + "epoch": 0.06, + "learning_rate": 3.898454133111173e-05, + "loss": 1.905, + "step": 4860 + }, + { + "epoch": 0.06, + "learning_rate": 3.89799129871332e-05, + "loss": 0.0708, + "step": 4862 + }, + { + "epoch": 0.06, + "learning_rate": 3.897528464315468e-05, + "loss": 2.9641, + "step": 4864 + }, + { + "epoch": 0.06, + "learning_rate": 3.897065629917615e-05, + "loss": 1.7082, + "step": 4866 + }, + { + "epoch": 0.06, + "learning_rate": 3.896602795519763e-05, + "loss": 7.9043, + "step": 4868 + }, + { + "epoch": 0.06, + "learning_rate": 3.896139961121911e-05, + "loss": 2.2043, + "step": 4870 + }, + { + "epoch": 0.06, + "learning_rate": 3.895677126724058e-05, + "loss": 5.0196, + "step": 4872 + }, + { + "epoch": 0.06, + "learning_rate": 3.895214292326206e-05, + "loss": 0.3016, + "step": 4874 + }, + { + "epoch": 0.06, + "learning_rate": 3.894751457928353e-05, + "loss": 4.3036, + "step": 4876 + }, + { + "epoch": 0.06, + "learning_rate": 3.894288623530501e-05, + "loss": 2.68, + "step": 4878 + }, + { + "epoch": 0.06, + "learning_rate": 3.8938257891326484e-05, + "loss": 4.5417, + "step": 4880 + }, + { + "epoch": 0.06, + "learning_rate": 3.893362954734796e-05, + "loss": 2.5056, + "step": 4882 + }, + { + "epoch": 0.06, + "learning_rate": 3.8929001203369435e-05, + "loss": 2.4901, + "step": 4884 + }, + { + "epoch": 0.06, + "learning_rate": 3.8924372859390914e-05, + "loss": 0.7102, + "step": 4886 + }, + { + "epoch": 0.06, + "learning_rate": 3.8919744515412386e-05, + "loss": 1.3608, + "step": 4888 + }, + { + "epoch": 0.06, + "learning_rate": 3.8915116171433864e-05, + "loss": 1.6231, + "step": 4890 + }, + { + "epoch": 0.06, + "learning_rate": 3.8910487827455337e-05, + "loss": 0.9319, + "step": 4892 + }, + { + "epoch": 0.06, + "learning_rate": 3.8905859483476815e-05, + "loss": 1.3414, + "step": 4894 + }, + { + "epoch": 0.06, + "learning_rate": 3.890123113949829e-05, + "loss": 5.8441, + "step": 4896 + }, + { + "epoch": 0.06, + "learning_rate": 3.8896602795519766e-05, + "loss": 2.3685, + "step": 4898 + }, + { + "epoch": 0.06, + "learning_rate": 3.889197445154124e-05, + "loss": 2.6617, + "step": 4900 + }, + { + "epoch": 0.06, + "learning_rate": 3.888734610756272e-05, + "loss": 0.8293, + "step": 4902 + }, + { + "epoch": 0.06, + "learning_rate": 3.888271776358419e-05, + "loss": 0.0004, + "step": 4904 + }, + { + "epoch": 0.06, + "learning_rate": 3.887808941960567e-05, + "loss": 0.0345, + "step": 4906 + }, + { + "epoch": 0.06, + "learning_rate": 3.887346107562715e-05, + "loss": 0.2248, + "step": 4908 + }, + { + "epoch": 0.06, + "learning_rate": 3.886883273164862e-05, + "loss": 0.1051, + "step": 4910 + }, + { + "epoch": 0.06, + "learning_rate": 3.88642043876701e-05, + "loss": 0.1389, + "step": 4912 + }, + { + "epoch": 0.06, + "learning_rate": 3.885957604369157e-05, + "loss": 0.0084, + "step": 4914 + }, + { + "epoch": 0.06, + "learning_rate": 3.885494769971305e-05, + "loss": 0.0001, + "step": 4916 + }, + { + "epoch": 0.06, + "learning_rate": 3.885031935573452e-05, + "loss": 0.5947, + "step": 4918 + }, + { + "epoch": 0.06, + "learning_rate": 3.8845691011756e-05, + "loss": 10.7161, + "step": 4920 + }, + { + "epoch": 0.06, + "learning_rate": 3.884106266777747e-05, + "loss": 4.4956, + "step": 4922 + }, + { + "epoch": 0.06, + "learning_rate": 3.883643432379895e-05, + "loss": 5.4229, + "step": 4924 + }, + { + "epoch": 0.06, + "learning_rate": 3.883180597982042e-05, + "loss": 9.6197, + "step": 4926 + }, + { + "epoch": 0.06, + "learning_rate": 3.88271776358419e-05, + "loss": 7.5324, + "step": 4928 + }, + { + "epoch": 0.06, + "learning_rate": 3.882254929186337e-05, + "loss": 4.4045, + "step": 4930 + }, + { + "epoch": 0.06, + "learning_rate": 3.881792094788485e-05, + "loss": 1.107, + "step": 4932 + }, + { + "epoch": 0.06, + "learning_rate": 3.8813292603906324e-05, + "loss": 7.7481, + "step": 4934 + }, + { + "epoch": 0.06, + "learning_rate": 3.88086642599278e-05, + "loss": 1.9928, + "step": 4936 + }, + { + "epoch": 0.06, + "learning_rate": 3.8804035915949275e-05, + "loss": 1.3477, + "step": 4938 + }, + { + "epoch": 0.06, + "learning_rate": 3.8799407571970753e-05, + "loss": 2.3497, + "step": 4940 + }, + { + "epoch": 0.06, + "learning_rate": 3.8794779227992226e-05, + "loss": 2.2197, + "step": 4942 + }, + { + "epoch": 0.06, + "learning_rate": 3.8790150884013704e-05, + "loss": 1.3948, + "step": 4944 + }, + { + "epoch": 0.06, + "learning_rate": 3.8785522540035176e-05, + "loss": 3.4161, + "step": 4946 + }, + { + "epoch": 0.06, + "learning_rate": 3.8780894196056655e-05, + "loss": 2.249, + "step": 4948 + }, + { + "epoch": 0.06, + "learning_rate": 3.8776265852078134e-05, + "loss": 2.6379, + "step": 4950 + }, + { + "epoch": 0.06, + "learning_rate": 3.8771637508099606e-05, + "loss": 4.1561, + "step": 4952 + }, + { + "epoch": 0.06, + "learning_rate": 3.8767009164121085e-05, + "loss": 3.1149, + "step": 4954 + }, + { + "epoch": 0.06, + "learning_rate": 3.876238082014256e-05, + "loss": 0.392, + "step": 4956 + }, + { + "epoch": 0.06, + "learning_rate": 3.875775247616403e-05, + "loss": 1.341, + "step": 4958 + }, + { + "epoch": 0.06, + "learning_rate": 3.87531241321855e-05, + "loss": 0.1616, + "step": 4960 + }, + { + "epoch": 0.06, + "learning_rate": 3.874849578820698e-05, + "loss": 2.9244, + "step": 4962 + }, + { + "epoch": 0.06, + "learning_rate": 3.874386744422845e-05, + "loss": 1.9351, + "step": 4964 + }, + { + "epoch": 0.06, + "learning_rate": 3.873923910024993e-05, + "loss": 1.536, + "step": 4966 + }, + { + "epoch": 0.06, + "learning_rate": 3.87346107562714e-05, + "loss": 0.1586, + "step": 4968 + }, + { + "epoch": 0.06, + "learning_rate": 3.872998241229288e-05, + "loss": 3.6483, + "step": 4970 + }, + { + "epoch": 0.06, + "learning_rate": 3.8725354068314353e-05, + "loss": 0.0153, + "step": 4972 + }, + { + "epoch": 0.06, + "learning_rate": 3.872072572433583e-05, + "loss": 1.2945, + "step": 4974 + }, + { + "epoch": 0.06, + "learning_rate": 3.871609738035731e-05, + "loss": 4.8801, + "step": 4976 + }, + { + "epoch": 0.06, + "learning_rate": 3.871146903637878e-05, + "loss": 5.0867, + "step": 4978 + }, + { + "epoch": 0.06, + "learning_rate": 3.870684069240026e-05, + "loss": 1.8639, + "step": 4980 + }, + { + "epoch": 0.06, + "learning_rate": 3.8702212348421734e-05, + "loss": 0.4212, + "step": 4982 + }, + { + "epoch": 0.06, + "learning_rate": 3.869758400444321e-05, + "loss": 4.7299, + "step": 4984 + }, + { + "epoch": 0.06, + "learning_rate": 3.8692955660464685e-05, + "loss": 2.7267, + "step": 4986 + }, + { + "epoch": 0.06, + "learning_rate": 3.8688327316486164e-05, + "loss": 4.7414, + "step": 4988 + }, + { + "epoch": 0.06, + "learning_rate": 3.8683698972507636e-05, + "loss": 2.5881, + "step": 4990 + }, + { + "epoch": 0.06, + "learning_rate": 3.8679070628529115e-05, + "loss": 0.5599, + "step": 4992 + }, + { + "epoch": 0.06, + "learning_rate": 3.8674442284550587e-05, + "loss": 1.5483, + "step": 4994 + }, + { + "epoch": 0.06, + "learning_rate": 3.8669813940572065e-05, + "loss": 4.0784, + "step": 4996 + }, + { + "epoch": 0.06, + "learning_rate": 3.866518559659354e-05, + "loss": 0.7261, + "step": 4998 + }, + { + "epoch": 0.06, + "learning_rate": 3.8660557252615016e-05, + "loss": 2.8426, + "step": 5000 + }, + { + "epoch": 0.06, + "learning_rate": 3.865592890863649e-05, + "loss": 1.5697, + "step": 5002 + }, + { + "epoch": 0.06, + "learning_rate": 3.865130056465797e-05, + "loss": 0.1206, + "step": 5004 + }, + { + "epoch": 0.06, + "learning_rate": 3.864667222067944e-05, + "loss": 0.2807, + "step": 5006 + }, + { + "epoch": 0.06, + "learning_rate": 3.864204387670092e-05, + "loss": 3.3311, + "step": 5008 + }, + { + "epoch": 0.06, + "learning_rate": 3.863741553272239e-05, + "loss": 0.0544, + "step": 5010 + }, + { + "epoch": 0.06, + "learning_rate": 3.863278718874387e-05, + "loss": 0.0463, + "step": 5012 + }, + { + "epoch": 0.06, + "learning_rate": 3.862815884476535e-05, + "loss": 6.3057, + "step": 5014 + }, + { + "epoch": 0.06, + "learning_rate": 3.862353050078682e-05, + "loss": 1.5558, + "step": 5016 + }, + { + "epoch": 0.06, + "learning_rate": 3.86189021568083e-05, + "loss": 3.0809, + "step": 5018 + }, + { + "epoch": 0.06, + "learning_rate": 3.861427381282977e-05, + "loss": 0.4249, + "step": 5020 + }, + { + "epoch": 0.06, + "learning_rate": 3.860964546885125e-05, + "loss": 1.6821, + "step": 5022 + }, + { + "epoch": 0.06, + "learning_rate": 3.860501712487272e-05, + "loss": 1.2718, + "step": 5024 + }, + { + "epoch": 0.06, + "learning_rate": 3.86003887808942e-05, + "loss": 2.7965, + "step": 5026 + }, + { + "epoch": 0.06, + "learning_rate": 3.859576043691567e-05, + "loss": 6.838, + "step": 5028 + }, + { + "epoch": 0.06, + "learning_rate": 3.859113209293715e-05, + "loss": 2.1504, + "step": 5030 + }, + { + "epoch": 0.06, + "learning_rate": 3.858650374895862e-05, + "loss": 0.9382, + "step": 5032 + }, + { + "epoch": 0.06, + "learning_rate": 3.85818754049801e-05, + "loss": 1.3455, + "step": 5034 + }, + { + "epoch": 0.06, + "learning_rate": 3.8577247061001574e-05, + "loss": 1.9301, + "step": 5036 + }, + { + "epoch": 0.06, + "learning_rate": 3.857261871702305e-05, + "loss": 4.1787, + "step": 5038 + }, + { + "epoch": 0.06, + "learning_rate": 3.8567990373044525e-05, + "loss": 0.995, + "step": 5040 + }, + { + "epoch": 0.06, + "learning_rate": 3.8563362029066004e-05, + "loss": 0.3902, + "step": 5042 + }, + { + "epoch": 0.06, + "learning_rate": 3.8558733685087476e-05, + "loss": 2.2692, + "step": 5044 + }, + { + "epoch": 0.06, + "learning_rate": 3.8554105341108954e-05, + "loss": 1.8673, + "step": 5046 + }, + { + "epoch": 0.06, + "learning_rate": 3.8549476997130426e-05, + "loss": 4.03, + "step": 5048 + }, + { + "epoch": 0.06, + "learning_rate": 3.8544848653151905e-05, + "loss": 1.3055, + "step": 5050 + }, + { + "epoch": 0.06, + "learning_rate": 3.854022030917338e-05, + "loss": 4.1863, + "step": 5052 + }, + { + "epoch": 0.06, + "learning_rate": 3.8535591965194856e-05, + "loss": 0.5703, + "step": 5054 + }, + { + "epoch": 0.06, + "learning_rate": 3.8530963621216335e-05, + "loss": 4.6885, + "step": 5056 + }, + { + "epoch": 0.06, + "learning_rate": 3.852633527723781e-05, + "loss": 1.3242, + "step": 5058 + }, + { + "epoch": 0.06, + "learning_rate": 3.8521706933259286e-05, + "loss": 0.1505, + "step": 5060 + }, + { + "epoch": 0.06, + "learning_rate": 3.851707858928076e-05, + "loss": 2.5168, + "step": 5062 + }, + { + "epoch": 0.06, + "learning_rate": 3.8512450245302237e-05, + "loss": 0.5723, + "step": 5064 + }, + { + "epoch": 0.06, + "learning_rate": 3.850782190132371e-05, + "loss": 1.6519, + "step": 5066 + }, + { + "epoch": 0.06, + "learning_rate": 3.850319355734519e-05, + "loss": 4.9894, + "step": 5068 + }, + { + "epoch": 0.06, + "learning_rate": 3.849856521336666e-05, + "loss": 0.1856, + "step": 5070 + }, + { + "epoch": 0.06, + "learning_rate": 3.849393686938814e-05, + "loss": 1.777, + "step": 5072 + }, + { + "epoch": 0.06, + "learning_rate": 3.848930852540961e-05, + "loss": 2.2732, + "step": 5074 + }, + { + "epoch": 0.06, + "learning_rate": 3.848468018143109e-05, + "loss": 2.4011, + "step": 5076 + }, + { + "epoch": 0.06, + "learning_rate": 3.848005183745256e-05, + "loss": 3.0266, + "step": 5078 + }, + { + "epoch": 0.06, + "learning_rate": 3.847542349347404e-05, + "loss": 1.9835, + "step": 5080 + }, + { + "epoch": 0.06, + "learning_rate": 3.847079514949551e-05, + "loss": 5.839, + "step": 5082 + }, + { + "epoch": 0.06, + "learning_rate": 3.846616680551699e-05, + "loss": 1.9507, + "step": 5084 + }, + { + "epoch": 0.06, + "learning_rate": 3.846153846153846e-05, + "loss": 2.3814, + "step": 5086 + }, + { + "epoch": 0.06, + "learning_rate": 3.845691011755994e-05, + "loss": 0.5134, + "step": 5088 + }, + { + "epoch": 0.06, + "learning_rate": 3.8452281773581414e-05, + "loss": 2.2499, + "step": 5090 + }, + { + "epoch": 0.06, + "learning_rate": 3.844765342960289e-05, + "loss": 2.9633, + "step": 5092 + }, + { + "epoch": 0.06, + "learning_rate": 3.8443025085624365e-05, + "loss": 2.5803, + "step": 5094 + }, + { + "epoch": 0.06, + "learning_rate": 3.843839674164584e-05, + "loss": 2.356, + "step": 5096 + }, + { + "epoch": 0.06, + "learning_rate": 3.843376839766732e-05, + "loss": 0.0496, + "step": 5098 + }, + { + "epoch": 0.06, + "learning_rate": 3.8429140053688794e-05, + "loss": 1.3461, + "step": 5100 + }, + { + "epoch": 0.06, + "learning_rate": 3.842451170971027e-05, + "loss": 0.3546, + "step": 5102 + }, + { + "epoch": 0.06, + "learning_rate": 3.841988336573174e-05, + "loss": 1.6203, + "step": 5104 + }, + { + "epoch": 0.06, + "learning_rate": 3.841525502175322e-05, + "loss": 2.5106, + "step": 5106 + }, + { + "epoch": 0.06, + "learning_rate": 3.841062667777469e-05, + "loss": 0.0717, + "step": 5108 + }, + { + "epoch": 0.06, + "learning_rate": 3.840599833379617e-05, + "loss": 6.2198, + "step": 5110 + }, + { + "epoch": 0.06, + "learning_rate": 3.840136998981764e-05, + "loss": 2.9887, + "step": 5112 + }, + { + "epoch": 0.06, + "learning_rate": 3.839674164583912e-05, + "loss": 9.2002, + "step": 5114 + }, + { + "epoch": 0.06, + "learning_rate": 3.839211330186059e-05, + "loss": 0.7365, + "step": 5116 + }, + { + "epoch": 0.06, + "learning_rate": 3.838748495788207e-05, + "loss": 0.0608, + "step": 5118 + }, + { + "epoch": 0.06, + "learning_rate": 3.838285661390354e-05, + "loss": 2.0725, + "step": 5120 + }, + { + "epoch": 0.06, + "learning_rate": 3.837822826992502e-05, + "loss": 2.2139, + "step": 5122 + }, + { + "epoch": 0.06, + "learning_rate": 3.83735999259465e-05, + "loss": 1.8131, + "step": 5124 + }, + { + "epoch": 0.06, + "learning_rate": 3.836897158196797e-05, + "loss": 5.0428, + "step": 5126 + }, + { + "epoch": 0.06, + "learning_rate": 3.836434323798945e-05, + "loss": 2.3763, + "step": 5128 + }, + { + "epoch": 0.06, + "learning_rate": 3.835971489401092e-05, + "loss": 0.0037, + "step": 5130 + }, + { + "epoch": 0.06, + "learning_rate": 3.83550865500324e-05, + "loss": 0.0028, + "step": 5132 + }, + { + "epoch": 0.06, + "learning_rate": 3.835045820605387e-05, + "loss": 2.565, + "step": 5134 + }, + { + "epoch": 0.06, + "learning_rate": 3.834582986207535e-05, + "loss": 3.7344, + "step": 5136 + }, + { + "epoch": 0.06, + "learning_rate": 3.8341201518096824e-05, + "loss": 3.4581, + "step": 5138 + }, + { + "epoch": 0.06, + "learning_rate": 3.83365731741183e-05, + "loss": 2.9598, + "step": 5140 + }, + { + "epoch": 0.06, + "learning_rate": 3.8331944830139775e-05, + "loss": 3.2025, + "step": 5142 + }, + { + "epoch": 0.06, + "learning_rate": 3.8327316486161254e-05, + "loss": 4.3141, + "step": 5144 + }, + { + "epoch": 0.06, + "learning_rate": 3.8322688142182726e-05, + "loss": 5.3098, + "step": 5146 + }, + { + "epoch": 0.06, + "learning_rate": 3.8318059798204204e-05, + "loss": 5.6578, + "step": 5148 + }, + { + "epoch": 0.06, + "learning_rate": 3.8313431454225676e-05, + "loss": 2.0257, + "step": 5150 + }, + { + "epoch": 0.06, + "learning_rate": 3.8308803110247155e-05, + "loss": 1.9998, + "step": 5152 + }, + { + "epoch": 0.06, + "learning_rate": 3.830417476626863e-05, + "loss": 3.8605, + "step": 5154 + }, + { + "epoch": 0.06, + "learning_rate": 3.8299546422290106e-05, + "loss": 3.4464, + "step": 5156 + }, + { + "epoch": 0.06, + "learning_rate": 3.829491807831158e-05, + "loss": 2.5161, + "step": 5158 + }, + { + "epoch": 0.06, + "learning_rate": 3.829028973433306e-05, + "loss": 0.3848, + "step": 5160 + }, + { + "epoch": 0.06, + "learning_rate": 3.8285661390354536e-05, + "loss": 0.3286, + "step": 5162 + }, + { + "epoch": 0.06, + "learning_rate": 3.828103304637601e-05, + "loss": 3.7839, + "step": 5164 + }, + { + "epoch": 0.06, + "learning_rate": 3.827640470239749e-05, + "loss": 2.2429, + "step": 5166 + }, + { + "epoch": 0.06, + "learning_rate": 3.827177635841896e-05, + "loss": 3.4621, + "step": 5168 + }, + { + "epoch": 0.06, + "learning_rate": 3.826714801444044e-05, + "loss": 2.5731, + "step": 5170 + }, + { + "epoch": 0.06, + "learning_rate": 3.826251967046191e-05, + "loss": 3.2839, + "step": 5172 + }, + { + "epoch": 0.06, + "learning_rate": 3.825789132648339e-05, + "loss": 0.2645, + "step": 5174 + }, + { + "epoch": 0.06, + "learning_rate": 3.825326298250486e-05, + "loss": 4.986, + "step": 5176 + }, + { + "epoch": 0.06, + "learning_rate": 3.824863463852634e-05, + "loss": 2.7931, + "step": 5178 + }, + { + "epoch": 0.06, + "learning_rate": 3.824400629454781e-05, + "loss": 1.9307, + "step": 5180 + }, + { + "epoch": 0.06, + "learning_rate": 3.823937795056929e-05, + "loss": 7.6953, + "step": 5182 + }, + { + "epoch": 0.06, + "learning_rate": 3.823474960659076e-05, + "loss": 1.0336, + "step": 5184 + }, + { + "epoch": 0.06, + "learning_rate": 3.823012126261224e-05, + "loss": 2.6635, + "step": 5186 + }, + { + "epoch": 0.06, + "learning_rate": 3.822549291863371e-05, + "loss": 2.7224, + "step": 5188 + }, + { + "epoch": 0.06, + "learning_rate": 3.822086457465519e-05, + "loss": 4.6668, + "step": 5190 + }, + { + "epoch": 0.06, + "learning_rate": 3.8216236230676664e-05, + "loss": 2.2009, + "step": 5192 + }, + { + "epoch": 0.06, + "learning_rate": 3.821160788669814e-05, + "loss": 0.7188, + "step": 5194 + }, + { + "epoch": 0.06, + "learning_rate": 3.8206979542719615e-05, + "loss": 3.4383, + "step": 5196 + }, + { + "epoch": 0.06, + "learning_rate": 3.8202351198741093e-05, + "loss": 6.9004, + "step": 5198 + }, + { + "epoch": 0.06, + "learning_rate": 3.8197722854762565e-05, + "loss": 1.104, + "step": 5200 + }, + { + "epoch": 0.06, + "learning_rate": 3.8193094510784044e-05, + "loss": 3.5941, + "step": 5202 + }, + { + "epoch": 0.06, + "learning_rate": 3.818846616680552e-05, + "loss": 3.1965, + "step": 5204 + }, + { + "epoch": 0.06, + "learning_rate": 3.8183837822826995e-05, + "loss": 2.0026, + "step": 5206 + }, + { + "epoch": 0.06, + "learning_rate": 3.8179209478848474e-05, + "loss": 3.9871, + "step": 5208 + }, + { + "epoch": 0.06, + "learning_rate": 3.8174581134869946e-05, + "loss": 1.4225, + "step": 5210 + }, + { + "epoch": 0.06, + "learning_rate": 3.8169952790891425e-05, + "loss": 0.024, + "step": 5212 + }, + { + "epoch": 0.06, + "learning_rate": 3.81653244469129e-05, + "loss": 2.7044, + "step": 5214 + }, + { + "epoch": 0.06, + "learning_rate": 3.8160696102934376e-05, + "loss": 1.3941, + "step": 5216 + }, + { + "epoch": 0.06, + "learning_rate": 3.815606775895585e-05, + "loss": 0.271, + "step": 5218 + }, + { + "epoch": 0.06, + "learning_rate": 3.8151439414977326e-05, + "loss": 0.7559, + "step": 5220 + }, + { + "epoch": 0.06, + "learning_rate": 3.81468110709988e-05, + "loss": 0.9346, + "step": 5222 + }, + { + "epoch": 0.06, + "learning_rate": 3.814218272702028e-05, + "loss": 1.6737, + "step": 5224 + }, + { + "epoch": 0.06, + "learning_rate": 3.813755438304175e-05, + "loss": 0.0022, + "step": 5226 + }, + { + "epoch": 0.06, + "learning_rate": 3.813292603906323e-05, + "loss": 2.5481, + "step": 5228 + }, + { + "epoch": 0.06, + "learning_rate": 3.81282976950847e-05, + "loss": 0.5465, + "step": 5230 + }, + { + "epoch": 0.06, + "learning_rate": 3.812366935110618e-05, + "loss": 2.7756, + "step": 5232 + }, + { + "epoch": 0.06, + "learning_rate": 3.811904100712765e-05, + "loss": 5.7632, + "step": 5234 + }, + { + "epoch": 0.06, + "learning_rate": 3.811441266314913e-05, + "loss": 2.1915, + "step": 5236 + }, + { + "epoch": 0.06, + "learning_rate": 3.81097843191706e-05, + "loss": 3.282, + "step": 5238 + }, + { + "epoch": 0.06, + "learning_rate": 3.810515597519208e-05, + "loss": 4.1088, + "step": 5240 + }, + { + "epoch": 0.06, + "learning_rate": 3.810052763121356e-05, + "loss": 4.383, + "step": 5242 + }, + { + "epoch": 0.06, + "learning_rate": 3.809589928723503e-05, + "loss": 1.5307, + "step": 5244 + }, + { + "epoch": 0.06, + "learning_rate": 3.809127094325651e-05, + "loss": 1.9321, + "step": 5246 + }, + { + "epoch": 0.06, + "learning_rate": 3.8086642599277976e-05, + "loss": 0.3864, + "step": 5248 + }, + { + "epoch": 0.06, + "learning_rate": 3.8082014255299454e-05, + "loss": 3.9858, + "step": 5250 + }, + { + "epoch": 0.06, + "learning_rate": 3.8077385911320926e-05, + "loss": 3.2411, + "step": 5252 + }, + { + "epoch": 0.06, + "learning_rate": 3.8072757567342405e-05, + "loss": 4.6345, + "step": 5254 + }, + { + "epoch": 0.06, + "learning_rate": 3.806812922336388e-05, + "loss": 1.2572, + "step": 5256 + }, + { + "epoch": 0.06, + "learning_rate": 3.8063500879385356e-05, + "loss": 1.6974, + "step": 5258 + }, + { + "epoch": 0.06, + "learning_rate": 3.805887253540683e-05, + "loss": 4.5061, + "step": 5260 + }, + { + "epoch": 0.06, + "learning_rate": 3.805424419142831e-05, + "loss": 3.5761, + "step": 5262 + }, + { + "epoch": 0.06, + "learning_rate": 3.804961584744978e-05, + "loss": 0.6903, + "step": 5264 + }, + { + "epoch": 0.06, + "learning_rate": 3.804498750347126e-05, + "loss": 5.0607, + "step": 5266 + }, + { + "epoch": 0.06, + "learning_rate": 3.804035915949274e-05, + "loss": 0.3206, + "step": 5268 + }, + { + "epoch": 0.06, + "learning_rate": 3.803573081551421e-05, + "loss": 0.9143, + "step": 5270 + }, + { + "epoch": 0.06, + "learning_rate": 3.803110247153569e-05, + "loss": 0.1264, + "step": 5272 + }, + { + "epoch": 0.06, + "learning_rate": 3.802647412755716e-05, + "loss": 1.9484, + "step": 5274 + }, + { + "epoch": 0.06, + "learning_rate": 3.802184578357864e-05, + "loss": 1.5683, + "step": 5276 + }, + { + "epoch": 0.06, + "learning_rate": 3.801721743960011e-05, + "loss": 1.4723, + "step": 5278 + }, + { + "epoch": 0.06, + "learning_rate": 3.801258909562159e-05, + "loss": 1.4874, + "step": 5280 + }, + { + "epoch": 0.06, + "learning_rate": 3.800796075164306e-05, + "loss": 0.9079, + "step": 5282 + }, + { + "epoch": 0.06, + "learning_rate": 3.800333240766454e-05, + "loss": 0.6631, + "step": 5284 + }, + { + "epoch": 0.06, + "learning_rate": 3.799870406368601e-05, + "loss": 0.9062, + "step": 5286 + }, + { + "epoch": 0.06, + "learning_rate": 3.799407571970749e-05, + "loss": 0.6113, + "step": 5288 + }, + { + "epoch": 0.06, + "learning_rate": 3.798944737572896e-05, + "loss": 6.3689, + "step": 5290 + }, + { + "epoch": 0.06, + "learning_rate": 3.798481903175044e-05, + "loss": 3.1224, + "step": 5292 + }, + { + "epoch": 0.06, + "learning_rate": 3.7980190687771914e-05, + "loss": 1.6779, + "step": 5294 + }, + { + "epoch": 0.06, + "learning_rate": 3.797556234379339e-05, + "loss": 0.0004, + "step": 5296 + }, + { + "epoch": 0.06, + "learning_rate": 3.7970933999814865e-05, + "loss": 1.2546, + "step": 5298 + }, + { + "epoch": 0.06, + "learning_rate": 3.7966305655836343e-05, + "loss": 1.4407, + "step": 5300 + }, + { + "epoch": 0.06, + "learning_rate": 3.7961677311857815e-05, + "loss": 4.6335, + "step": 5302 + }, + { + "epoch": 0.06, + "learning_rate": 3.7957048967879294e-05, + "loss": 6.8399, + "step": 5304 + }, + { + "epoch": 0.06, + "learning_rate": 3.7952420623900766e-05, + "loss": 8.094, + "step": 5306 + }, + { + "epoch": 0.06, + "learning_rate": 3.7947792279922245e-05, + "loss": 2.2177, + "step": 5308 + }, + { + "epoch": 0.06, + "learning_rate": 3.7943163935943724e-05, + "loss": 5.5943, + "step": 5310 + }, + { + "epoch": 0.06, + "learning_rate": 3.7938535591965196e-05, + "loss": 0.0181, + "step": 5312 + }, + { + "epoch": 0.06, + "learning_rate": 3.7933907247986675e-05, + "loss": 1.6041, + "step": 5314 + }, + { + "epoch": 0.06, + "learning_rate": 3.792927890400815e-05, + "loss": 1.452, + "step": 5316 + }, + { + "epoch": 0.06, + "learning_rate": 3.7924650560029626e-05, + "loss": 5.6957, + "step": 5318 + }, + { + "epoch": 0.06, + "learning_rate": 3.79200222160511e-05, + "loss": 0.6356, + "step": 5320 + }, + { + "epoch": 0.06, + "learning_rate": 3.7915393872072577e-05, + "loss": 1.9811, + "step": 5322 + }, + { + "epoch": 0.06, + "learning_rate": 3.791076552809405e-05, + "loss": 2.7511, + "step": 5324 + }, + { + "epoch": 0.06, + "learning_rate": 3.790613718411553e-05, + "loss": 2.3161, + "step": 5326 + }, + { + "epoch": 0.06, + "learning_rate": 3.7901508840137e-05, + "loss": 3.8673, + "step": 5328 + }, + { + "epoch": 0.06, + "learning_rate": 3.789688049615848e-05, + "loss": 1.6003, + "step": 5330 + }, + { + "epoch": 0.06, + "learning_rate": 3.789225215217995e-05, + "loss": 0.9096, + "step": 5332 + }, + { + "epoch": 0.06, + "learning_rate": 3.788762380820143e-05, + "loss": 1.1544, + "step": 5334 + }, + { + "epoch": 0.06, + "learning_rate": 3.78829954642229e-05, + "loss": 0.0033, + "step": 5336 + }, + { + "epoch": 0.06, + "learning_rate": 3.787836712024438e-05, + "loss": 0.9571, + "step": 5338 + }, + { + "epoch": 0.06, + "learning_rate": 3.787373877626585e-05, + "loss": 0.001, + "step": 5340 + }, + { + "epoch": 0.06, + "learning_rate": 3.786911043228733e-05, + "loss": 1.9321, + "step": 5342 + }, + { + "epoch": 0.06, + "learning_rate": 3.78644820883088e-05, + "loss": 0.1667, + "step": 5344 + }, + { + "epoch": 0.06, + "learning_rate": 3.785985374433028e-05, + "loss": 6.1003, + "step": 5346 + }, + { + "epoch": 0.06, + "learning_rate": 3.7855225400351754e-05, + "loss": 1.4425, + "step": 5348 + }, + { + "epoch": 0.06, + "learning_rate": 3.785059705637323e-05, + "loss": 2.3592, + "step": 5350 + }, + { + "epoch": 0.06, + "learning_rate": 3.784596871239471e-05, + "loss": 1.515, + "step": 5352 + }, + { + "epoch": 0.06, + "learning_rate": 3.784134036841618e-05, + "loss": 0.0006, + "step": 5354 + }, + { + "epoch": 0.06, + "learning_rate": 3.783671202443766e-05, + "loss": 3.3751, + "step": 5356 + }, + { + "epoch": 0.06, + "learning_rate": 3.7832083680459134e-05, + "loss": 0.0743, + "step": 5358 + }, + { + "epoch": 0.06, + "learning_rate": 3.782745533648061e-05, + "loss": 5.1054, + "step": 5360 + }, + { + "epoch": 0.06, + "learning_rate": 3.7822826992502085e-05, + "loss": 7.6052, + "step": 5362 + }, + { + "epoch": 0.06, + "learning_rate": 3.7818198648523564e-05, + "loss": 1.0047, + "step": 5364 + }, + { + "epoch": 0.06, + "learning_rate": 3.7813570304545036e-05, + "loss": 6.4278, + "step": 5366 + }, + { + "epoch": 0.06, + "learning_rate": 3.7808941960566515e-05, + "loss": 4.1618, + "step": 5368 + }, + { + "epoch": 0.06, + "learning_rate": 3.780431361658799e-05, + "loss": 3.9177, + "step": 5370 + }, + { + "epoch": 0.06, + "learning_rate": 3.7799685272609466e-05, + "loss": 2.218, + "step": 5372 + }, + { + "epoch": 0.06, + "learning_rate": 3.779505692863094e-05, + "loss": 1.1417, + "step": 5374 + }, + { + "epoch": 0.06, + "learning_rate": 3.7790428584652416e-05, + "loss": 5.7648, + "step": 5376 + }, + { + "epoch": 0.06, + "learning_rate": 3.778580024067389e-05, + "loss": 6.1388, + "step": 5378 + }, + { + "epoch": 0.06, + "learning_rate": 3.778117189669537e-05, + "loss": 4.8558, + "step": 5380 + }, + { + "epoch": 0.06, + "learning_rate": 3.777654355271684e-05, + "loss": 0.4554, + "step": 5382 + }, + { + "epoch": 0.06, + "learning_rate": 3.777191520873832e-05, + "loss": 2.8716, + "step": 5384 + }, + { + "epoch": 0.06, + "learning_rate": 3.776728686475979e-05, + "loss": 2.2251, + "step": 5386 + }, + { + "epoch": 0.06, + "learning_rate": 3.776265852078127e-05, + "loss": 5.503, + "step": 5388 + }, + { + "epoch": 0.06, + "learning_rate": 3.775803017680275e-05, + "loss": 0.9463, + "step": 5390 + }, + { + "epoch": 0.06, + "learning_rate": 3.775340183282422e-05, + "loss": 1.5516, + "step": 5392 + }, + { + "epoch": 0.06, + "learning_rate": 3.774877348884569e-05, + "loss": 0.0038, + "step": 5394 + }, + { + "epoch": 0.06, + "learning_rate": 3.7744145144867164e-05, + "loss": 4.1381, + "step": 5396 + }, + { + "epoch": 0.06, + "learning_rate": 3.773951680088864e-05, + "loss": 0.4716, + "step": 5398 + }, + { + "epoch": 0.06, + "learning_rate": 3.7734888456910115e-05, + "loss": 4.9654, + "step": 5400 + }, + { + "epoch": 0.06, + "learning_rate": 3.7730260112931593e-05, + "loss": 2.5872, + "step": 5402 + }, + { + "epoch": 0.06, + "learning_rate": 3.7725631768953066e-05, + "loss": 3.546, + "step": 5404 + }, + { + "epoch": 0.06, + "learning_rate": 3.7721003424974544e-05, + "loss": 1.5726, + "step": 5406 + }, + { + "epoch": 0.06, + "learning_rate": 3.7716375080996016e-05, + "loss": 2.2651, + "step": 5408 + }, + { + "epoch": 0.06, + "learning_rate": 3.7711746737017495e-05, + "loss": 1.372, + "step": 5410 + }, + { + "epoch": 0.06, + "learning_rate": 3.770711839303897e-05, + "loss": 1.7954, + "step": 5412 + }, + { + "epoch": 0.06, + "learning_rate": 3.7702490049060446e-05, + "loss": 6.6975, + "step": 5414 + }, + { + "epoch": 0.06, + "learning_rate": 3.7697861705081925e-05, + "loss": 1.4121, + "step": 5416 + }, + { + "epoch": 0.06, + "learning_rate": 3.76932333611034e-05, + "loss": 1.1777, + "step": 5418 + }, + { + "epoch": 0.06, + "learning_rate": 3.7688605017124876e-05, + "loss": 1.8239, + "step": 5420 + }, + { + "epoch": 0.06, + "learning_rate": 3.768397667314635e-05, + "loss": 0.5091, + "step": 5422 + }, + { + "epoch": 0.06, + "learning_rate": 3.7679348329167827e-05, + "loss": 1.5648, + "step": 5424 + }, + { + "epoch": 0.06, + "learning_rate": 3.76747199851893e-05, + "loss": 1.0792, + "step": 5426 + }, + { + "epoch": 0.06, + "learning_rate": 3.767009164121078e-05, + "loss": 2.9863, + "step": 5428 + }, + { + "epoch": 0.06, + "learning_rate": 3.766546329723225e-05, + "loss": 0.2073, + "step": 5430 + }, + { + "epoch": 0.06, + "learning_rate": 3.766083495325373e-05, + "loss": 1.5689, + "step": 5432 + }, + { + "epoch": 0.06, + "learning_rate": 3.76562066092752e-05, + "loss": 1.5686, + "step": 5434 + }, + { + "epoch": 0.06, + "learning_rate": 3.765157826529668e-05, + "loss": 1.1832, + "step": 5436 + }, + { + "epoch": 0.06, + "learning_rate": 3.764694992131815e-05, + "loss": 1.4252, + "step": 5438 + }, + { + "epoch": 0.06, + "learning_rate": 3.764232157733963e-05, + "loss": 2.125, + "step": 5440 + }, + { + "epoch": 0.06, + "learning_rate": 3.76376932333611e-05, + "loss": 3.8125, + "step": 5442 + }, + { + "epoch": 0.06, + "learning_rate": 3.763306488938258e-05, + "loss": 0.0066, + "step": 5444 + }, + { + "epoch": 0.06, + "learning_rate": 3.762843654540405e-05, + "loss": 0.8037, + "step": 5446 + }, + { + "epoch": 0.06, + "learning_rate": 3.762380820142553e-05, + "loss": 0.864, + "step": 5448 + }, + { + "epoch": 0.06, + "learning_rate": 3.7619179857447004e-05, + "loss": 0.4955, + "step": 5450 + }, + { + "epoch": 0.06, + "learning_rate": 3.761455151346848e-05, + "loss": 2.5811, + "step": 5452 + }, + { + "epoch": 0.06, + "learning_rate": 3.7609923169489955e-05, + "loss": 2.1034, + "step": 5454 + }, + { + "epoch": 0.06, + "learning_rate": 3.760529482551143e-05, + "loss": 1.5487, + "step": 5456 + }, + { + "epoch": 0.06, + "learning_rate": 3.760066648153291e-05, + "loss": 4.8437, + "step": 5458 + }, + { + "epoch": 0.06, + "learning_rate": 3.7596038137554384e-05, + "loss": 2.5366, + "step": 5460 + }, + { + "epoch": 0.06, + "learning_rate": 3.759140979357586e-05, + "loss": 3.8967, + "step": 5462 + }, + { + "epoch": 0.06, + "learning_rate": 3.7586781449597335e-05, + "loss": 0.4815, + "step": 5464 + }, + { + "epoch": 0.06, + "learning_rate": 3.7582153105618814e-05, + "loss": 1.585, + "step": 5466 + }, + { + "epoch": 0.06, + "learning_rate": 3.7577524761640286e-05, + "loss": 1.2285, + "step": 5468 + }, + { + "epoch": 0.06, + "learning_rate": 3.7572896417661765e-05, + "loss": 4.3866, + "step": 5470 + }, + { + "epoch": 0.06, + "learning_rate": 3.756826807368324e-05, + "loss": 0.0037, + "step": 5472 + }, + { + "epoch": 0.06, + "learning_rate": 3.7563639729704716e-05, + "loss": 4.1452, + "step": 5474 + }, + { + "epoch": 0.06, + "learning_rate": 3.755901138572619e-05, + "loss": 0.1319, + "step": 5476 + }, + { + "epoch": 0.06, + "learning_rate": 3.7554383041747666e-05, + "loss": 3.4078, + "step": 5478 + }, + { + "epoch": 0.06, + "learning_rate": 3.754975469776914e-05, + "loss": 3.2085, + "step": 5480 + }, + { + "epoch": 0.06, + "learning_rate": 3.754512635379062e-05, + "loss": 9.0038, + "step": 5482 + }, + { + "epoch": 0.06, + "learning_rate": 3.754049800981209e-05, + "loss": 4.3999, + "step": 5484 + }, + { + "epoch": 0.06, + "learning_rate": 3.753586966583357e-05, + "loss": 5.9237, + "step": 5486 + }, + { + "epoch": 0.06, + "learning_rate": 3.753124132185504e-05, + "loss": 3.5989, + "step": 5488 + }, + { + "epoch": 0.06, + "learning_rate": 3.752661297787652e-05, + "loss": 0.7047, + "step": 5490 + }, + { + "epoch": 0.06, + "learning_rate": 3.752198463389799e-05, + "loss": 2.0872, + "step": 5492 + }, + { + "epoch": 0.06, + "learning_rate": 3.751735628991947e-05, + "loss": 3.2558, + "step": 5494 + }, + { + "epoch": 0.06, + "learning_rate": 3.751272794594095e-05, + "loss": 2.7376, + "step": 5496 + }, + { + "epoch": 0.06, + "learning_rate": 3.750809960196242e-05, + "loss": 3.8667, + "step": 5498 + }, + { + "epoch": 0.06, + "learning_rate": 3.75034712579839e-05, + "loss": 2.8913, + "step": 5500 + }, + { + "epoch": 0.06, + "learning_rate": 3.749884291400537e-05, + "loss": 1.4729, + "step": 5502 + }, + { + "epoch": 0.06, + "learning_rate": 3.749421457002685e-05, + "loss": 2.8378, + "step": 5504 + }, + { + "epoch": 0.06, + "learning_rate": 3.748958622604832e-05, + "loss": 3.0792, + "step": 5506 + }, + { + "epoch": 0.06, + "learning_rate": 3.74849578820698e-05, + "loss": 1.2205, + "step": 5508 + }, + { + "epoch": 0.06, + "learning_rate": 3.748032953809127e-05, + "loss": 2.6787, + "step": 5510 + }, + { + "epoch": 0.06, + "learning_rate": 3.747570119411275e-05, + "loss": 2.3314, + "step": 5512 + }, + { + "epoch": 0.06, + "learning_rate": 3.7471072850134224e-05, + "loss": 1.2626, + "step": 5514 + }, + { + "epoch": 0.06, + "learning_rate": 3.74664445061557e-05, + "loss": 2.5572, + "step": 5516 + }, + { + "epoch": 0.06, + "learning_rate": 3.7461816162177175e-05, + "loss": 2.3761, + "step": 5518 + }, + { + "epoch": 0.06, + "learning_rate": 3.7457187818198654e-05, + "loss": 3.2702, + "step": 5520 + }, + { + "epoch": 0.06, + "learning_rate": 3.7452559474220126e-05, + "loss": 1.7202, + "step": 5522 + }, + { + "epoch": 0.06, + "learning_rate": 3.7447931130241605e-05, + "loss": 0.1645, + "step": 5524 + }, + { + "epoch": 0.06, + "learning_rate": 3.7443302786263077e-05, + "loss": 1.6948, + "step": 5526 + }, + { + "epoch": 0.06, + "learning_rate": 3.7438674442284555e-05, + "loss": 0.9105, + "step": 5528 + }, + { + "epoch": 0.06, + "learning_rate": 3.743404609830603e-05, + "loss": 1.1298, + "step": 5530 + }, + { + "epoch": 0.06, + "learning_rate": 3.7429417754327506e-05, + "loss": 0.0021, + "step": 5532 + }, + { + "epoch": 0.06, + "learning_rate": 3.742478941034898e-05, + "loss": 0.3627, + "step": 5534 + }, + { + "epoch": 0.06, + "learning_rate": 3.742016106637046e-05, + "loss": 2.2107, + "step": 5536 + }, + { + "epoch": 0.06, + "learning_rate": 3.741553272239193e-05, + "loss": 1.3065, + "step": 5538 + }, + { + "epoch": 0.06, + "learning_rate": 3.74109043784134e-05, + "loss": 1.5029, + "step": 5540 + }, + { + "epoch": 0.06, + "learning_rate": 3.740627603443488e-05, + "loss": 0.876, + "step": 5542 + }, + { + "epoch": 0.06, + "learning_rate": 3.740164769045635e-05, + "loss": 0.4546, + "step": 5544 + }, + { + "epoch": 0.06, + "learning_rate": 3.739701934647783e-05, + "loss": 6.8861, + "step": 5546 + }, + { + "epoch": 0.06, + "learning_rate": 3.73923910024993e-05, + "loss": 1.2284, + "step": 5548 + }, + { + "epoch": 0.06, + "learning_rate": 3.738776265852078e-05, + "loss": 0.3435, + "step": 5550 + }, + { + "epoch": 0.06, + "learning_rate": 3.7383134314542254e-05, + "loss": 1.4988, + "step": 5552 + }, + { + "epoch": 0.06, + "learning_rate": 3.737850597056373e-05, + "loss": 0.7403, + "step": 5554 + }, + { + "epoch": 0.06, + "learning_rate": 3.7373877626585205e-05, + "loss": 2.4201, + "step": 5556 + }, + { + "epoch": 0.06, + "learning_rate": 3.7369249282606683e-05, + "loss": 6.4367, + "step": 5558 + }, + { + "epoch": 0.06, + "learning_rate": 3.7364620938628155e-05, + "loss": 0.3026, + "step": 5560 + }, + { + "epoch": 0.06, + "learning_rate": 3.7359992594649634e-05, + "loss": 2.3819, + "step": 5562 + }, + { + "epoch": 0.06, + "learning_rate": 3.735536425067111e-05, + "loss": 2.8059, + "step": 5564 + }, + { + "epoch": 0.06, + "learning_rate": 3.7350735906692585e-05, + "loss": 1.4897, + "step": 5566 + }, + { + "epoch": 0.06, + "learning_rate": 3.7346107562714064e-05, + "loss": 0.2903, + "step": 5568 + }, + { + "epoch": 0.06, + "learning_rate": 3.7341479218735536e-05, + "loss": 4.1454, + "step": 5570 + }, + { + "epoch": 0.06, + "learning_rate": 3.7336850874757015e-05, + "loss": 6.9496, + "step": 5572 + }, + { + "epoch": 0.06, + "learning_rate": 3.733222253077849e-05, + "loss": 4.2972, + "step": 5574 + }, + { + "epoch": 0.06, + "learning_rate": 3.7327594186799966e-05, + "loss": 0.038, + "step": 5576 + }, + { + "epoch": 0.06, + "learning_rate": 3.732296584282144e-05, + "loss": 1.9108, + "step": 5578 + }, + { + "epoch": 0.06, + "learning_rate": 3.7318337498842916e-05, + "loss": 0.0217, + "step": 5580 + }, + { + "epoch": 0.06, + "learning_rate": 3.731370915486439e-05, + "loss": 1.3793, + "step": 5582 + }, + { + "epoch": 0.06, + "learning_rate": 3.730908081088587e-05, + "loss": 0.0888, + "step": 5584 + }, + { + "epoch": 0.06, + "learning_rate": 3.730445246690734e-05, + "loss": 1.2839, + "step": 5586 + }, + { + "epoch": 0.06, + "learning_rate": 3.729982412292882e-05, + "loss": 0.9168, + "step": 5588 + }, + { + "epoch": 0.06, + "learning_rate": 3.729519577895029e-05, + "loss": 1.2055, + "step": 5590 + }, + { + "epoch": 0.06, + "learning_rate": 3.729056743497177e-05, + "loss": 3.9692, + "step": 5592 + }, + { + "epoch": 0.06, + "learning_rate": 3.728593909099324e-05, + "loss": 0.2553, + "step": 5594 + }, + { + "epoch": 0.06, + "learning_rate": 3.728131074701472e-05, + "loss": 5.4677, + "step": 5596 + }, + { + "epoch": 0.06, + "learning_rate": 3.727668240303619e-05, + "loss": 0.0024, + "step": 5598 + }, + { + "epoch": 0.06, + "learning_rate": 3.727205405905767e-05, + "loss": 1.3315, + "step": 5600 + }, + { + "epoch": 0.06, + "learning_rate": 3.726742571507915e-05, + "loss": 0.1544, + "step": 5602 + }, + { + "epoch": 0.06, + "learning_rate": 3.726279737110062e-05, + "loss": 0.9973, + "step": 5604 + }, + { + "epoch": 0.06, + "learning_rate": 3.72581690271221e-05, + "loss": 3.0374, + "step": 5606 + }, + { + "epoch": 0.06, + "learning_rate": 3.725354068314357e-05, + "loss": 2.9196, + "step": 5608 + }, + { + "epoch": 0.06, + "learning_rate": 3.724891233916505e-05, + "loss": 1.7169, + "step": 5610 + }, + { + "epoch": 0.06, + "learning_rate": 3.724428399518652e-05, + "loss": 0.0318, + "step": 5612 + }, + { + "epoch": 0.06, + "learning_rate": 3.7239655651208e-05, + "loss": 3.1469, + "step": 5614 + }, + { + "epoch": 0.06, + "learning_rate": 3.7235027307229474e-05, + "loss": 6.1062, + "step": 5616 + }, + { + "epoch": 0.06, + "learning_rate": 3.723039896325095e-05, + "loss": 0.7046, + "step": 5618 + }, + { + "epoch": 0.06, + "learning_rate": 3.7225770619272425e-05, + "loss": 2.917, + "step": 5620 + }, + { + "epoch": 0.06, + "learning_rate": 3.7221142275293904e-05, + "loss": 6.3644, + "step": 5622 + }, + { + "epoch": 0.06, + "learning_rate": 3.7216513931315376e-05, + "loss": 3.5582, + "step": 5624 + }, + { + "epoch": 0.06, + "learning_rate": 3.7211885587336855e-05, + "loss": 0.09, + "step": 5626 + }, + { + "epoch": 0.06, + "learning_rate": 3.720725724335833e-05, + "loss": 0.0009, + "step": 5628 + }, + { + "epoch": 0.06, + "learning_rate": 3.7202628899379805e-05, + "loss": 2.3071, + "step": 5630 + }, + { + "epoch": 0.06, + "learning_rate": 3.719800055540128e-05, + "loss": 2.16, + "step": 5632 + }, + { + "epoch": 0.06, + "learning_rate": 3.7193372211422756e-05, + "loss": 6.2689, + "step": 5634 + }, + { + "epoch": 0.06, + "learning_rate": 3.718874386744423e-05, + "loss": 0.3007, + "step": 5636 + }, + { + "epoch": 0.06, + "learning_rate": 3.718411552346571e-05, + "loss": 4.3451, + "step": 5638 + }, + { + "epoch": 0.06, + "learning_rate": 3.717948717948718e-05, + "loss": 3.9313, + "step": 5640 + }, + { + "epoch": 0.06, + "learning_rate": 3.717485883550866e-05, + "loss": 0.0019, + "step": 5642 + }, + { + "epoch": 0.07, + "learning_rate": 3.717023049153014e-05, + "loss": 0.2941, + "step": 5644 + }, + { + "epoch": 0.07, + "learning_rate": 3.716560214755161e-05, + "loss": 1.3549, + "step": 5646 + }, + { + "epoch": 0.07, + "learning_rate": 3.716097380357309e-05, + "loss": 0.8108, + "step": 5648 + }, + { + "epoch": 0.07, + "learning_rate": 3.715634545959456e-05, + "loss": 4.2084, + "step": 5650 + }, + { + "epoch": 0.07, + "learning_rate": 3.715171711561604e-05, + "loss": 3.3019, + "step": 5652 + }, + { + "epoch": 0.07, + "learning_rate": 3.714708877163751e-05, + "loss": 3.6804, + "step": 5654 + }, + { + "epoch": 0.07, + "learning_rate": 3.714246042765899e-05, + "loss": 0.8392, + "step": 5656 + }, + { + "epoch": 0.07, + "learning_rate": 3.713783208368046e-05, + "loss": 0.2575, + "step": 5658 + }, + { + "epoch": 0.07, + "learning_rate": 3.713320373970194e-05, + "loss": 0.5797, + "step": 5660 + }, + { + "epoch": 0.07, + "learning_rate": 3.712857539572341e-05, + "loss": 0.0052, + "step": 5662 + }, + { + "epoch": 0.07, + "learning_rate": 3.712394705174489e-05, + "loss": 1.2945, + "step": 5664 + }, + { + "epoch": 0.07, + "learning_rate": 3.711931870776636e-05, + "loss": 3.1983, + "step": 5666 + }, + { + "epoch": 0.07, + "learning_rate": 3.711469036378784e-05, + "loss": 0.0316, + "step": 5668 + }, + { + "epoch": 0.07, + "learning_rate": 3.7110062019809314e-05, + "loss": 0.0004, + "step": 5670 + }, + { + "epoch": 0.07, + "learning_rate": 3.710543367583079e-05, + "loss": 2.8497, + "step": 5672 + }, + { + "epoch": 0.07, + "learning_rate": 3.7100805331852265e-05, + "loss": 3.1057, + "step": 5674 + }, + { + "epoch": 0.07, + "learning_rate": 3.7096176987873744e-05, + "loss": 0.7433, + "step": 5676 + }, + { + "epoch": 0.07, + "learning_rate": 3.7091548643895216e-05, + "loss": 0.9245, + "step": 5678 + }, + { + "epoch": 0.07, + "learning_rate": 3.7086920299916694e-05, + "loss": 3.9832, + "step": 5680 + }, + { + "epoch": 0.07, + "learning_rate": 3.7082291955938166e-05, + "loss": 0.5022, + "step": 5682 + }, + { + "epoch": 0.07, + "learning_rate": 3.707766361195964e-05, + "loss": 5.4921, + "step": 5684 + }, + { + "epoch": 0.07, + "learning_rate": 3.707303526798112e-05, + "loss": 2.8462, + "step": 5686 + }, + { + "epoch": 0.07, + "learning_rate": 3.706840692400259e-05, + "loss": 3.0545, + "step": 5688 + }, + { + "epoch": 0.07, + "learning_rate": 3.706377858002407e-05, + "loss": 0.0185, + "step": 5690 + }, + { + "epoch": 0.07, + "learning_rate": 3.705915023604554e-05, + "loss": 0.2458, + "step": 5692 + }, + { + "epoch": 0.07, + "learning_rate": 3.705452189206702e-05, + "loss": 1.6811, + "step": 5694 + }, + { + "epoch": 0.07, + "learning_rate": 3.704989354808849e-05, + "loss": 3.7359, + "step": 5696 + }, + { + "epoch": 0.07, + "learning_rate": 3.704526520410997e-05, + "loss": 3.3413, + "step": 5698 + }, + { + "epoch": 0.07, + "learning_rate": 3.704063686013144e-05, + "loss": 0.0004, + "step": 5700 + }, + { + "epoch": 0.07, + "learning_rate": 3.703600851615292e-05, + "loss": 5.206, + "step": 5702 + }, + { + "epoch": 0.07, + "learning_rate": 3.703138017217439e-05, + "loss": 1.5071, + "step": 5704 + }, + { + "epoch": 0.07, + "learning_rate": 3.702675182819587e-05, + "loss": 2.818, + "step": 5706 + }, + { + "epoch": 0.07, + "learning_rate": 3.7022123484217344e-05, + "loss": 0.5471, + "step": 5708 + }, + { + "epoch": 0.07, + "learning_rate": 3.701749514023882e-05, + "loss": 5.8081, + "step": 5710 + }, + { + "epoch": 0.07, + "learning_rate": 3.70128667962603e-05, + "loss": 5.2874, + "step": 5712 + }, + { + "epoch": 0.07, + "learning_rate": 3.700823845228177e-05, + "loss": 0.4417, + "step": 5714 + }, + { + "epoch": 0.07, + "learning_rate": 3.700361010830325e-05, + "loss": 6.7978, + "step": 5716 + }, + { + "epoch": 0.07, + "learning_rate": 3.6998981764324724e-05, + "loss": 4.1572, + "step": 5718 + }, + { + "epoch": 0.07, + "learning_rate": 3.69943534203462e-05, + "loss": 5.5485, + "step": 5720 + }, + { + "epoch": 0.07, + "learning_rate": 3.6989725076367675e-05, + "loss": 3.4657, + "step": 5722 + }, + { + "epoch": 0.07, + "learning_rate": 3.6985096732389154e-05, + "loss": 1.7109, + "step": 5724 + }, + { + "epoch": 0.07, + "learning_rate": 3.6980468388410626e-05, + "loss": 7.6438, + "step": 5726 + }, + { + "epoch": 0.07, + "learning_rate": 3.6975840044432105e-05, + "loss": 3.7325, + "step": 5728 + }, + { + "epoch": 0.07, + "learning_rate": 3.697121170045358e-05, + "loss": 1.2219, + "step": 5730 + }, + { + "epoch": 0.07, + "learning_rate": 3.6966583356475055e-05, + "loss": 1.9663, + "step": 5732 + }, + { + "epoch": 0.07, + "learning_rate": 3.696195501249653e-05, + "loss": 3.284, + "step": 5734 + }, + { + "epoch": 0.07, + "learning_rate": 3.6957326668518006e-05, + "loss": 3.7475, + "step": 5736 + }, + { + "epoch": 0.07, + "learning_rate": 3.695269832453948e-05, + "loss": 0.9925, + "step": 5738 + }, + { + "epoch": 0.07, + "learning_rate": 3.694806998056096e-05, + "loss": 2.4751, + "step": 5740 + }, + { + "epoch": 0.07, + "learning_rate": 3.694344163658243e-05, + "loss": 4.5168, + "step": 5742 + }, + { + "epoch": 0.07, + "learning_rate": 3.693881329260391e-05, + "loss": 0.0838, + "step": 5744 + }, + { + "epoch": 0.07, + "learning_rate": 3.693418494862538e-05, + "loss": 1.7816, + "step": 5746 + }, + { + "epoch": 0.07, + "learning_rate": 3.692955660464686e-05, + "loss": 2.1024, + "step": 5748 + }, + { + "epoch": 0.07, + "learning_rate": 3.692492826066834e-05, + "loss": 1.6748, + "step": 5750 + }, + { + "epoch": 0.07, + "learning_rate": 3.692029991668981e-05, + "loss": 4.8753, + "step": 5752 + }, + { + "epoch": 0.07, + "learning_rate": 3.691567157271129e-05, + "loss": 2.4181, + "step": 5754 + }, + { + "epoch": 0.07, + "learning_rate": 3.691104322873276e-05, + "loss": 1.5341, + "step": 5756 + }, + { + "epoch": 0.07, + "learning_rate": 3.690641488475424e-05, + "loss": 1.2361, + "step": 5758 + }, + { + "epoch": 0.07, + "learning_rate": 3.690178654077571e-05, + "loss": 2.9296, + "step": 5760 + }, + { + "epoch": 0.07, + "learning_rate": 3.689715819679719e-05, + "loss": 2.458, + "step": 5762 + }, + { + "epoch": 0.07, + "learning_rate": 3.689252985281866e-05, + "loss": 0.6524, + "step": 5764 + }, + { + "epoch": 0.07, + "learning_rate": 3.688790150884014e-05, + "loss": 1.2668, + "step": 5766 + }, + { + "epoch": 0.07, + "learning_rate": 3.688327316486161e-05, + "loss": 3.8284, + "step": 5768 + }, + { + "epoch": 0.07, + "learning_rate": 3.687864482088309e-05, + "loss": 1.1153, + "step": 5770 + }, + { + "epoch": 0.07, + "learning_rate": 3.6874016476904564e-05, + "loss": 3.1253, + "step": 5772 + }, + { + "epoch": 0.07, + "learning_rate": 3.686938813292604e-05, + "loss": 4.7454, + "step": 5774 + }, + { + "epoch": 0.07, + "learning_rate": 3.6864759788947515e-05, + "loss": 1.214, + "step": 5776 + }, + { + "epoch": 0.07, + "learning_rate": 3.6860131444968994e-05, + "loss": 4.9028, + "step": 5778 + }, + { + "epoch": 0.07, + "learning_rate": 3.6855503100990466e-05, + "loss": 1.0108, + "step": 5780 + }, + { + "epoch": 0.07, + "learning_rate": 3.6850874757011944e-05, + "loss": 2.1585, + "step": 5782 + }, + { + "epoch": 0.07, + "learning_rate": 3.6846246413033417e-05, + "loss": 1.7023, + "step": 5784 + }, + { + "epoch": 0.07, + "learning_rate": 3.6841618069054895e-05, + "loss": 2.6818, + "step": 5786 + }, + { + "epoch": 0.07, + "learning_rate": 3.683698972507637e-05, + "loss": 3.198, + "step": 5788 + }, + { + "epoch": 0.07, + "learning_rate": 3.6832361381097846e-05, + "loss": 4.4978, + "step": 5790 + }, + { + "epoch": 0.07, + "learning_rate": 3.6827733037119325e-05, + "loss": 0.36, + "step": 5792 + }, + { + "epoch": 0.07, + "learning_rate": 3.68231046931408e-05, + "loss": 4.1888, + "step": 5794 + }, + { + "epoch": 0.07, + "learning_rate": 3.6818476349162276e-05, + "loss": 3.7276, + "step": 5796 + }, + { + "epoch": 0.07, + "learning_rate": 3.681384800518375e-05, + "loss": 2.5698, + "step": 5798 + }, + { + "epoch": 0.07, + "learning_rate": 3.680921966120523e-05, + "loss": 0.8808, + "step": 5800 + }, + { + "epoch": 0.07, + "learning_rate": 3.68045913172267e-05, + "loss": 1.1776, + "step": 5802 + }, + { + "epoch": 0.07, + "learning_rate": 3.679996297324818e-05, + "loss": 1.738, + "step": 5804 + }, + { + "epoch": 0.07, + "learning_rate": 3.679533462926965e-05, + "loss": 2.1031, + "step": 5806 + }, + { + "epoch": 0.07, + "learning_rate": 3.679070628529113e-05, + "loss": 1.9786, + "step": 5808 + }, + { + "epoch": 0.07, + "learning_rate": 3.67860779413126e-05, + "loss": 1.9495, + "step": 5810 + }, + { + "epoch": 0.07, + "learning_rate": 3.678144959733408e-05, + "loss": 1.5391, + "step": 5812 + }, + { + "epoch": 0.07, + "learning_rate": 3.677682125335555e-05, + "loss": 0.4247, + "step": 5814 + }, + { + "epoch": 0.07, + "learning_rate": 3.677219290937703e-05, + "loss": 2.594, + "step": 5816 + }, + { + "epoch": 0.07, + "learning_rate": 3.67675645653985e-05, + "loss": 4.9515, + "step": 5818 + }, + { + "epoch": 0.07, + "learning_rate": 3.676293622141998e-05, + "loss": 0.02, + "step": 5820 + }, + { + "epoch": 0.07, + "learning_rate": 3.675830787744145e-05, + "loss": 1.0862, + "step": 5822 + }, + { + "epoch": 0.07, + "learning_rate": 3.675367953346293e-05, + "loss": 2.0752, + "step": 5824 + }, + { + "epoch": 0.07, + "learning_rate": 3.6749051189484404e-05, + "loss": 0.3555, + "step": 5826 + }, + { + "epoch": 0.07, + "learning_rate": 3.6744422845505876e-05, + "loss": 2.5844, + "step": 5828 + }, + { + "epoch": 0.07, + "learning_rate": 3.6739794501527355e-05, + "loss": 0.9155, + "step": 5830 + }, + { + "epoch": 0.07, + "learning_rate": 3.673516615754883e-05, + "loss": 0.4181, + "step": 5832 + }, + { + "epoch": 0.07, + "learning_rate": 3.6730537813570306e-05, + "loss": 4.7296, + "step": 5834 + }, + { + "epoch": 0.07, + "learning_rate": 3.672590946959178e-05, + "loss": 1.6416, + "step": 5836 + }, + { + "epoch": 0.07, + "learning_rate": 3.6721281125613256e-05, + "loss": 5.7184, + "step": 5838 + }, + { + "epoch": 0.07, + "learning_rate": 3.671665278163473e-05, + "loss": 2.388, + "step": 5840 + }, + { + "epoch": 0.07, + "learning_rate": 3.671202443765621e-05, + "loss": 5.1771, + "step": 5842 + }, + { + "epoch": 0.07, + "learning_rate": 3.670739609367768e-05, + "loss": 1.1083, + "step": 5844 + }, + { + "epoch": 0.07, + "learning_rate": 3.670276774969916e-05, + "loss": 1.0888, + "step": 5846 + }, + { + "epoch": 0.07, + "learning_rate": 3.669813940572063e-05, + "loss": 5.6697, + "step": 5848 + }, + { + "epoch": 0.07, + "learning_rate": 3.669351106174211e-05, + "loss": 1.8407, + "step": 5850 + }, + { + "epoch": 0.07, + "learning_rate": 3.668888271776358e-05, + "loss": 2.4514, + "step": 5852 + }, + { + "epoch": 0.07, + "learning_rate": 3.668425437378506e-05, + "loss": 0.9198, + "step": 5854 + }, + { + "epoch": 0.07, + "learning_rate": 3.667962602980654e-05, + "loss": 0.4436, + "step": 5856 + }, + { + "epoch": 0.07, + "learning_rate": 3.667499768582801e-05, + "loss": 1.8852, + "step": 5858 + }, + { + "epoch": 0.07, + "learning_rate": 3.667036934184949e-05, + "loss": 0.6295, + "step": 5860 + }, + { + "epoch": 0.07, + "learning_rate": 3.666574099787096e-05, + "loss": 6.2802, + "step": 5862 + }, + { + "epoch": 0.07, + "learning_rate": 3.666111265389244e-05, + "loss": 1.4778, + "step": 5864 + }, + { + "epoch": 0.07, + "learning_rate": 3.665648430991391e-05, + "loss": 2.3817, + "step": 5866 + }, + { + "epoch": 0.07, + "learning_rate": 3.665185596593539e-05, + "loss": 3.8138, + "step": 5868 + }, + { + "epoch": 0.07, + "learning_rate": 3.664722762195686e-05, + "loss": 0.6826, + "step": 5870 + }, + { + "epoch": 0.07, + "learning_rate": 3.664259927797834e-05, + "loss": 1.7768, + "step": 5872 + }, + { + "epoch": 0.07, + "learning_rate": 3.6637970933999814e-05, + "loss": 0.8487, + "step": 5874 + }, + { + "epoch": 0.07, + "learning_rate": 3.663334259002129e-05, + "loss": 5.1746, + "step": 5876 + }, + { + "epoch": 0.07, + "learning_rate": 3.6628714246042765e-05, + "loss": 0.0661, + "step": 5878 + }, + { + "epoch": 0.07, + "learning_rate": 3.6624085902064244e-05, + "loss": 0.0008, + "step": 5880 + }, + { + "epoch": 0.07, + "learning_rate": 3.6619457558085716e-05, + "loss": 4.3587, + "step": 5882 + }, + { + "epoch": 0.07, + "learning_rate": 3.6614829214107195e-05, + "loss": 0.0065, + "step": 5884 + }, + { + "epoch": 0.07, + "learning_rate": 3.6610200870128667e-05, + "loss": 0.0649, + "step": 5886 + }, + { + "epoch": 0.07, + "learning_rate": 3.6605572526150145e-05, + "loss": 2.4656, + "step": 5888 + }, + { + "epoch": 0.07, + "learning_rate": 3.660094418217162e-05, + "loss": 3.3962, + "step": 5890 + }, + { + "epoch": 0.07, + "learning_rate": 3.6596315838193096e-05, + "loss": 7.4604, + "step": 5892 + }, + { + "epoch": 0.07, + "learning_rate": 3.659168749421457e-05, + "loss": 0.0284, + "step": 5894 + }, + { + "epoch": 0.07, + "learning_rate": 3.658705915023605e-05, + "loss": 5.5076, + "step": 5896 + }, + { + "epoch": 0.07, + "learning_rate": 3.6582430806257526e-05, + "loss": 1.4567, + "step": 5898 + }, + { + "epoch": 0.07, + "learning_rate": 3.6577802462279e-05, + "loss": 1.5263, + "step": 5900 + }, + { + "epoch": 0.07, + "learning_rate": 3.657317411830048e-05, + "loss": 2.5261, + "step": 5902 + }, + { + "epoch": 0.07, + "learning_rate": 3.656854577432195e-05, + "loss": 0.7518, + "step": 5904 + }, + { + "epoch": 0.07, + "learning_rate": 3.656391743034343e-05, + "loss": 2.2555, + "step": 5906 + }, + { + "epoch": 0.07, + "learning_rate": 3.65592890863649e-05, + "loss": 3.4021, + "step": 5908 + }, + { + "epoch": 0.07, + "learning_rate": 3.655466074238638e-05, + "loss": 0.7644, + "step": 5910 + }, + { + "epoch": 0.07, + "learning_rate": 3.655003239840785e-05, + "loss": 0.7425, + "step": 5912 + }, + { + "epoch": 0.07, + "learning_rate": 3.654540405442933e-05, + "loss": 2.92, + "step": 5914 + }, + { + "epoch": 0.07, + "learning_rate": 3.65407757104508e-05, + "loss": 3.7615, + "step": 5916 + }, + { + "epoch": 0.07, + "learning_rate": 3.653614736647228e-05, + "loss": 2.7561, + "step": 5918 + }, + { + "epoch": 0.07, + "learning_rate": 3.653151902249375e-05, + "loss": 0.5982, + "step": 5920 + }, + { + "epoch": 0.07, + "learning_rate": 3.652689067851523e-05, + "loss": 0.7864, + "step": 5922 + }, + { + "epoch": 0.07, + "learning_rate": 3.65222623345367e-05, + "loss": 0.3019, + "step": 5924 + }, + { + "epoch": 0.07, + "learning_rate": 3.651763399055818e-05, + "loss": 0.2331, + "step": 5926 + }, + { + "epoch": 0.07, + "learning_rate": 3.6513005646579654e-05, + "loss": 1.7811, + "step": 5928 + }, + { + "epoch": 0.07, + "learning_rate": 3.650837730260113e-05, + "loss": 3.9967, + "step": 5930 + }, + { + "epoch": 0.07, + "learning_rate": 3.6503748958622605e-05, + "loss": 1.1955, + "step": 5932 + }, + { + "epoch": 0.07, + "learning_rate": 3.6499120614644084e-05, + "loss": 5.445, + "step": 5934 + }, + { + "epoch": 0.07, + "learning_rate": 3.649449227066556e-05, + "loss": 0.7575, + "step": 5936 + }, + { + "epoch": 0.07, + "learning_rate": 3.6489863926687034e-05, + "loss": 2.0712, + "step": 5938 + }, + { + "epoch": 0.07, + "learning_rate": 3.648523558270851e-05, + "loss": 4.7086, + "step": 5940 + }, + { + "epoch": 0.07, + "learning_rate": 3.6480607238729985e-05, + "loss": 0.003, + "step": 5942 + }, + { + "epoch": 0.07, + "learning_rate": 3.6475978894751464e-05, + "loss": 8.5615, + "step": 5944 + }, + { + "epoch": 0.07, + "learning_rate": 3.6471350550772936e-05, + "loss": 3.2922, + "step": 5946 + }, + { + "epoch": 0.07, + "learning_rate": 3.6466722206794415e-05, + "loss": 0.6517, + "step": 5948 + }, + { + "epoch": 0.07, + "learning_rate": 3.646209386281589e-05, + "loss": 2.4854, + "step": 5950 + }, + { + "epoch": 0.07, + "learning_rate": 3.6457465518837366e-05, + "loss": 3.7134, + "step": 5952 + }, + { + "epoch": 0.07, + "learning_rate": 3.645283717485884e-05, + "loss": 2.3408, + "step": 5954 + }, + { + "epoch": 0.07, + "learning_rate": 3.6448208830880317e-05, + "loss": 1.9116, + "step": 5956 + }, + { + "epoch": 0.07, + "learning_rate": 3.644358048690179e-05, + "loss": 2.5449, + "step": 5958 + }, + { + "epoch": 0.07, + "learning_rate": 3.643895214292327e-05, + "loss": 0.1201, + "step": 5960 + }, + { + "epoch": 0.07, + "learning_rate": 3.643432379894474e-05, + "loss": 0.103, + "step": 5962 + }, + { + "epoch": 0.07, + "learning_rate": 3.642969545496622e-05, + "loss": 0.0232, + "step": 5964 + }, + { + "epoch": 0.07, + "learning_rate": 3.642506711098769e-05, + "loss": 0.9663, + "step": 5966 + }, + { + "epoch": 0.07, + "learning_rate": 3.642043876700917e-05, + "loss": 4.4563, + "step": 5968 + }, + { + "epoch": 0.07, + "learning_rate": 3.641581042303064e-05, + "loss": 0.0559, + "step": 5970 + }, + { + "epoch": 0.07, + "learning_rate": 3.641118207905212e-05, + "loss": 2.3147, + "step": 5972 + }, + { + "epoch": 0.07, + "learning_rate": 3.640655373507359e-05, + "loss": 2.0908, + "step": 5974 + }, + { + "epoch": 0.07, + "learning_rate": 3.6401925391095064e-05, + "loss": 0.0003, + "step": 5976 + }, + { + "epoch": 0.07, + "learning_rate": 3.639729704711654e-05, + "loss": 3.7318, + "step": 5978 + }, + { + "epoch": 0.07, + "learning_rate": 3.6392668703138015e-05, + "loss": 4.9654, + "step": 5980 + }, + { + "epoch": 0.07, + "learning_rate": 3.6388040359159494e-05, + "loss": 10.8805, + "step": 5982 + }, + { + "epoch": 0.07, + "learning_rate": 3.6383412015180966e-05, + "loss": 1.9183, + "step": 5984 + }, + { + "epoch": 0.07, + "learning_rate": 3.6378783671202445e-05, + "loss": 0.0587, + "step": 5986 + }, + { + "epoch": 0.07, + "learning_rate": 3.637415532722392e-05, + "loss": 2.9662, + "step": 5988 + }, + { + "epoch": 0.07, + "learning_rate": 3.6369526983245395e-05, + "loss": 0.9363, + "step": 5990 + }, + { + "epoch": 0.07, + "learning_rate": 3.636489863926687e-05, + "loss": 5.6496, + "step": 5992 + }, + { + "epoch": 0.07, + "learning_rate": 3.6360270295288346e-05, + "loss": 1.0666, + "step": 5994 + }, + { + "epoch": 0.07, + "learning_rate": 3.635564195130982e-05, + "loss": 3.6521, + "step": 5996 + }, + { + "epoch": 0.07, + "learning_rate": 3.63510136073313e-05, + "loss": 2.9315, + "step": 5998 + }, + { + "epoch": 0.07, + "learning_rate": 3.634638526335277e-05, + "loss": 0.0155, + "step": 6000 + }, + { + "epoch": 0.07, + "learning_rate": 3.634175691937425e-05, + "loss": 2.5646, + "step": 6002 + }, + { + "epoch": 0.07, + "learning_rate": 3.633712857539573e-05, + "loss": 0.3798, + "step": 6004 + }, + { + "epoch": 0.07, + "learning_rate": 3.63325002314172e-05, + "loss": 4.6437, + "step": 6006 + }, + { + "epoch": 0.07, + "learning_rate": 3.632787188743868e-05, + "loss": 2.8592, + "step": 6008 + }, + { + "epoch": 0.07, + "learning_rate": 3.632324354346015e-05, + "loss": 3.5942, + "step": 6010 + }, + { + "epoch": 0.07, + "learning_rate": 3.631861519948163e-05, + "loss": 3.8092, + "step": 6012 + }, + { + "epoch": 0.07, + "learning_rate": 3.63139868555031e-05, + "loss": 4.0088, + "step": 6014 + }, + { + "epoch": 0.07, + "learning_rate": 3.630935851152458e-05, + "loss": 1.0512, + "step": 6016 + }, + { + "epoch": 0.07, + "learning_rate": 3.630473016754605e-05, + "loss": 3.2136, + "step": 6018 + }, + { + "epoch": 0.07, + "learning_rate": 3.630010182356753e-05, + "loss": 4.5774, + "step": 6020 + }, + { + "epoch": 0.07, + "learning_rate": 3.6295473479589e-05, + "loss": 1.4297, + "step": 6022 + }, + { + "epoch": 0.07, + "learning_rate": 3.629084513561048e-05, + "loss": 2.7349, + "step": 6024 + }, + { + "epoch": 0.07, + "learning_rate": 3.628621679163195e-05, + "loss": 0.0743, + "step": 6026 + }, + { + "epoch": 0.07, + "learning_rate": 3.628158844765343e-05, + "loss": 1.7092, + "step": 6028 + }, + { + "epoch": 0.07, + "learning_rate": 3.6276960103674904e-05, + "loss": 3.9974, + "step": 6030 + }, + { + "epoch": 0.07, + "learning_rate": 3.627233175969638e-05, + "loss": 0.8572, + "step": 6032 + }, + { + "epoch": 0.07, + "learning_rate": 3.6267703415717855e-05, + "loss": 0.0522, + "step": 6034 + }, + { + "epoch": 0.07, + "learning_rate": 3.6263075071739334e-05, + "loss": 2.5119, + "step": 6036 + }, + { + "epoch": 0.07, + "learning_rate": 3.6258446727760806e-05, + "loss": 2.2725, + "step": 6038 + }, + { + "epoch": 0.07, + "learning_rate": 3.6253818383782284e-05, + "loss": 3.8868, + "step": 6040 + }, + { + "epoch": 0.07, + "learning_rate": 3.6249190039803756e-05, + "loss": 0.7213, + "step": 6042 + }, + { + "epoch": 0.07, + "learning_rate": 3.6244561695825235e-05, + "loss": 3.7715, + "step": 6044 + }, + { + "epoch": 0.07, + "learning_rate": 3.6239933351846714e-05, + "loss": 1.9637, + "step": 6046 + }, + { + "epoch": 0.07, + "learning_rate": 3.6235305007868186e-05, + "loss": 0.2706, + "step": 6048 + }, + { + "epoch": 0.07, + "learning_rate": 3.6230676663889665e-05, + "loss": 3.3656, + "step": 6050 + }, + { + "epoch": 0.07, + "learning_rate": 3.622604831991114e-05, + "loss": 1.8178, + "step": 6052 + }, + { + "epoch": 0.07, + "learning_rate": 3.6221419975932616e-05, + "loss": 1.4188, + "step": 6054 + }, + { + "epoch": 0.07, + "learning_rate": 3.621679163195409e-05, + "loss": 6.6426, + "step": 6056 + }, + { + "epoch": 0.07, + "learning_rate": 3.621216328797557e-05, + "loss": 0.6246, + "step": 6058 + }, + { + "epoch": 0.07, + "learning_rate": 3.620753494399704e-05, + "loss": 2.4584, + "step": 6060 + }, + { + "epoch": 0.07, + "learning_rate": 3.620290660001852e-05, + "loss": 1.6974, + "step": 6062 + }, + { + "epoch": 0.07, + "learning_rate": 3.619827825603999e-05, + "loss": 0.1049, + "step": 6064 + }, + { + "epoch": 0.07, + "learning_rate": 3.619364991206147e-05, + "loss": 0.0168, + "step": 6066 + }, + { + "epoch": 0.07, + "learning_rate": 3.618902156808294e-05, + "loss": 5.5314, + "step": 6068 + }, + { + "epoch": 0.07, + "learning_rate": 3.618439322410442e-05, + "loss": 3.2488, + "step": 6070 + }, + { + "epoch": 0.07, + "learning_rate": 3.617976488012589e-05, + "loss": 2.6756, + "step": 6072 + }, + { + "epoch": 0.07, + "learning_rate": 3.617513653614737e-05, + "loss": 5.0494, + "step": 6074 + }, + { + "epoch": 0.07, + "learning_rate": 3.617050819216884e-05, + "loss": 0.0158, + "step": 6076 + }, + { + "epoch": 0.07, + "learning_rate": 3.616587984819032e-05, + "loss": 3.5959, + "step": 6078 + }, + { + "epoch": 0.07, + "learning_rate": 3.616125150421179e-05, + "loss": 1.6582, + "step": 6080 + }, + { + "epoch": 0.07, + "learning_rate": 3.615662316023327e-05, + "loss": 4.3228, + "step": 6082 + }, + { + "epoch": 0.07, + "learning_rate": 3.615199481625475e-05, + "loss": 1.6021, + "step": 6084 + }, + { + "epoch": 0.07, + "learning_rate": 3.614736647227622e-05, + "loss": 1.2701, + "step": 6086 + }, + { + "epoch": 0.07, + "learning_rate": 3.61427381282977e-05, + "loss": 1.3065, + "step": 6088 + }, + { + "epoch": 0.07, + "learning_rate": 3.6138109784319173e-05, + "loss": 1.7874, + "step": 6090 + }, + { + "epoch": 0.07, + "learning_rate": 3.613348144034065e-05, + "loss": 1.4222, + "step": 6092 + }, + { + "epoch": 0.07, + "learning_rate": 3.6128853096362124e-05, + "loss": 1.1869, + "step": 6094 + }, + { + "epoch": 0.07, + "learning_rate": 3.61242247523836e-05, + "loss": 0.3538, + "step": 6096 + }, + { + "epoch": 0.07, + "learning_rate": 3.6119596408405075e-05, + "loss": 3.225, + "step": 6098 + }, + { + "epoch": 0.07, + "learning_rate": 3.6114968064426554e-05, + "loss": 0.0483, + "step": 6100 + }, + { + "epoch": 0.07, + "learning_rate": 3.6110339720448026e-05, + "loss": 1.8142, + "step": 6102 + }, + { + "epoch": 0.07, + "learning_rate": 3.6105711376469505e-05, + "loss": 2.9634, + "step": 6104 + }, + { + "epoch": 0.07, + "learning_rate": 3.610108303249098e-05, + "loss": 4.7529, + "step": 6106 + }, + { + "epoch": 0.07, + "learning_rate": 3.6096454688512456e-05, + "loss": 1.4707, + "step": 6108 + }, + { + "epoch": 0.07, + "learning_rate": 3.609182634453393e-05, + "loss": 0.0631, + "step": 6110 + }, + { + "epoch": 0.07, + "learning_rate": 3.6087198000555406e-05, + "loss": 2.3548, + "step": 6112 + }, + { + "epoch": 0.07, + "learning_rate": 3.608256965657688e-05, + "loss": 0.0004, + "step": 6114 + }, + { + "epoch": 0.07, + "learning_rate": 3.607794131259836e-05, + "loss": 0.9257, + "step": 6116 + }, + { + "epoch": 0.07, + "learning_rate": 3.607331296861983e-05, + "loss": 0.8627, + "step": 6118 + }, + { + "epoch": 0.07, + "learning_rate": 3.60686846246413e-05, + "loss": 6.8348, + "step": 6120 + }, + { + "epoch": 0.07, + "learning_rate": 3.606405628066278e-05, + "loss": 0.0003, + "step": 6122 + }, + { + "epoch": 0.07, + "learning_rate": 3.605942793668425e-05, + "loss": 0.8744, + "step": 6124 + }, + { + "epoch": 0.07, + "learning_rate": 3.605479959270573e-05, + "loss": 4.0649, + "step": 6126 + }, + { + "epoch": 0.07, + "learning_rate": 3.60501712487272e-05, + "loss": 4.7017, + "step": 6128 + }, + { + "epoch": 0.07, + "learning_rate": 3.604554290474868e-05, + "loss": 3.3374, + "step": 6130 + }, + { + "epoch": 0.07, + "learning_rate": 3.6040914560770154e-05, + "loss": 2.1628, + "step": 6132 + }, + { + "epoch": 0.07, + "learning_rate": 3.603628621679163e-05, + "loss": 0.9867, + "step": 6134 + }, + { + "epoch": 0.07, + "learning_rate": 3.6031657872813105e-05, + "loss": 1.2089, + "step": 6136 + }, + { + "epoch": 0.07, + "learning_rate": 3.6027029528834584e-05, + "loss": 1.4474, + "step": 6138 + }, + { + "epoch": 0.07, + "learning_rate": 3.6022401184856056e-05, + "loss": 3.2858, + "step": 6140 + }, + { + "epoch": 0.07, + "learning_rate": 3.6017772840877534e-05, + "loss": 2.2717, + "step": 6142 + }, + { + "epoch": 0.07, + "learning_rate": 3.6013144496899007e-05, + "loss": 2.0238, + "step": 6144 + }, + { + "epoch": 0.07, + "learning_rate": 3.6008516152920485e-05, + "loss": 3.3728, + "step": 6146 + }, + { + "epoch": 0.07, + "learning_rate": 3.600388780894196e-05, + "loss": 2.2177, + "step": 6148 + }, + { + "epoch": 0.07, + "learning_rate": 3.5999259464963436e-05, + "loss": 3.4565, + "step": 6150 + }, + { + "epoch": 0.07, + "learning_rate": 3.5994631120984915e-05, + "loss": 2.6939, + "step": 6152 + }, + { + "epoch": 0.07, + "learning_rate": 3.599000277700639e-05, + "loss": 4.7529, + "step": 6154 + }, + { + "epoch": 0.07, + "learning_rate": 3.5985374433027866e-05, + "loss": 2.699, + "step": 6156 + }, + { + "epoch": 0.07, + "learning_rate": 3.598074608904934e-05, + "loss": 1.7361, + "step": 6158 + }, + { + "epoch": 0.07, + "learning_rate": 3.597611774507082e-05, + "loss": 0.9078, + "step": 6160 + }, + { + "epoch": 0.07, + "learning_rate": 3.597148940109229e-05, + "loss": 1.8286, + "step": 6162 + }, + { + "epoch": 0.07, + "learning_rate": 3.596686105711377e-05, + "loss": 3.6526, + "step": 6164 + }, + { + "epoch": 0.07, + "learning_rate": 3.596223271313524e-05, + "loss": 2.3326, + "step": 6166 + }, + { + "epoch": 0.07, + "learning_rate": 3.595760436915672e-05, + "loss": 2.8203, + "step": 6168 + }, + { + "epoch": 0.07, + "learning_rate": 3.595297602517819e-05, + "loss": 0.1101, + "step": 6170 + }, + { + "epoch": 0.07, + "learning_rate": 3.594834768119967e-05, + "loss": 1.3344, + "step": 6172 + }, + { + "epoch": 0.07, + "learning_rate": 3.594371933722114e-05, + "loss": 0.0612, + "step": 6174 + }, + { + "epoch": 0.07, + "learning_rate": 3.593909099324262e-05, + "loss": 0.0122, + "step": 6176 + }, + { + "epoch": 0.07, + "learning_rate": 3.593446264926409e-05, + "loss": 0.2434, + "step": 6178 + }, + { + "epoch": 0.07, + "learning_rate": 3.592983430528557e-05, + "loss": 2.1963, + "step": 6180 + }, + { + "epoch": 0.07, + "learning_rate": 3.592520596130704e-05, + "loss": 1.9084, + "step": 6182 + }, + { + "epoch": 0.07, + "learning_rate": 3.592057761732852e-05, + "loss": 2.9722, + "step": 6184 + }, + { + "epoch": 0.07, + "learning_rate": 3.5915949273349994e-05, + "loss": 2.6964, + "step": 6186 + }, + { + "epoch": 0.07, + "learning_rate": 3.591132092937147e-05, + "loss": 6.7566, + "step": 6188 + }, + { + "epoch": 0.07, + "learning_rate": 3.590669258539295e-05, + "loss": 0.0095, + "step": 6190 + }, + { + "epoch": 0.07, + "learning_rate": 3.5902064241414423e-05, + "loss": 1.7288, + "step": 6192 + }, + { + "epoch": 0.07, + "learning_rate": 3.58974358974359e-05, + "loss": 4.7589, + "step": 6194 + }, + { + "epoch": 0.07, + "learning_rate": 3.5892807553457374e-05, + "loss": 1.4515, + "step": 6196 + }, + { + "epoch": 0.07, + "learning_rate": 3.588817920947885e-05, + "loss": 2.6139, + "step": 6198 + }, + { + "epoch": 0.07, + "learning_rate": 3.5883550865500325e-05, + "loss": 1.6919, + "step": 6200 + }, + { + "epoch": 0.07, + "learning_rate": 3.5878922521521804e-05, + "loss": 1.4653, + "step": 6202 + }, + { + "epoch": 0.07, + "learning_rate": 3.5874294177543276e-05, + "loss": 1.531, + "step": 6204 + }, + { + "epoch": 0.07, + "learning_rate": 3.5869665833564755e-05, + "loss": 6.292, + "step": 6206 + }, + { + "epoch": 0.07, + "learning_rate": 3.586503748958623e-05, + "loss": 0.8069, + "step": 6208 + }, + { + "epoch": 0.07, + "learning_rate": 3.5860409145607706e-05, + "loss": 3.0635, + "step": 6210 + }, + { + "epoch": 0.07, + "learning_rate": 3.585578080162918e-05, + "loss": 2.3639, + "step": 6212 + }, + { + "epoch": 0.07, + "learning_rate": 3.5851152457650657e-05, + "loss": 5.0818, + "step": 6214 + }, + { + "epoch": 0.07, + "learning_rate": 3.584652411367213e-05, + "loss": 2.1696, + "step": 6216 + }, + { + "epoch": 0.07, + "learning_rate": 3.584189576969361e-05, + "loss": 0.5575, + "step": 6218 + }, + { + "epoch": 0.07, + "learning_rate": 3.583726742571508e-05, + "loss": 5.9583, + "step": 6220 + }, + { + "epoch": 0.07, + "learning_rate": 3.583263908173656e-05, + "loss": 1.9208, + "step": 6222 + }, + { + "epoch": 0.07, + "learning_rate": 3.582801073775803e-05, + "loss": 2.6501, + "step": 6224 + }, + { + "epoch": 0.07, + "learning_rate": 3.582338239377951e-05, + "loss": 2.1868, + "step": 6226 + }, + { + "epoch": 0.07, + "learning_rate": 3.581875404980098e-05, + "loss": 1.9882, + "step": 6228 + }, + { + "epoch": 0.07, + "learning_rate": 3.581412570582246e-05, + "loss": 0.0597, + "step": 6230 + }, + { + "epoch": 0.07, + "learning_rate": 3.580949736184394e-05, + "loss": 1.6396, + "step": 6232 + }, + { + "epoch": 0.07, + "learning_rate": 3.580486901786541e-05, + "loss": 1.5216, + "step": 6234 + }, + { + "epoch": 0.07, + "learning_rate": 3.580024067388689e-05, + "loss": 1.9029, + "step": 6236 + }, + { + "epoch": 0.07, + "learning_rate": 3.579561232990836e-05, + "loss": 0.3035, + "step": 6238 + }, + { + "epoch": 0.07, + "learning_rate": 3.579098398592984e-05, + "loss": 1.3344, + "step": 6240 + }, + { + "epoch": 0.07, + "learning_rate": 3.578635564195131e-05, + "loss": 1.353, + "step": 6242 + }, + { + "epoch": 0.07, + "learning_rate": 3.578172729797279e-05, + "loss": 3.1444, + "step": 6244 + }, + { + "epoch": 0.07, + "learning_rate": 3.577709895399426e-05, + "loss": 0.9602, + "step": 6246 + }, + { + "epoch": 0.07, + "learning_rate": 3.577247061001574e-05, + "loss": 1.8484, + "step": 6248 + }, + { + "epoch": 0.07, + "learning_rate": 3.5767842266037214e-05, + "loss": 2.581, + "step": 6250 + }, + { + "epoch": 0.07, + "learning_rate": 3.576321392205869e-05, + "loss": 2.9517, + "step": 6252 + }, + { + "epoch": 0.07, + "learning_rate": 3.5758585578080165e-05, + "loss": 5.4905, + "step": 6254 + }, + { + "epoch": 0.07, + "learning_rate": 3.5753957234101644e-05, + "loss": 4.2167, + "step": 6256 + }, + { + "epoch": 0.07, + "learning_rate": 3.5749328890123116e-05, + "loss": 2.2642, + "step": 6258 + }, + { + "epoch": 0.07, + "learning_rate": 3.5744700546144595e-05, + "loss": 4.9888, + "step": 6260 + }, + { + "epoch": 0.07, + "learning_rate": 3.574007220216607e-05, + "loss": 1.7525, + "step": 6262 + }, + { + "epoch": 0.07, + "learning_rate": 3.573544385818754e-05, + "loss": 5.1173, + "step": 6264 + }, + { + "epoch": 0.07, + "learning_rate": 3.573081551420902e-05, + "loss": 0.1202, + "step": 6266 + }, + { + "epoch": 0.07, + "learning_rate": 3.572618717023049e-05, + "loss": 1.8679, + "step": 6268 + }, + { + "epoch": 0.07, + "learning_rate": 3.572155882625197e-05, + "loss": 6.0863, + "step": 6270 + }, + { + "epoch": 0.07, + "learning_rate": 3.571693048227344e-05, + "loss": 2.0076, + "step": 6272 + }, + { + "epoch": 0.07, + "learning_rate": 3.571230213829492e-05, + "loss": 0.7637, + "step": 6274 + }, + { + "epoch": 0.07, + "learning_rate": 3.570767379431639e-05, + "loss": 1.9778, + "step": 6276 + }, + { + "epoch": 0.07, + "learning_rate": 3.570304545033787e-05, + "loss": 0.9807, + "step": 6278 + }, + { + "epoch": 0.07, + "learning_rate": 3.569841710635934e-05, + "loss": 1.3302, + "step": 6280 + }, + { + "epoch": 0.07, + "learning_rate": 3.569378876238082e-05, + "loss": 3.2368, + "step": 6282 + }, + { + "epoch": 0.07, + "learning_rate": 3.568916041840229e-05, + "loss": 5.3624, + "step": 6284 + }, + { + "epoch": 0.07, + "learning_rate": 3.568453207442377e-05, + "loss": 5.2366, + "step": 6286 + }, + { + "epoch": 0.07, + "learning_rate": 3.5679903730445244e-05, + "loss": 0.432, + "step": 6288 + }, + { + "epoch": 0.07, + "learning_rate": 3.567527538646672e-05, + "loss": 4.274, + "step": 6290 + }, + { + "epoch": 0.07, + "learning_rate": 3.5670647042488195e-05, + "loss": 0.0405, + "step": 6292 + }, + { + "epoch": 0.07, + "learning_rate": 3.5666018698509674e-05, + "loss": 1.0779, + "step": 6294 + }, + { + "epoch": 0.07, + "learning_rate": 3.566139035453115e-05, + "loss": 2.5997, + "step": 6296 + }, + { + "epoch": 0.07, + "learning_rate": 3.5656762010552624e-05, + "loss": 2.7718, + "step": 6298 + }, + { + "epoch": 0.07, + "learning_rate": 3.56521336665741e-05, + "loss": 2.3072, + "step": 6300 + }, + { + "epoch": 0.07, + "learning_rate": 3.5647505322595575e-05, + "loss": 1.613, + "step": 6302 + }, + { + "epoch": 0.07, + "learning_rate": 3.5642876978617054e-05, + "loss": 3.2601, + "step": 6304 + }, + { + "epoch": 0.07, + "learning_rate": 3.5638248634638526e-05, + "loss": 1.656, + "step": 6306 + }, + { + "epoch": 0.07, + "learning_rate": 3.5633620290660005e-05, + "loss": 1.537, + "step": 6308 + }, + { + "epoch": 0.07, + "learning_rate": 3.562899194668148e-05, + "loss": 1.8746, + "step": 6310 + }, + { + "epoch": 0.07, + "learning_rate": 3.5624363602702956e-05, + "loss": 4.1175, + "step": 6312 + }, + { + "epoch": 0.07, + "learning_rate": 3.561973525872443e-05, + "loss": 4.0924, + "step": 6314 + }, + { + "epoch": 0.07, + "learning_rate": 3.5615106914745907e-05, + "loss": 0.2121, + "step": 6316 + }, + { + "epoch": 0.07, + "learning_rate": 3.561047857076738e-05, + "loss": 3.9925, + "step": 6318 + }, + { + "epoch": 0.07, + "learning_rate": 3.560585022678886e-05, + "loss": 4.606, + "step": 6320 + }, + { + "epoch": 0.07, + "learning_rate": 3.560122188281033e-05, + "loss": 0.0094, + "step": 6322 + }, + { + "epoch": 0.07, + "learning_rate": 3.559659353883181e-05, + "loss": 0.9487, + "step": 6324 + }, + { + "epoch": 0.07, + "learning_rate": 3.559196519485328e-05, + "loss": 2.1643, + "step": 6326 + }, + { + "epoch": 0.07, + "learning_rate": 3.558733685087476e-05, + "loss": 2.7325, + "step": 6328 + }, + { + "epoch": 0.07, + "learning_rate": 3.558270850689623e-05, + "loss": 2.2619, + "step": 6330 + }, + { + "epoch": 0.07, + "learning_rate": 3.557808016291771e-05, + "loss": 8.0316, + "step": 6332 + }, + { + "epoch": 0.07, + "learning_rate": 3.557345181893918e-05, + "loss": 4.8703, + "step": 6334 + }, + { + "epoch": 0.07, + "learning_rate": 3.556882347496066e-05, + "loss": 4.3353, + "step": 6336 + }, + { + "epoch": 0.07, + "learning_rate": 3.556419513098214e-05, + "loss": 2.4057, + "step": 6338 + }, + { + "epoch": 0.07, + "learning_rate": 3.555956678700361e-05, + "loss": 3.6493, + "step": 6340 + }, + { + "epoch": 0.07, + "learning_rate": 3.555493844302509e-05, + "loss": 0.8267, + "step": 6342 + }, + { + "epoch": 0.07, + "learning_rate": 3.555031009904656e-05, + "loss": 5.0896, + "step": 6344 + }, + { + "epoch": 0.07, + "learning_rate": 3.554568175506804e-05, + "loss": 3.2058, + "step": 6346 + }, + { + "epoch": 0.07, + "learning_rate": 3.554105341108951e-05, + "loss": 3.4696, + "step": 6348 + }, + { + "epoch": 0.07, + "learning_rate": 3.553642506711099e-05, + "loss": 6.3969, + "step": 6350 + }, + { + "epoch": 0.07, + "learning_rate": 3.5531796723132464e-05, + "loss": 1.5527, + "step": 6352 + }, + { + "epoch": 0.07, + "learning_rate": 3.552716837915394e-05, + "loss": 1.6831, + "step": 6354 + }, + { + "epoch": 0.07, + "learning_rate": 3.5522540035175415e-05, + "loss": 0.4649, + "step": 6356 + }, + { + "epoch": 0.07, + "learning_rate": 3.5517911691196894e-05, + "loss": 0.9041, + "step": 6358 + }, + { + "epoch": 0.07, + "learning_rate": 3.5513283347218366e-05, + "loss": 1.4439, + "step": 6360 + }, + { + "epoch": 0.07, + "learning_rate": 3.5508655003239845e-05, + "loss": 4.1808, + "step": 6362 + }, + { + "epoch": 0.07, + "learning_rate": 3.550402665926132e-05, + "loss": 0.7698, + "step": 6364 + }, + { + "epoch": 0.07, + "learning_rate": 3.5499398315282796e-05, + "loss": 0.4028, + "step": 6366 + }, + { + "epoch": 0.07, + "learning_rate": 3.549476997130427e-05, + "loss": 0.0617, + "step": 6368 + }, + { + "epoch": 0.07, + "learning_rate": 3.5490141627325746e-05, + "loss": 2.7425, + "step": 6370 + }, + { + "epoch": 0.07, + "learning_rate": 3.548551328334722e-05, + "loss": 3.8886, + "step": 6372 + }, + { + "epoch": 0.07, + "learning_rate": 3.54808849393687e-05, + "loss": 4.3806, + "step": 6374 + }, + { + "epoch": 0.07, + "learning_rate": 3.547625659539017e-05, + "loss": 2.2566, + "step": 6376 + }, + { + "epoch": 0.07, + "learning_rate": 3.547162825141165e-05, + "loss": 0.6849, + "step": 6378 + }, + { + "epoch": 0.07, + "learning_rate": 3.546699990743313e-05, + "loss": 2.1014, + "step": 6380 + }, + { + "epoch": 0.07, + "learning_rate": 3.54623715634546e-05, + "loss": 0.9316, + "step": 6382 + }, + { + "epoch": 0.07, + "learning_rate": 3.545774321947608e-05, + "loss": 2.9032, + "step": 6384 + }, + { + "epoch": 0.07, + "learning_rate": 3.545311487549755e-05, + "loss": 2.8254, + "step": 6386 + }, + { + "epoch": 0.07, + "learning_rate": 3.544848653151903e-05, + "loss": 5.0328, + "step": 6388 + }, + { + "epoch": 0.07, + "learning_rate": 3.54438581875405e-05, + "loss": 4.252, + "step": 6390 + }, + { + "epoch": 0.07, + "learning_rate": 3.543922984356198e-05, + "loss": 2.4049, + "step": 6392 + }, + { + "epoch": 0.07, + "learning_rate": 3.543460149958345e-05, + "loss": 1.4766, + "step": 6394 + }, + { + "epoch": 0.07, + "learning_rate": 3.542997315560493e-05, + "loss": 3.2125, + "step": 6396 + }, + { + "epoch": 0.07, + "learning_rate": 3.54253448116264e-05, + "loss": 3.9578, + "step": 6398 + }, + { + "epoch": 0.07, + "learning_rate": 3.542071646764788e-05, + "loss": 1.2052, + "step": 6400 + }, + { + "epoch": 0.07, + "learning_rate": 3.541608812366935e-05, + "loss": 3.9805, + "step": 6402 + }, + { + "epoch": 0.07, + "learning_rate": 3.541145977969083e-05, + "loss": 0.7248, + "step": 6404 + }, + { + "epoch": 0.07, + "learning_rate": 3.5406831435712304e-05, + "loss": 0.0032, + "step": 6406 + }, + { + "epoch": 0.07, + "learning_rate": 3.5402203091733776e-05, + "loss": 1.1156, + "step": 6408 + }, + { + "epoch": 0.07, + "learning_rate": 3.5397574747755255e-05, + "loss": 2.2453, + "step": 6410 + }, + { + "epoch": 0.07, + "learning_rate": 3.539294640377673e-05, + "loss": 4.924, + "step": 6412 + }, + { + "epoch": 0.07, + "learning_rate": 3.5388318059798206e-05, + "loss": 3.3819, + "step": 6414 + }, + { + "epoch": 0.07, + "learning_rate": 3.538368971581968e-05, + "loss": 3.3417, + "step": 6416 + }, + { + "epoch": 0.07, + "learning_rate": 3.537906137184116e-05, + "loss": 0.619, + "step": 6418 + }, + { + "epoch": 0.07, + "learning_rate": 3.537443302786263e-05, + "loss": 2.0827, + "step": 6420 + }, + { + "epoch": 0.07, + "learning_rate": 3.536980468388411e-05, + "loss": 2.3757, + "step": 6422 + }, + { + "epoch": 0.07, + "learning_rate": 3.536517633990558e-05, + "loss": 2.6508, + "step": 6424 + }, + { + "epoch": 0.07, + "learning_rate": 3.536054799592706e-05, + "loss": 0.4813, + "step": 6426 + }, + { + "epoch": 0.07, + "learning_rate": 3.535591965194853e-05, + "loss": 1.1806, + "step": 6428 + }, + { + "epoch": 0.07, + "learning_rate": 3.535129130797001e-05, + "loss": 2.312, + "step": 6430 + }, + { + "epoch": 0.07, + "learning_rate": 3.534666296399148e-05, + "loss": 3.6402, + "step": 6432 + }, + { + "epoch": 0.07, + "learning_rate": 3.534203462001296e-05, + "loss": 0.5747, + "step": 6434 + }, + { + "epoch": 0.07, + "learning_rate": 3.533740627603443e-05, + "loss": 2.7136, + "step": 6436 + }, + { + "epoch": 0.07, + "learning_rate": 3.533277793205591e-05, + "loss": 6.3394, + "step": 6438 + }, + { + "epoch": 0.07, + "learning_rate": 3.532814958807738e-05, + "loss": 2.585, + "step": 6440 + }, + { + "epoch": 0.07, + "learning_rate": 3.532352124409886e-05, + "loss": 0.2716, + "step": 6442 + }, + { + "epoch": 0.07, + "learning_rate": 3.531889290012034e-05, + "loss": 1.9206, + "step": 6444 + }, + { + "epoch": 0.07, + "learning_rate": 3.531426455614181e-05, + "loss": 1.2189, + "step": 6446 + }, + { + "epoch": 0.07, + "learning_rate": 3.530963621216329e-05, + "loss": 0.3429, + "step": 6448 + }, + { + "epoch": 0.07, + "learning_rate": 3.5305007868184763e-05, + "loss": 0.899, + "step": 6450 + }, + { + "epoch": 0.07, + "learning_rate": 3.530037952420624e-05, + "loss": 1.0293, + "step": 6452 + }, + { + "epoch": 0.07, + "learning_rate": 3.5295751180227714e-05, + "loss": 1.3007, + "step": 6454 + }, + { + "epoch": 0.07, + "learning_rate": 3.529112283624919e-05, + "loss": 6.9037, + "step": 6456 + }, + { + "epoch": 0.07, + "learning_rate": 3.5286494492270665e-05, + "loss": 0.1019, + "step": 6458 + }, + { + "epoch": 0.07, + "learning_rate": 3.5281866148292144e-05, + "loss": 3.0572, + "step": 6460 + }, + { + "epoch": 0.07, + "learning_rate": 3.5277237804313616e-05, + "loss": 0.1321, + "step": 6462 + }, + { + "epoch": 0.07, + "learning_rate": 3.5272609460335095e-05, + "loss": 1.2291, + "step": 6464 + }, + { + "epoch": 0.07, + "learning_rate": 3.526798111635657e-05, + "loss": 5.5503, + "step": 6466 + }, + { + "epoch": 0.07, + "learning_rate": 3.5263352772378046e-05, + "loss": 2.3046, + "step": 6468 + }, + { + "epoch": 0.07, + "learning_rate": 3.525872442839952e-05, + "loss": 0.8816, + "step": 6470 + }, + { + "epoch": 0.07, + "learning_rate": 3.5254096084420996e-05, + "loss": 1.8612, + "step": 6472 + }, + { + "epoch": 0.07, + "learning_rate": 3.524946774044247e-05, + "loss": 1.0378, + "step": 6474 + }, + { + "epoch": 0.07, + "learning_rate": 3.524483939646395e-05, + "loss": 4.6728, + "step": 6476 + }, + { + "epoch": 0.07, + "learning_rate": 3.524021105248542e-05, + "loss": 3.0004, + "step": 6478 + }, + { + "epoch": 0.07, + "learning_rate": 3.52355827085069e-05, + "loss": 2.2216, + "step": 6480 + }, + { + "epoch": 0.07, + "learning_rate": 3.523095436452837e-05, + "loss": 2.0644, + "step": 6482 + }, + { + "epoch": 0.07, + "learning_rate": 3.522632602054985e-05, + "loss": 2.2373, + "step": 6484 + }, + { + "epoch": 0.07, + "learning_rate": 3.522169767657133e-05, + "loss": 1.8305, + "step": 6486 + }, + { + "epoch": 0.07, + "learning_rate": 3.52170693325928e-05, + "loss": 5.1282, + "step": 6488 + }, + { + "epoch": 0.07, + "learning_rate": 3.521244098861428e-05, + "loss": 2.8125, + "step": 6490 + }, + { + "epoch": 0.07, + "learning_rate": 3.520781264463575e-05, + "loss": 2.8627, + "step": 6492 + }, + { + "epoch": 0.07, + "learning_rate": 3.520318430065723e-05, + "loss": 0.6792, + "step": 6494 + }, + { + "epoch": 0.07, + "learning_rate": 3.51985559566787e-05, + "loss": 0.9878, + "step": 6496 + }, + { + "epoch": 0.07, + "learning_rate": 3.519392761270018e-05, + "loss": 0.2892, + "step": 6498 + }, + { + "epoch": 0.07, + "learning_rate": 3.518929926872165e-05, + "loss": 0.0783, + "step": 6500 + }, + { + "epoch": 0.07, + "learning_rate": 3.518467092474313e-05, + "loss": 5.6691, + "step": 6502 + }, + { + "epoch": 0.07, + "learning_rate": 3.51800425807646e-05, + "loss": 0.8315, + "step": 6504 + }, + { + "epoch": 0.07, + "learning_rate": 3.517541423678608e-05, + "loss": 3.2097, + "step": 6506 + }, + { + "epoch": 0.07, + "learning_rate": 3.5170785892807554e-05, + "loss": 3.2165, + "step": 6508 + }, + { + "epoch": 0.07, + "learning_rate": 3.516615754882903e-05, + "loss": 0.0331, + "step": 6510 + }, + { + "epoch": 0.08, + "learning_rate": 3.5161529204850505e-05, + "loss": 0.5675, + "step": 6512 + }, + { + "epoch": 0.08, + "learning_rate": 3.5156900860871984e-05, + "loss": 1.0426, + "step": 6514 + }, + { + "epoch": 0.08, + "learning_rate": 3.5152272516893456e-05, + "loss": 0.8278, + "step": 6516 + }, + { + "epoch": 0.08, + "learning_rate": 3.5147644172914935e-05, + "loss": 1.2118, + "step": 6518 + }, + { + "epoch": 0.08, + "learning_rate": 3.514301582893641e-05, + "loss": 5.2994, + "step": 6520 + }, + { + "epoch": 0.08, + "learning_rate": 3.5138387484957885e-05, + "loss": 2.2023, + "step": 6522 + }, + { + "epoch": 0.08, + "learning_rate": 3.5133759140979364e-05, + "loss": 2.0936, + "step": 6524 + }, + { + "epoch": 0.08, + "learning_rate": 3.5129130797000836e-05, + "loss": 0.6253, + "step": 6526 + }, + { + "epoch": 0.08, + "learning_rate": 3.5124502453022315e-05, + "loss": 4.5296, + "step": 6528 + }, + { + "epoch": 0.08, + "learning_rate": 3.511987410904379e-05, + "loss": 2.4657, + "step": 6530 + }, + { + "epoch": 0.08, + "learning_rate": 3.5115245765065266e-05, + "loss": 4.2719, + "step": 6532 + }, + { + "epoch": 0.08, + "learning_rate": 3.511061742108674e-05, + "loss": 0.926, + "step": 6534 + }, + { + "epoch": 0.08, + "learning_rate": 3.510598907710822e-05, + "loss": 3.737, + "step": 6536 + }, + { + "epoch": 0.08, + "learning_rate": 3.510136073312969e-05, + "loss": 4.6151, + "step": 6538 + }, + { + "epoch": 0.08, + "learning_rate": 3.509673238915117e-05, + "loss": 2.7746, + "step": 6540 + }, + { + "epoch": 0.08, + "learning_rate": 3.509210404517264e-05, + "loss": 3.4812, + "step": 6542 + }, + { + "epoch": 0.08, + "learning_rate": 3.508747570119412e-05, + "loss": 1.5641, + "step": 6544 + }, + { + "epoch": 0.08, + "learning_rate": 3.508284735721559e-05, + "loss": 1.6813, + "step": 6546 + }, + { + "epoch": 0.08, + "learning_rate": 3.507821901323707e-05, + "loss": 0.4744, + "step": 6548 + }, + { + "epoch": 0.08, + "learning_rate": 3.507359066925854e-05, + "loss": 3.4244, + "step": 6550 + }, + { + "epoch": 0.08, + "learning_rate": 3.506896232528002e-05, + "loss": 0.1827, + "step": 6552 + }, + { + "epoch": 0.08, + "learning_rate": 3.506433398130149e-05, + "loss": 3.9521, + "step": 6554 + }, + { + "epoch": 0.08, + "learning_rate": 3.5059705637322964e-05, + "loss": 0.0408, + "step": 6556 + }, + { + "epoch": 0.08, + "learning_rate": 3.505507729334444e-05, + "loss": 4.701, + "step": 6558 + }, + { + "epoch": 0.08, + "learning_rate": 3.5050448949365915e-05, + "loss": 4.3409, + "step": 6560 + }, + { + "epoch": 0.08, + "learning_rate": 3.5045820605387394e-05, + "loss": 3.2009, + "step": 6562 + }, + { + "epoch": 0.08, + "learning_rate": 3.5041192261408866e-05, + "loss": 1.3537, + "step": 6564 + }, + { + "epoch": 0.08, + "learning_rate": 3.5036563917430345e-05, + "loss": 1.2877, + "step": 6566 + }, + { + "epoch": 0.08, + "learning_rate": 3.503193557345182e-05, + "loss": 2.0135, + "step": 6568 + }, + { + "epoch": 0.08, + "learning_rate": 3.5027307229473296e-05, + "loss": 2.6924, + "step": 6570 + }, + { + "epoch": 0.08, + "learning_rate": 3.502267888549477e-05, + "loss": 0.0741, + "step": 6572 + }, + { + "epoch": 0.08, + "learning_rate": 3.5018050541516247e-05, + "loss": 2.9057, + "step": 6574 + }, + { + "epoch": 0.08, + "learning_rate": 3.501342219753772e-05, + "loss": 0.0343, + "step": 6576 + }, + { + "epoch": 0.08, + "learning_rate": 3.50087938535592e-05, + "loss": 2.0718, + "step": 6578 + }, + { + "epoch": 0.08, + "learning_rate": 3.500416550958067e-05, + "loss": 1.6488, + "step": 6580 + }, + { + "epoch": 0.08, + "learning_rate": 3.499953716560215e-05, + "loss": 4.005, + "step": 6582 + }, + { + "epoch": 0.08, + "learning_rate": 3.499490882162362e-05, + "loss": 1.1, + "step": 6584 + }, + { + "epoch": 0.08, + "learning_rate": 3.49902804776451e-05, + "loss": 0.0787, + "step": 6586 + }, + { + "epoch": 0.08, + "learning_rate": 3.498565213366657e-05, + "loss": 0.2574, + "step": 6588 + }, + { + "epoch": 0.08, + "learning_rate": 3.498102378968805e-05, + "loss": 0.2301, + "step": 6590 + }, + { + "epoch": 0.08, + "learning_rate": 3.497639544570953e-05, + "loss": 0.582, + "step": 6592 + }, + { + "epoch": 0.08, + "learning_rate": 3.4971767101731e-05, + "loss": 2.7849, + "step": 6594 + }, + { + "epoch": 0.08, + "learning_rate": 3.496713875775248e-05, + "loss": 0.661, + "step": 6596 + }, + { + "epoch": 0.08, + "learning_rate": 3.496251041377395e-05, + "loss": 3.1386, + "step": 6598 + }, + { + "epoch": 0.08, + "learning_rate": 3.495788206979543e-05, + "loss": 5.8302, + "step": 6600 + }, + { + "epoch": 0.08, + "learning_rate": 3.49532537258169e-05, + "loss": 5.7031, + "step": 6602 + }, + { + "epoch": 0.08, + "learning_rate": 3.494862538183838e-05, + "loss": 0.7778, + "step": 6604 + }, + { + "epoch": 0.08, + "learning_rate": 3.494399703785985e-05, + "loss": 10.9303, + "step": 6606 + }, + { + "epoch": 0.08, + "learning_rate": 3.493936869388133e-05, + "loss": 4.0441, + "step": 6608 + }, + { + "epoch": 0.08, + "learning_rate": 3.4934740349902804e-05, + "loss": 0.7041, + "step": 6610 + }, + { + "epoch": 0.08, + "learning_rate": 3.493011200592428e-05, + "loss": 1.3676, + "step": 6612 + }, + { + "epoch": 0.08, + "learning_rate": 3.4925483661945755e-05, + "loss": 4.3869, + "step": 6614 + }, + { + "epoch": 0.08, + "learning_rate": 3.4920855317967234e-05, + "loss": 4.3033, + "step": 6616 + }, + { + "epoch": 0.08, + "learning_rate": 3.4916226973988706e-05, + "loss": 1.6583, + "step": 6618 + }, + { + "epoch": 0.08, + "learning_rate": 3.4911598630010185e-05, + "loss": 3.9062, + "step": 6620 + }, + { + "epoch": 0.08, + "learning_rate": 3.490697028603166e-05, + "loss": 1.0324, + "step": 6622 + }, + { + "epoch": 0.08, + "learning_rate": 3.4902341942053136e-05, + "loss": 1.6709, + "step": 6624 + }, + { + "epoch": 0.08, + "learning_rate": 3.489771359807461e-05, + "loss": 2.0905, + "step": 6626 + }, + { + "epoch": 0.08, + "learning_rate": 3.4893085254096086e-05, + "loss": 0.4015, + "step": 6628 + }, + { + "epoch": 0.08, + "learning_rate": 3.4888456910117565e-05, + "loss": 0.3686, + "step": 6630 + }, + { + "epoch": 0.08, + "learning_rate": 3.488382856613904e-05, + "loss": 4.5366, + "step": 6632 + }, + { + "epoch": 0.08, + "learning_rate": 3.4879200222160516e-05, + "loss": 1.696, + "step": 6634 + }, + { + "epoch": 0.08, + "learning_rate": 3.487457187818199e-05, + "loss": 0.6729, + "step": 6636 + }, + { + "epoch": 0.08, + "learning_rate": 3.486994353420347e-05, + "loss": 0.9021, + "step": 6638 + }, + { + "epoch": 0.08, + "learning_rate": 3.486531519022494e-05, + "loss": 1.8651, + "step": 6640 + }, + { + "epoch": 0.08, + "learning_rate": 3.486068684624642e-05, + "loss": 5.343, + "step": 6642 + }, + { + "epoch": 0.08, + "learning_rate": 3.485605850226789e-05, + "loss": 1.8399, + "step": 6644 + }, + { + "epoch": 0.08, + "learning_rate": 3.485143015828937e-05, + "loss": 2.9084, + "step": 6646 + }, + { + "epoch": 0.08, + "learning_rate": 3.484680181431084e-05, + "loss": 0.0079, + "step": 6648 + }, + { + "epoch": 0.08, + "learning_rate": 3.484217347033232e-05, + "loss": 5.2796, + "step": 6650 + }, + { + "epoch": 0.08, + "learning_rate": 3.483754512635379e-05, + "loss": 1.7186, + "step": 6652 + }, + { + "epoch": 0.08, + "learning_rate": 3.483291678237527e-05, + "loss": 3.7599, + "step": 6654 + }, + { + "epoch": 0.08, + "learning_rate": 3.482828843839674e-05, + "loss": 1.8109, + "step": 6656 + }, + { + "epoch": 0.08, + "learning_rate": 3.482366009441822e-05, + "loss": 1.8401, + "step": 6658 + }, + { + "epoch": 0.08, + "learning_rate": 3.481903175043969e-05, + "loss": 3.4504, + "step": 6660 + }, + { + "epoch": 0.08, + "learning_rate": 3.481440340646117e-05, + "loss": 1.2697, + "step": 6662 + }, + { + "epoch": 0.08, + "learning_rate": 3.4809775062482644e-05, + "loss": 2.455, + "step": 6664 + }, + { + "epoch": 0.08, + "learning_rate": 3.480514671850412e-05, + "loss": 6.1099, + "step": 6666 + }, + { + "epoch": 0.08, + "learning_rate": 3.4800518374525595e-05, + "loss": 7.3682, + "step": 6668 + }, + { + "epoch": 0.08, + "learning_rate": 3.4795890030547074e-05, + "loss": 1.1983, + "step": 6670 + }, + { + "epoch": 0.08, + "learning_rate": 3.479126168656855e-05, + "loss": 0.5042, + "step": 6672 + }, + { + "epoch": 0.08, + "learning_rate": 3.4786633342590025e-05, + "loss": 0.0694, + "step": 6674 + }, + { + "epoch": 0.08, + "learning_rate": 3.47820049986115e-05, + "loss": 2.5893, + "step": 6676 + }, + { + "epoch": 0.08, + "learning_rate": 3.4777376654632975e-05, + "loss": 4.3696, + "step": 6678 + }, + { + "epoch": 0.08, + "learning_rate": 3.4772748310654454e-05, + "loss": 3.769, + "step": 6680 + }, + { + "epoch": 0.08, + "learning_rate": 3.4768119966675926e-05, + "loss": 1.3759, + "step": 6682 + }, + { + "epoch": 0.08, + "learning_rate": 3.4763491622697405e-05, + "loss": 4.2688, + "step": 6684 + }, + { + "epoch": 0.08, + "learning_rate": 3.475886327871888e-05, + "loss": 1.6184, + "step": 6686 + }, + { + "epoch": 0.08, + "learning_rate": 3.4754234934740356e-05, + "loss": 1.2228, + "step": 6688 + }, + { + "epoch": 0.08, + "learning_rate": 3.474960659076183e-05, + "loss": 0.4686, + "step": 6690 + }, + { + "epoch": 0.08, + "learning_rate": 3.474497824678331e-05, + "loss": 3.4913, + "step": 6692 + }, + { + "epoch": 0.08, + "learning_rate": 3.474034990280478e-05, + "loss": 0.2598, + "step": 6694 + }, + { + "epoch": 0.08, + "learning_rate": 3.473572155882626e-05, + "loss": 2.1474, + "step": 6696 + }, + { + "epoch": 0.08, + "learning_rate": 3.473109321484773e-05, + "loss": 3.4515, + "step": 6698 + }, + { + "epoch": 0.08, + "learning_rate": 3.47264648708692e-05, + "loss": 0.5865, + "step": 6700 + }, + { + "epoch": 0.08, + "learning_rate": 3.472183652689068e-05, + "loss": 0.167, + "step": 6702 + }, + { + "epoch": 0.08, + "learning_rate": 3.471720818291215e-05, + "loss": 0.0917, + "step": 6704 + }, + { + "epoch": 0.08, + "learning_rate": 3.471257983893363e-05, + "loss": 4.3045, + "step": 6706 + }, + { + "epoch": 0.08, + "learning_rate": 3.47079514949551e-05, + "loss": 0.5585, + "step": 6708 + }, + { + "epoch": 0.08, + "learning_rate": 3.470332315097658e-05, + "loss": 1.9404, + "step": 6710 + }, + { + "epoch": 0.08, + "learning_rate": 3.4698694806998054e-05, + "loss": 0.7362, + "step": 6712 + }, + { + "epoch": 0.08, + "learning_rate": 3.469406646301953e-05, + "loss": 4.295, + "step": 6714 + }, + { + "epoch": 0.08, + "learning_rate": 3.4689438119041005e-05, + "loss": 7.2574, + "step": 6716 + }, + { + "epoch": 0.08, + "learning_rate": 3.4684809775062484e-05, + "loss": 8.0232, + "step": 6718 + }, + { + "epoch": 0.08, + "learning_rate": 3.4680181431083956e-05, + "loss": 2.6064, + "step": 6720 + }, + { + "epoch": 0.08, + "learning_rate": 3.4675553087105435e-05, + "loss": 0.8444, + "step": 6722 + }, + { + "epoch": 0.08, + "learning_rate": 3.467092474312691e-05, + "loss": 1.2242, + "step": 6724 + }, + { + "epoch": 0.08, + "learning_rate": 3.4666296399148386e-05, + "loss": 1.6801, + "step": 6726 + }, + { + "epoch": 0.08, + "learning_rate": 3.466166805516986e-05, + "loss": 0.8717, + "step": 6728 + }, + { + "epoch": 0.08, + "learning_rate": 3.4657039711191336e-05, + "loss": 2.9909, + "step": 6730 + }, + { + "epoch": 0.08, + "learning_rate": 3.465241136721281e-05, + "loss": 0.1373, + "step": 6732 + }, + { + "epoch": 0.08, + "learning_rate": 3.464778302323429e-05, + "loss": 1.8516, + "step": 6734 + }, + { + "epoch": 0.08, + "learning_rate": 3.464315467925576e-05, + "loss": 2.8465, + "step": 6736 + }, + { + "epoch": 0.08, + "learning_rate": 3.463852633527724e-05, + "loss": 1.2622, + "step": 6738 + }, + { + "epoch": 0.08, + "learning_rate": 3.463389799129872e-05, + "loss": 2.2624, + "step": 6740 + }, + { + "epoch": 0.08, + "learning_rate": 3.462926964732019e-05, + "loss": 4.2826, + "step": 6742 + }, + { + "epoch": 0.08, + "learning_rate": 3.462464130334167e-05, + "loss": 5.0696, + "step": 6744 + }, + { + "epoch": 0.08, + "learning_rate": 3.462001295936314e-05, + "loss": 3.732, + "step": 6746 + }, + { + "epoch": 0.08, + "learning_rate": 3.461538461538462e-05, + "loss": 3.1029, + "step": 6748 + }, + { + "epoch": 0.08, + "learning_rate": 3.461075627140609e-05, + "loss": 0.7313, + "step": 6750 + }, + { + "epoch": 0.08, + "learning_rate": 3.460612792742757e-05, + "loss": 4.4085, + "step": 6752 + }, + { + "epoch": 0.08, + "learning_rate": 3.460149958344904e-05, + "loss": 1.8729, + "step": 6754 + }, + { + "epoch": 0.08, + "learning_rate": 3.459687123947052e-05, + "loss": 0.0268, + "step": 6756 + }, + { + "epoch": 0.08, + "learning_rate": 3.459224289549199e-05, + "loss": 8.3039, + "step": 6758 + }, + { + "epoch": 0.08, + "learning_rate": 3.458761455151347e-05, + "loss": 0.2816, + "step": 6760 + }, + { + "epoch": 0.08, + "learning_rate": 3.458298620753494e-05, + "loss": 3.4315, + "step": 6762 + }, + { + "epoch": 0.08, + "learning_rate": 3.457835786355642e-05, + "loss": 2.2628, + "step": 6764 + }, + { + "epoch": 0.08, + "learning_rate": 3.4573729519577894e-05, + "loss": 2.7035, + "step": 6766 + }, + { + "epoch": 0.08, + "learning_rate": 3.456910117559937e-05, + "loss": 0.0714, + "step": 6768 + }, + { + "epoch": 0.08, + "learning_rate": 3.4564472831620845e-05, + "loss": 1.8976, + "step": 6770 + }, + { + "epoch": 0.08, + "learning_rate": 3.4559844487642324e-05, + "loss": 4.1117, + "step": 6772 + }, + { + "epoch": 0.08, + "learning_rate": 3.4555216143663796e-05, + "loss": 2.436, + "step": 6774 + }, + { + "epoch": 0.08, + "learning_rate": 3.4550587799685275e-05, + "loss": 3.1147, + "step": 6776 + }, + { + "epoch": 0.08, + "learning_rate": 3.454595945570675e-05, + "loss": 2.5759, + "step": 6778 + }, + { + "epoch": 0.08, + "learning_rate": 3.4541331111728225e-05, + "loss": 0.0236, + "step": 6780 + }, + { + "epoch": 0.08, + "learning_rate": 3.4536702767749704e-05, + "loss": 0.7883, + "step": 6782 + }, + { + "epoch": 0.08, + "learning_rate": 3.4532074423771176e-05, + "loss": 0.4863, + "step": 6784 + }, + { + "epoch": 0.08, + "learning_rate": 3.4527446079792655e-05, + "loss": 2.1716, + "step": 6786 + }, + { + "epoch": 0.08, + "learning_rate": 3.452281773581413e-05, + "loss": 0.0029, + "step": 6788 + }, + { + "epoch": 0.08, + "learning_rate": 3.4518189391835606e-05, + "loss": 2.7143, + "step": 6790 + }, + { + "epoch": 0.08, + "learning_rate": 3.451356104785708e-05, + "loss": 0.1565, + "step": 6792 + }, + { + "epoch": 0.08, + "learning_rate": 3.450893270387856e-05, + "loss": 4.1106, + "step": 6794 + }, + { + "epoch": 0.08, + "learning_rate": 3.450430435990003e-05, + "loss": 0.0971, + "step": 6796 + }, + { + "epoch": 0.08, + "learning_rate": 3.449967601592151e-05, + "loss": 4.7502, + "step": 6798 + }, + { + "epoch": 0.08, + "learning_rate": 3.449504767194298e-05, + "loss": 0.6609, + "step": 6800 + }, + { + "epoch": 0.08, + "learning_rate": 3.449041932796446e-05, + "loss": 1.0782, + "step": 6802 + }, + { + "epoch": 0.08, + "learning_rate": 3.448579098398593e-05, + "loss": 0.4513, + "step": 6804 + }, + { + "epoch": 0.08, + "learning_rate": 3.448116264000741e-05, + "loss": 2.3011, + "step": 6806 + }, + { + "epoch": 0.08, + "learning_rate": 3.447653429602888e-05, + "loss": 2.5149, + "step": 6808 + }, + { + "epoch": 0.08, + "learning_rate": 3.447190595205036e-05, + "loss": 3.5451, + "step": 6810 + }, + { + "epoch": 0.08, + "learning_rate": 3.446727760807183e-05, + "loss": 0.7672, + "step": 6812 + }, + { + "epoch": 0.08, + "learning_rate": 3.446264926409331e-05, + "loss": 3.0574, + "step": 6814 + }, + { + "epoch": 0.08, + "learning_rate": 3.445802092011478e-05, + "loss": 0.0037, + "step": 6816 + }, + { + "epoch": 0.08, + "learning_rate": 3.445339257613626e-05, + "loss": 1.3192, + "step": 6818 + }, + { + "epoch": 0.08, + "learning_rate": 3.444876423215774e-05, + "loss": 3.0898, + "step": 6820 + }, + { + "epoch": 0.08, + "learning_rate": 3.444413588817921e-05, + "loss": 7.7, + "step": 6822 + }, + { + "epoch": 0.08, + "learning_rate": 3.443950754420069e-05, + "loss": 0.3386, + "step": 6824 + }, + { + "epoch": 0.08, + "learning_rate": 3.4434879200222164e-05, + "loss": 0.0607, + "step": 6826 + }, + { + "epoch": 0.08, + "learning_rate": 3.443025085624364e-05, + "loss": 0.1084, + "step": 6828 + }, + { + "epoch": 0.08, + "learning_rate": 3.4425622512265114e-05, + "loss": 0.6104, + "step": 6830 + }, + { + "epoch": 0.08, + "learning_rate": 3.442099416828659e-05, + "loss": 5.7152, + "step": 6832 + }, + { + "epoch": 0.08, + "learning_rate": 3.4416365824308065e-05, + "loss": 5.336, + "step": 6834 + }, + { + "epoch": 0.08, + "learning_rate": 3.4411737480329544e-05, + "loss": 4.6637, + "step": 6836 + }, + { + "epoch": 0.08, + "learning_rate": 3.4407109136351016e-05, + "loss": 10.8184, + "step": 6838 + }, + { + "epoch": 0.08, + "learning_rate": 3.4402480792372495e-05, + "loss": 0.3813, + "step": 6840 + }, + { + "epoch": 0.08, + "learning_rate": 3.439785244839397e-05, + "loss": 0.9177, + "step": 6842 + }, + { + "epoch": 0.08, + "learning_rate": 3.439322410441544e-05, + "loss": 2.3462, + "step": 6844 + }, + { + "epoch": 0.08, + "learning_rate": 3.438859576043692e-05, + "loss": 2.2589, + "step": 6846 + }, + { + "epoch": 0.08, + "learning_rate": 3.438396741645839e-05, + "loss": 5.7854, + "step": 6848 + }, + { + "epoch": 0.08, + "learning_rate": 3.437933907247987e-05, + "loss": 1.9808, + "step": 6850 + }, + { + "epoch": 0.08, + "learning_rate": 3.437471072850134e-05, + "loss": 1.918, + "step": 6852 + }, + { + "epoch": 0.08, + "learning_rate": 3.437008238452282e-05, + "loss": 2.5519, + "step": 6854 + }, + { + "epoch": 0.08, + "learning_rate": 3.436545404054429e-05, + "loss": 4.5179, + "step": 6856 + }, + { + "epoch": 0.08, + "learning_rate": 3.436082569656577e-05, + "loss": 1.5449, + "step": 6858 + }, + { + "epoch": 0.08, + "learning_rate": 3.435619735258724e-05, + "loss": 0.3575, + "step": 6860 + }, + { + "epoch": 0.08, + "learning_rate": 3.435156900860872e-05, + "loss": 1.344, + "step": 6862 + }, + { + "epoch": 0.08, + "learning_rate": 3.434694066463019e-05, + "loss": 7.4997, + "step": 6864 + }, + { + "epoch": 0.08, + "learning_rate": 3.434231232065167e-05, + "loss": 0.131, + "step": 6866 + }, + { + "epoch": 0.08, + "learning_rate": 3.4337683976673144e-05, + "loss": 0.3638, + "step": 6868 + }, + { + "epoch": 0.08, + "learning_rate": 3.433305563269462e-05, + "loss": 1.4002, + "step": 6870 + }, + { + "epoch": 0.08, + "learning_rate": 3.4328427288716095e-05, + "loss": 1.0868, + "step": 6872 + }, + { + "epoch": 0.08, + "learning_rate": 3.4323798944737574e-05, + "loss": 1.301, + "step": 6874 + }, + { + "epoch": 0.08, + "learning_rate": 3.4319170600759046e-05, + "loss": 0.2701, + "step": 6876 + }, + { + "epoch": 0.08, + "learning_rate": 3.4314542256780525e-05, + "loss": 0.2271, + "step": 6878 + }, + { + "epoch": 0.08, + "learning_rate": 3.4309913912802e-05, + "loss": 1.3908, + "step": 6880 + }, + { + "epoch": 0.08, + "learning_rate": 3.4305285568823475e-05, + "loss": 3.7848, + "step": 6882 + }, + { + "epoch": 0.08, + "learning_rate": 3.4300657224844954e-05, + "loss": 1.8846, + "step": 6884 + }, + { + "epoch": 0.08, + "learning_rate": 3.4296028880866426e-05, + "loss": 0.2371, + "step": 6886 + }, + { + "epoch": 0.08, + "learning_rate": 3.4291400536887905e-05, + "loss": 2.9735, + "step": 6888 + }, + { + "epoch": 0.08, + "learning_rate": 3.428677219290938e-05, + "loss": 9.3522, + "step": 6890 + }, + { + "epoch": 0.08, + "learning_rate": 3.4282143848930856e-05, + "loss": 0.0021, + "step": 6892 + }, + { + "epoch": 0.08, + "learning_rate": 3.427751550495233e-05, + "loss": 5.1614, + "step": 6894 + }, + { + "epoch": 0.08, + "learning_rate": 3.427288716097381e-05, + "loss": 0.3718, + "step": 6896 + }, + { + "epoch": 0.08, + "learning_rate": 3.426825881699528e-05, + "loss": 1.7488, + "step": 6898 + }, + { + "epoch": 0.08, + "learning_rate": 3.426363047301676e-05, + "loss": 7.3218, + "step": 6900 + }, + { + "epoch": 0.08, + "learning_rate": 3.425900212903823e-05, + "loss": 0.0037, + "step": 6902 + }, + { + "epoch": 0.08, + "learning_rate": 3.425437378505971e-05, + "loss": 5.9834, + "step": 6904 + }, + { + "epoch": 0.08, + "learning_rate": 3.424974544108118e-05, + "loss": 4.5794, + "step": 6906 + }, + { + "epoch": 0.08, + "learning_rate": 3.424511709710266e-05, + "loss": 3.9434, + "step": 6908 + }, + { + "epoch": 0.08, + "learning_rate": 3.424048875312413e-05, + "loss": 0.0166, + "step": 6910 + }, + { + "epoch": 0.08, + "learning_rate": 3.423586040914561e-05, + "loss": 4.1739, + "step": 6912 + }, + { + "epoch": 0.08, + "learning_rate": 3.423123206516708e-05, + "loss": 3.3696, + "step": 6914 + }, + { + "epoch": 0.08, + "learning_rate": 3.422660372118856e-05, + "loss": 7.7404, + "step": 6916 + }, + { + "epoch": 0.08, + "learning_rate": 3.422197537721003e-05, + "loss": 1.3232, + "step": 6918 + }, + { + "epoch": 0.08, + "learning_rate": 3.421734703323151e-05, + "loss": 0.7196, + "step": 6920 + }, + { + "epoch": 0.08, + "learning_rate": 3.4212718689252984e-05, + "loss": 0.3499, + "step": 6922 + }, + { + "epoch": 0.08, + "learning_rate": 3.420809034527446e-05, + "loss": 0.2505, + "step": 6924 + }, + { + "epoch": 0.08, + "learning_rate": 3.420346200129594e-05, + "loss": 1.8428, + "step": 6926 + }, + { + "epoch": 0.08, + "learning_rate": 3.4198833657317414e-05, + "loss": 0.0047, + "step": 6928 + }, + { + "epoch": 0.08, + "learning_rate": 3.419420531333889e-05, + "loss": 1.9403, + "step": 6930 + }, + { + "epoch": 0.08, + "learning_rate": 3.4189576969360364e-05, + "loss": 1.2165, + "step": 6932 + }, + { + "epoch": 0.08, + "learning_rate": 3.418494862538184e-05, + "loss": 2.9158, + "step": 6934 + }, + { + "epoch": 0.08, + "learning_rate": 3.4180320281403315e-05, + "loss": 3.0777, + "step": 6936 + }, + { + "epoch": 0.08, + "learning_rate": 3.4175691937424794e-05, + "loss": 0.0241, + "step": 6938 + }, + { + "epoch": 0.08, + "learning_rate": 3.4171063593446266e-05, + "loss": 0.5876, + "step": 6940 + }, + { + "epoch": 0.08, + "learning_rate": 3.4166435249467745e-05, + "loss": 1.0737, + "step": 6942 + }, + { + "epoch": 0.08, + "learning_rate": 3.416180690548922e-05, + "loss": 6.4565, + "step": 6944 + }, + { + "epoch": 0.08, + "learning_rate": 3.4157178561510696e-05, + "loss": 0.7485, + "step": 6946 + }, + { + "epoch": 0.08, + "learning_rate": 3.415255021753217e-05, + "loss": 0.048, + "step": 6948 + }, + { + "epoch": 0.08, + "learning_rate": 3.414792187355365e-05, + "loss": 4.263, + "step": 6950 + }, + { + "epoch": 0.08, + "learning_rate": 3.414329352957512e-05, + "loss": 1.899, + "step": 6952 + }, + { + "epoch": 0.08, + "learning_rate": 3.41386651855966e-05, + "loss": 0.0323, + "step": 6954 + }, + { + "epoch": 0.08, + "learning_rate": 3.413403684161807e-05, + "loss": 0.2864, + "step": 6956 + }, + { + "epoch": 0.08, + "learning_rate": 3.412940849763955e-05, + "loss": 2.1373, + "step": 6958 + }, + { + "epoch": 0.08, + "learning_rate": 3.412478015366102e-05, + "loss": 4.2955, + "step": 6960 + }, + { + "epoch": 0.08, + "learning_rate": 3.41201518096825e-05, + "loss": 1.1327, + "step": 6962 + }, + { + "epoch": 0.08, + "learning_rate": 3.411552346570397e-05, + "loss": 9.0515, + "step": 6964 + }, + { + "epoch": 0.08, + "learning_rate": 3.411089512172545e-05, + "loss": 0.4004, + "step": 6966 + }, + { + "epoch": 0.08, + "learning_rate": 3.410626677774693e-05, + "loss": 2.7727, + "step": 6968 + }, + { + "epoch": 0.08, + "learning_rate": 3.41016384337684e-05, + "loss": 1.2299, + "step": 6970 + }, + { + "epoch": 0.08, + "learning_rate": 3.409701008978988e-05, + "loss": 5.0696, + "step": 6972 + }, + { + "epoch": 0.08, + "learning_rate": 3.409238174581135e-05, + "loss": 5.5039, + "step": 6974 + }, + { + "epoch": 0.08, + "learning_rate": 3.408775340183283e-05, + "loss": 3.7825, + "step": 6976 + }, + { + "epoch": 0.08, + "learning_rate": 3.40831250578543e-05, + "loss": 5.3389, + "step": 6978 + }, + { + "epoch": 0.08, + "learning_rate": 3.407849671387578e-05, + "loss": 0.8804, + "step": 6980 + }, + { + "epoch": 0.08, + "learning_rate": 3.4073868369897253e-05, + "loss": 1.1919, + "step": 6982 + }, + { + "epoch": 0.08, + "learning_rate": 3.406924002591873e-05, + "loss": 1.9333, + "step": 6984 + }, + { + "epoch": 0.08, + "learning_rate": 3.4064611681940204e-05, + "loss": 3.1352, + "step": 6986 + }, + { + "epoch": 0.08, + "learning_rate": 3.4059983337961676e-05, + "loss": 0.7814, + "step": 6988 + }, + { + "epoch": 0.08, + "learning_rate": 3.4055354993983155e-05, + "loss": 1.4546, + "step": 6990 + }, + { + "epoch": 0.08, + "learning_rate": 3.405072665000463e-05, + "loss": 2.9686, + "step": 6992 + }, + { + "epoch": 0.08, + "learning_rate": 3.4046098306026106e-05, + "loss": 0.3886, + "step": 6994 + }, + { + "epoch": 0.08, + "learning_rate": 3.404146996204758e-05, + "loss": 3.6217, + "step": 6996 + }, + { + "epoch": 0.08, + "learning_rate": 3.403684161806906e-05, + "loss": 1.413, + "step": 6998 + }, + { + "epoch": 0.08, + "learning_rate": 3.403221327409053e-05, + "loss": 3.1751, + "step": 7000 + }, + { + "epoch": 0.08, + "learning_rate": 3.402758493011201e-05, + "loss": 1.1151, + "step": 7002 + }, + { + "epoch": 0.08, + "learning_rate": 3.402295658613348e-05, + "loss": 0.9979, + "step": 7004 + }, + { + "epoch": 0.08, + "learning_rate": 3.401832824215496e-05, + "loss": 0.2063, + "step": 7006 + }, + { + "epoch": 0.08, + "learning_rate": 3.401369989817643e-05, + "loss": 0.0162, + "step": 7008 + }, + { + "epoch": 0.08, + "learning_rate": 3.400907155419791e-05, + "loss": 2.2038, + "step": 7010 + }, + { + "epoch": 0.08, + "learning_rate": 3.400444321021938e-05, + "loss": 2.7213, + "step": 7012 + }, + { + "epoch": 0.08, + "learning_rate": 3.399981486624086e-05, + "loss": 1.9044, + "step": 7014 + }, + { + "epoch": 0.08, + "learning_rate": 3.399518652226233e-05, + "loss": 0.0295, + "step": 7016 + }, + { + "epoch": 0.08, + "learning_rate": 3.399055817828381e-05, + "loss": 2.0632, + "step": 7018 + }, + { + "epoch": 0.08, + "learning_rate": 3.398592983430528e-05, + "loss": 6.0272, + "step": 7020 + }, + { + "epoch": 0.08, + "learning_rate": 3.398130149032676e-05, + "loss": 1.8069, + "step": 7022 + }, + { + "epoch": 0.08, + "learning_rate": 3.3976673146348234e-05, + "loss": 1.9398, + "step": 7024 + }, + { + "epoch": 0.08, + "learning_rate": 3.397204480236971e-05, + "loss": 0.9355, + "step": 7026 + }, + { + "epoch": 0.08, + "learning_rate": 3.3967416458391185e-05, + "loss": 2.5385, + "step": 7028 + }, + { + "epoch": 0.08, + "learning_rate": 3.3962788114412664e-05, + "loss": 1.2055, + "step": 7030 + }, + { + "epoch": 0.08, + "learning_rate": 3.395815977043414e-05, + "loss": 2.6746, + "step": 7032 + }, + { + "epoch": 0.08, + "learning_rate": 3.3953531426455614e-05, + "loss": 4.3458, + "step": 7034 + }, + { + "epoch": 0.08, + "learning_rate": 3.394890308247709e-05, + "loss": 0.3677, + "step": 7036 + }, + { + "epoch": 0.08, + "learning_rate": 3.3944274738498565e-05, + "loss": 1.4507, + "step": 7038 + }, + { + "epoch": 0.08, + "learning_rate": 3.3939646394520044e-05, + "loss": 1.7989, + "step": 7040 + }, + { + "epoch": 0.08, + "learning_rate": 3.3935018050541516e-05, + "loss": 1.574, + "step": 7042 + }, + { + "epoch": 0.08, + "learning_rate": 3.3930389706562995e-05, + "loss": 0.2546, + "step": 7044 + }, + { + "epoch": 0.08, + "learning_rate": 3.392576136258447e-05, + "loss": 0.9385, + "step": 7046 + }, + { + "epoch": 0.08, + "learning_rate": 3.3921133018605946e-05, + "loss": 0.0337, + "step": 7048 + }, + { + "epoch": 0.08, + "learning_rate": 3.391650467462742e-05, + "loss": 2.7575, + "step": 7050 + }, + { + "epoch": 0.08, + "learning_rate": 3.39118763306489e-05, + "loss": 1.3286, + "step": 7052 + }, + { + "epoch": 0.08, + "learning_rate": 3.390724798667037e-05, + "loss": 1.6902, + "step": 7054 + }, + { + "epoch": 0.08, + "learning_rate": 3.390261964269185e-05, + "loss": 1.3571, + "step": 7056 + }, + { + "epoch": 0.08, + "learning_rate": 3.389799129871332e-05, + "loss": 1.3362, + "step": 7058 + }, + { + "epoch": 0.08, + "learning_rate": 3.38933629547348e-05, + "loss": 6.8136, + "step": 7060 + }, + { + "epoch": 0.08, + "learning_rate": 3.388873461075627e-05, + "loss": 0.602, + "step": 7062 + }, + { + "epoch": 0.08, + "learning_rate": 3.388410626677775e-05, + "loss": 0.4026, + "step": 7064 + }, + { + "epoch": 0.08, + "learning_rate": 3.387947792279922e-05, + "loss": 2.9295, + "step": 7066 + }, + { + "epoch": 0.08, + "learning_rate": 3.38748495788207e-05, + "loss": 2.6847, + "step": 7068 + }, + { + "epoch": 0.08, + "learning_rate": 3.387022123484217e-05, + "loss": 5.6971, + "step": 7070 + }, + { + "epoch": 0.08, + "learning_rate": 3.386559289086365e-05, + "loss": 1.2851, + "step": 7072 + }, + { + "epoch": 0.08, + "learning_rate": 3.386096454688513e-05, + "loss": 0.3928, + "step": 7074 + }, + { + "epoch": 0.08, + "learning_rate": 3.38563362029066e-05, + "loss": 4.5452, + "step": 7076 + }, + { + "epoch": 0.08, + "learning_rate": 3.385170785892808e-05, + "loss": 5.7598, + "step": 7078 + }, + { + "epoch": 0.08, + "learning_rate": 3.384707951494955e-05, + "loss": 0.0005, + "step": 7080 + }, + { + "epoch": 0.08, + "learning_rate": 3.384245117097103e-05, + "loss": 4.1093, + "step": 7082 + }, + { + "epoch": 0.08, + "learning_rate": 3.3837822826992503e-05, + "loss": 2.6514, + "step": 7084 + }, + { + "epoch": 0.08, + "learning_rate": 3.383319448301398e-05, + "loss": 0.7469, + "step": 7086 + }, + { + "epoch": 0.08, + "learning_rate": 3.3828566139035454e-05, + "loss": 2.3298, + "step": 7088 + }, + { + "epoch": 0.08, + "learning_rate": 3.382393779505693e-05, + "loss": 1.93, + "step": 7090 + }, + { + "epoch": 0.08, + "learning_rate": 3.3819309451078405e-05, + "loss": 2.2738, + "step": 7092 + }, + { + "epoch": 0.08, + "learning_rate": 3.3814681107099884e-05, + "loss": 4.8404, + "step": 7094 + }, + { + "epoch": 0.08, + "learning_rate": 3.3810052763121356e-05, + "loss": 0.1941, + "step": 7096 + }, + { + "epoch": 0.08, + "learning_rate": 3.3805424419142835e-05, + "loss": 2.5031, + "step": 7098 + }, + { + "epoch": 0.08, + "learning_rate": 3.380079607516431e-05, + "loss": 3.8618, + "step": 7100 + }, + { + "epoch": 0.08, + "learning_rate": 3.3796167731185786e-05, + "loss": 1.3242, + "step": 7102 + }, + { + "epoch": 0.08, + "learning_rate": 3.379153938720726e-05, + "loss": 2.3361, + "step": 7104 + }, + { + "epoch": 0.08, + "learning_rate": 3.3786911043228737e-05, + "loss": 2.813, + "step": 7106 + }, + { + "epoch": 0.08, + "learning_rate": 3.378228269925021e-05, + "loss": 3.3334, + "step": 7108 + }, + { + "epoch": 0.08, + "learning_rate": 3.377765435527169e-05, + "loss": 0.9212, + "step": 7110 + }, + { + "epoch": 0.08, + "learning_rate": 3.3773026011293166e-05, + "loss": 0.2723, + "step": 7112 + }, + { + "epoch": 0.08, + "learning_rate": 3.376839766731464e-05, + "loss": 1.5667, + "step": 7114 + }, + { + "epoch": 0.08, + "learning_rate": 3.376376932333612e-05, + "loss": 0.1752, + "step": 7116 + }, + { + "epoch": 0.08, + "learning_rate": 3.375914097935759e-05, + "loss": 4.3975, + "step": 7118 + }, + { + "epoch": 0.08, + "learning_rate": 3.375451263537907e-05, + "loss": 1.962, + "step": 7120 + }, + { + "epoch": 0.08, + "learning_rate": 3.374988429140054e-05, + "loss": 1.7204, + "step": 7122 + }, + { + "epoch": 0.08, + "learning_rate": 3.374525594742202e-05, + "loss": 0.9031, + "step": 7124 + }, + { + "epoch": 0.08, + "learning_rate": 3.374062760344349e-05, + "loss": 1.411, + "step": 7126 + }, + { + "epoch": 0.08, + "learning_rate": 3.373599925946497e-05, + "loss": 3.4517, + "step": 7128 + }, + { + "epoch": 0.08, + "learning_rate": 3.373137091548644e-05, + "loss": 0.5046, + "step": 7130 + }, + { + "epoch": 0.08, + "learning_rate": 3.372674257150792e-05, + "loss": 1.2015, + "step": 7132 + }, + { + "epoch": 0.08, + "learning_rate": 3.3722114227529386e-05, + "loss": 0.4659, + "step": 7134 + }, + { + "epoch": 0.08, + "learning_rate": 3.3717485883550865e-05, + "loss": 3.9114, + "step": 7136 + }, + { + "epoch": 0.08, + "learning_rate": 3.371285753957234e-05, + "loss": 0.2037, + "step": 7138 + }, + { + "epoch": 0.08, + "learning_rate": 3.3708229195593815e-05, + "loss": 4.9396, + "step": 7140 + }, + { + "epoch": 0.08, + "learning_rate": 3.3703600851615294e-05, + "loss": 3.0566, + "step": 7142 + }, + { + "epoch": 0.08, + "learning_rate": 3.3698972507636766e-05, + "loss": 0.0223, + "step": 7144 + }, + { + "epoch": 0.08, + "learning_rate": 3.3694344163658245e-05, + "loss": 4.8946, + "step": 7146 + }, + { + "epoch": 0.08, + "learning_rate": 3.368971581967972e-05, + "loss": 3.2934, + "step": 7148 + }, + { + "epoch": 0.08, + "learning_rate": 3.3685087475701196e-05, + "loss": 0.902, + "step": 7150 + }, + { + "epoch": 0.08, + "learning_rate": 3.368045913172267e-05, + "loss": 1.2216, + "step": 7152 + }, + { + "epoch": 0.08, + "learning_rate": 3.367583078774415e-05, + "loss": 1.2559, + "step": 7154 + }, + { + "epoch": 0.08, + "learning_rate": 3.367120244376562e-05, + "loss": 0.7675, + "step": 7156 + }, + { + "epoch": 0.08, + "learning_rate": 3.36665740997871e-05, + "loss": 1.1895, + "step": 7158 + }, + { + "epoch": 0.08, + "learning_rate": 3.366194575580857e-05, + "loss": 2.4344, + "step": 7160 + }, + { + "epoch": 0.08, + "learning_rate": 3.365731741183005e-05, + "loss": 0.6809, + "step": 7162 + }, + { + "epoch": 0.08, + "learning_rate": 3.365268906785152e-05, + "loss": 1.7925, + "step": 7164 + }, + { + "epoch": 0.08, + "learning_rate": 3.3648060723873e-05, + "loss": 3.1567, + "step": 7166 + }, + { + "epoch": 0.08, + "learning_rate": 3.364343237989447e-05, + "loss": 1.4077, + "step": 7168 + }, + { + "epoch": 0.08, + "learning_rate": 3.363880403591595e-05, + "loss": 0.1837, + "step": 7170 + }, + { + "epoch": 0.08, + "learning_rate": 3.363417569193742e-05, + "loss": 2.6869, + "step": 7172 + }, + { + "epoch": 0.08, + "learning_rate": 3.36295473479589e-05, + "loss": 0.537, + "step": 7174 + }, + { + "epoch": 0.08, + "learning_rate": 3.362491900398037e-05, + "loss": 2.3665, + "step": 7176 + }, + { + "epoch": 0.08, + "learning_rate": 3.362029066000185e-05, + "loss": 2.6044, + "step": 7178 + }, + { + "epoch": 0.08, + "learning_rate": 3.361566231602333e-05, + "loss": 3.6506, + "step": 7180 + }, + { + "epoch": 0.08, + "learning_rate": 3.36110339720448e-05, + "loss": 4.11, + "step": 7182 + }, + { + "epoch": 0.08, + "learning_rate": 3.360640562806628e-05, + "loss": 4.7732, + "step": 7184 + }, + { + "epoch": 0.08, + "learning_rate": 3.3601777284087754e-05, + "loss": 0.306, + "step": 7186 + }, + { + "epoch": 0.08, + "learning_rate": 3.359714894010923e-05, + "loss": 7.5727, + "step": 7188 + }, + { + "epoch": 0.08, + "learning_rate": 3.3592520596130704e-05, + "loss": 0.7458, + "step": 7190 + }, + { + "epoch": 0.08, + "learning_rate": 3.358789225215218e-05, + "loss": 0.0066, + "step": 7192 + }, + { + "epoch": 0.08, + "learning_rate": 3.3583263908173655e-05, + "loss": 2.8496, + "step": 7194 + }, + { + "epoch": 0.08, + "learning_rate": 3.3578635564195134e-05, + "loss": 1.8856, + "step": 7196 + }, + { + "epoch": 0.08, + "learning_rate": 3.3574007220216606e-05, + "loss": 1.1265, + "step": 7198 + }, + { + "epoch": 0.08, + "learning_rate": 3.3569378876238085e-05, + "loss": 3.5462, + "step": 7200 + }, + { + "epoch": 0.08, + "learning_rate": 3.356475053225956e-05, + "loss": 1.3608, + "step": 7202 + }, + { + "epoch": 0.08, + "learning_rate": 3.3560122188281036e-05, + "loss": 0.9876, + "step": 7204 + }, + { + "epoch": 0.08, + "learning_rate": 3.355549384430251e-05, + "loss": 5.7284, + "step": 7206 + }, + { + "epoch": 0.08, + "learning_rate": 3.3550865500323987e-05, + "loss": 2.0203, + "step": 7208 + }, + { + "epoch": 0.08, + "learning_rate": 3.354623715634546e-05, + "loss": 0.4722, + "step": 7210 + }, + { + "epoch": 0.08, + "learning_rate": 3.354160881236694e-05, + "loss": 5.8665, + "step": 7212 + }, + { + "epoch": 0.08, + "learning_rate": 3.353698046838841e-05, + "loss": 6.9109, + "step": 7214 + }, + { + "epoch": 0.08, + "learning_rate": 3.353235212440989e-05, + "loss": 2.6135, + "step": 7216 + }, + { + "epoch": 0.08, + "learning_rate": 3.352772378043137e-05, + "loss": 1.4161, + "step": 7218 + }, + { + "epoch": 0.08, + "learning_rate": 3.352309543645284e-05, + "loss": 5.1989, + "step": 7220 + }, + { + "epoch": 0.08, + "learning_rate": 3.351846709247432e-05, + "loss": 3.1732, + "step": 7222 + }, + { + "epoch": 0.08, + "learning_rate": 3.351383874849579e-05, + "loss": 2.1165, + "step": 7224 + }, + { + "epoch": 0.08, + "learning_rate": 3.350921040451727e-05, + "loss": 2.8775, + "step": 7226 + }, + { + "epoch": 0.08, + "learning_rate": 3.350458206053874e-05, + "loss": 0.3382, + "step": 7228 + }, + { + "epoch": 0.08, + "learning_rate": 3.349995371656022e-05, + "loss": 1.7314, + "step": 7230 + }, + { + "epoch": 0.08, + "learning_rate": 3.349532537258169e-05, + "loss": 0.6708, + "step": 7232 + }, + { + "epoch": 0.08, + "learning_rate": 3.349069702860317e-05, + "loss": 3.3461, + "step": 7234 + }, + { + "epoch": 0.08, + "learning_rate": 3.348606868462464e-05, + "loss": 2.8509, + "step": 7236 + }, + { + "epoch": 0.08, + "learning_rate": 3.348144034064612e-05, + "loss": 0.0143, + "step": 7238 + }, + { + "epoch": 0.08, + "learning_rate": 3.347681199666759e-05, + "loss": 0.0074, + "step": 7240 + }, + { + "epoch": 0.08, + "learning_rate": 3.347218365268907e-05, + "loss": 3.2902, + "step": 7242 + }, + { + "epoch": 0.08, + "learning_rate": 3.3467555308710544e-05, + "loss": 4.074, + "step": 7244 + }, + { + "epoch": 0.08, + "learning_rate": 3.346292696473202e-05, + "loss": 0.3031, + "step": 7246 + }, + { + "epoch": 0.08, + "learning_rate": 3.3458298620753495e-05, + "loss": 0.0003, + "step": 7248 + }, + { + "epoch": 0.08, + "learning_rate": 3.3453670276774974e-05, + "loss": 7.7183, + "step": 7250 + }, + { + "epoch": 0.08, + "learning_rate": 3.3449041932796446e-05, + "loss": 0.1247, + "step": 7252 + }, + { + "epoch": 0.08, + "learning_rate": 3.3444413588817925e-05, + "loss": 2.4372, + "step": 7254 + }, + { + "epoch": 0.08, + "learning_rate": 3.34397852448394e-05, + "loss": 4.2571, + "step": 7256 + }, + { + "epoch": 0.08, + "learning_rate": 3.3435156900860876e-05, + "loss": 1.3378, + "step": 7258 + }, + { + "epoch": 0.08, + "learning_rate": 3.3430528556882354e-05, + "loss": 2.3891, + "step": 7260 + }, + { + "epoch": 0.08, + "learning_rate": 3.3425900212903826e-05, + "loss": 3.7999, + "step": 7262 + }, + { + "epoch": 0.08, + "learning_rate": 3.3421271868925305e-05, + "loss": 4.2878, + "step": 7264 + }, + { + "epoch": 0.08, + "learning_rate": 3.341664352494678e-05, + "loss": 1.1382, + "step": 7266 + }, + { + "epoch": 0.08, + "learning_rate": 3.3412015180968256e-05, + "loss": 0.9325, + "step": 7268 + }, + { + "epoch": 0.08, + "learning_rate": 3.340738683698973e-05, + "loss": 3.8178, + "step": 7270 + }, + { + "epoch": 0.08, + "learning_rate": 3.340275849301121e-05, + "loss": 1.6316, + "step": 7272 + }, + { + "epoch": 0.08, + "learning_rate": 3.339813014903268e-05, + "loss": 3.4476, + "step": 7274 + }, + { + "epoch": 0.08, + "learning_rate": 3.339350180505416e-05, + "loss": 1.7407, + "step": 7276 + }, + { + "epoch": 0.08, + "learning_rate": 3.338887346107562e-05, + "loss": 0.6441, + "step": 7278 + }, + { + "epoch": 0.08, + "learning_rate": 3.33842451170971e-05, + "loss": 2.0852, + "step": 7280 + }, + { + "epoch": 0.08, + "learning_rate": 3.3379616773118574e-05, + "loss": 0.0673, + "step": 7282 + }, + { + "epoch": 0.08, + "learning_rate": 3.337498842914005e-05, + "loss": 4.3759, + "step": 7284 + }, + { + "epoch": 0.08, + "learning_rate": 3.337036008516153e-05, + "loss": 4.2592, + "step": 7286 + }, + { + "epoch": 0.08, + "learning_rate": 3.3365731741183004e-05, + "loss": 0.0005, + "step": 7288 + }, + { + "epoch": 0.08, + "learning_rate": 3.336110339720448e-05, + "loss": 1.1348, + "step": 7290 + }, + { + "epoch": 0.08, + "learning_rate": 3.3356475053225954e-05, + "loss": 3.2019, + "step": 7292 + }, + { + "epoch": 0.08, + "learning_rate": 3.335184670924743e-05, + "loss": 2.9673, + "step": 7294 + }, + { + "epoch": 0.08, + "learning_rate": 3.3347218365268905e-05, + "loss": 1.0847, + "step": 7296 + }, + { + "epoch": 0.08, + "learning_rate": 3.3342590021290384e-05, + "loss": 0.0002, + "step": 7298 + }, + { + "epoch": 0.08, + "learning_rate": 3.3337961677311856e-05, + "loss": 0.9707, + "step": 7300 + }, + { + "epoch": 0.08, + "learning_rate": 3.3333333333333335e-05, + "loss": 0.2439, + "step": 7302 + }, + { + "epoch": 0.08, + "learning_rate": 3.332870498935481e-05, + "loss": 5.849, + "step": 7304 + }, + { + "epoch": 0.08, + "learning_rate": 3.3324076645376286e-05, + "loss": 4.1112, + "step": 7306 + }, + { + "epoch": 0.08, + "learning_rate": 3.331944830139776e-05, + "loss": 2.6247, + "step": 7308 + }, + { + "epoch": 0.08, + "learning_rate": 3.331481995741924e-05, + "loss": 3.208, + "step": 7310 + }, + { + "epoch": 0.08, + "learning_rate": 3.331019161344071e-05, + "loss": 1.191, + "step": 7312 + }, + { + "epoch": 0.08, + "learning_rate": 3.330556326946219e-05, + "loss": 2.5727, + "step": 7314 + }, + { + "epoch": 0.08, + "learning_rate": 3.330093492548366e-05, + "loss": 1.1109, + "step": 7316 + }, + { + "epoch": 0.08, + "learning_rate": 3.329630658150514e-05, + "loss": 0.001, + "step": 7318 + }, + { + "epoch": 0.08, + "learning_rate": 3.329167823752661e-05, + "loss": 4.876, + "step": 7320 + }, + { + "epoch": 0.08, + "learning_rate": 3.328704989354809e-05, + "loss": 2.1978, + "step": 7322 + }, + { + "epoch": 0.08, + "learning_rate": 3.328242154956956e-05, + "loss": 0.0031, + "step": 7324 + }, + { + "epoch": 0.08, + "learning_rate": 3.327779320559104e-05, + "loss": 1.7872, + "step": 7326 + }, + { + "epoch": 0.08, + "learning_rate": 3.327316486161252e-05, + "loss": 2.6104, + "step": 7328 + }, + { + "epoch": 0.08, + "learning_rate": 3.326853651763399e-05, + "loss": 2.3763, + "step": 7330 + }, + { + "epoch": 0.08, + "learning_rate": 3.326390817365547e-05, + "loss": 1.7005, + "step": 7332 + }, + { + "epoch": 0.08, + "learning_rate": 3.325927982967694e-05, + "loss": 2.6769, + "step": 7334 + }, + { + "epoch": 0.08, + "learning_rate": 3.325465148569842e-05, + "loss": 2.636, + "step": 7336 + }, + { + "epoch": 0.08, + "learning_rate": 3.325002314171989e-05, + "loss": 4.6505, + "step": 7338 + }, + { + "epoch": 0.08, + "learning_rate": 3.324539479774137e-05, + "loss": 1.8923, + "step": 7340 + }, + { + "epoch": 0.08, + "learning_rate": 3.3240766453762843e-05, + "loss": 3.2733, + "step": 7342 + }, + { + "epoch": 0.08, + "learning_rate": 3.323613810978432e-05, + "loss": 1.8884, + "step": 7344 + }, + { + "epoch": 0.08, + "learning_rate": 3.3231509765805794e-05, + "loss": 2.9984, + "step": 7346 + }, + { + "epoch": 0.08, + "learning_rate": 3.322688142182727e-05, + "loss": 0.0067, + "step": 7348 + }, + { + "epoch": 0.08, + "learning_rate": 3.3222253077848745e-05, + "loss": 2.7365, + "step": 7350 + }, + { + "epoch": 0.08, + "learning_rate": 3.3217624733870224e-05, + "loss": 0.9992, + "step": 7352 + }, + { + "epoch": 0.08, + "learning_rate": 3.3212996389891696e-05, + "loss": 0.3307, + "step": 7354 + }, + { + "epoch": 0.08, + "learning_rate": 3.3208368045913175e-05, + "loss": 5.305, + "step": 7356 + }, + { + "epoch": 0.08, + "learning_rate": 3.320373970193465e-05, + "loss": 3.4544, + "step": 7358 + }, + { + "epoch": 0.08, + "learning_rate": 3.3199111357956126e-05, + "loss": 1.553, + "step": 7360 + }, + { + "epoch": 0.08, + "learning_rate": 3.31944830139776e-05, + "loss": 1.5212, + "step": 7362 + }, + { + "epoch": 0.08, + "learning_rate": 3.3189854669999076e-05, + "loss": 6.3907, + "step": 7364 + }, + { + "epoch": 0.08, + "learning_rate": 3.3185226326020555e-05, + "loss": 0.9458, + "step": 7366 + }, + { + "epoch": 0.08, + "learning_rate": 3.318059798204203e-05, + "loss": 0.6087, + "step": 7368 + }, + { + "epoch": 0.08, + "learning_rate": 3.3175969638063506e-05, + "loss": 0.6988, + "step": 7370 + }, + { + "epoch": 0.08, + "learning_rate": 3.317134129408498e-05, + "loss": 3.1646, + "step": 7372 + }, + { + "epoch": 0.08, + "learning_rate": 3.316671295010646e-05, + "loss": 1.7048, + "step": 7374 + }, + { + "epoch": 0.08, + "learning_rate": 3.316208460612793e-05, + "loss": 4.8524, + "step": 7376 + }, + { + "epoch": 0.08, + "learning_rate": 3.315745626214941e-05, + "loss": 0.1489, + "step": 7378 + }, + { + "epoch": 0.09, + "learning_rate": 3.315282791817088e-05, + "loss": 5.9255, + "step": 7380 + }, + { + "epoch": 0.09, + "learning_rate": 3.314819957419236e-05, + "loss": 2.744, + "step": 7382 + }, + { + "epoch": 0.09, + "learning_rate": 3.314357123021383e-05, + "loss": 0.8175, + "step": 7384 + }, + { + "epoch": 0.09, + "learning_rate": 3.313894288623531e-05, + "loss": 4.4632, + "step": 7386 + }, + { + "epoch": 0.09, + "learning_rate": 3.313431454225678e-05, + "loss": 0.0195, + "step": 7388 + }, + { + "epoch": 0.09, + "learning_rate": 3.312968619827826e-05, + "loss": 4.3856, + "step": 7390 + }, + { + "epoch": 0.09, + "learning_rate": 3.312505785429973e-05, + "loss": 2.9194, + "step": 7392 + }, + { + "epoch": 0.09, + "learning_rate": 3.312042951032121e-05, + "loss": 2.8344, + "step": 7394 + }, + { + "epoch": 0.09, + "learning_rate": 3.311580116634268e-05, + "loss": 1.4813, + "step": 7396 + }, + { + "epoch": 0.09, + "learning_rate": 3.311117282236416e-05, + "loss": 5.513, + "step": 7398 + }, + { + "epoch": 0.09, + "learning_rate": 3.3106544478385634e-05, + "loss": 1.9541, + "step": 7400 + }, + { + "epoch": 0.09, + "learning_rate": 3.310191613440711e-05, + "loss": 1.632, + "step": 7402 + }, + { + "epoch": 0.09, + "learning_rate": 3.3097287790428585e-05, + "loss": 1.4746, + "step": 7404 + }, + { + "epoch": 0.09, + "learning_rate": 3.3092659446450064e-05, + "loss": 1.1526, + "step": 7406 + }, + { + "epoch": 0.09, + "learning_rate": 3.308803110247154e-05, + "loss": 1.2376, + "step": 7408 + }, + { + "epoch": 0.09, + "learning_rate": 3.3083402758493015e-05, + "loss": 1.5268, + "step": 7410 + }, + { + "epoch": 0.09, + "learning_rate": 3.3078774414514493e-05, + "loss": 3.5297, + "step": 7412 + }, + { + "epoch": 0.09, + "learning_rate": 3.3074146070535965e-05, + "loss": 4.1224, + "step": 7414 + }, + { + "epoch": 0.09, + "learning_rate": 3.3069517726557444e-05, + "loss": 0.3747, + "step": 7416 + }, + { + "epoch": 0.09, + "learning_rate": 3.3064889382578916e-05, + "loss": 0.001, + "step": 7418 + }, + { + "epoch": 0.09, + "learning_rate": 3.3060261038600395e-05, + "loss": 0.5452, + "step": 7420 + }, + { + "epoch": 0.09, + "learning_rate": 3.305563269462187e-05, + "loss": 0.515, + "step": 7422 + }, + { + "epoch": 0.09, + "learning_rate": 3.305100435064334e-05, + "loss": 0.3612, + "step": 7424 + }, + { + "epoch": 0.09, + "learning_rate": 3.304637600666481e-05, + "loss": 0.4824, + "step": 7426 + }, + { + "epoch": 0.09, + "learning_rate": 3.304174766268629e-05, + "loss": 2.3856, + "step": 7428 + }, + { + "epoch": 0.09, + "learning_rate": 3.303711931870776e-05, + "loss": 2.0039, + "step": 7430 + }, + { + "epoch": 0.09, + "learning_rate": 3.303249097472924e-05, + "loss": 2.5919, + "step": 7432 + }, + { + "epoch": 0.09, + "learning_rate": 3.302786263075072e-05, + "loss": 3.8647, + "step": 7434 + }, + { + "epoch": 0.09, + "learning_rate": 3.302323428677219e-05, + "loss": 5.0429, + "step": 7436 + }, + { + "epoch": 0.09, + "learning_rate": 3.301860594279367e-05, + "loss": 1.8329, + "step": 7438 + }, + { + "epoch": 0.09, + "learning_rate": 3.301397759881514e-05, + "loss": 5.4466, + "step": 7440 + }, + { + "epoch": 0.09, + "learning_rate": 3.300934925483662e-05, + "loss": 1.1203, + "step": 7442 + }, + { + "epoch": 0.09, + "learning_rate": 3.3004720910858093e-05, + "loss": 0.2703, + "step": 7444 + }, + { + "epoch": 0.09, + "learning_rate": 3.300009256687957e-05, + "loss": 0.3675, + "step": 7446 + }, + { + "epoch": 0.09, + "learning_rate": 3.2995464222901044e-05, + "loss": 1.6426, + "step": 7448 + }, + { + "epoch": 0.09, + "learning_rate": 3.299083587892252e-05, + "loss": 2.4336, + "step": 7450 + }, + { + "epoch": 0.09, + "learning_rate": 3.2986207534943995e-05, + "loss": 0.0036, + "step": 7452 + }, + { + "epoch": 0.09, + "learning_rate": 3.2981579190965474e-05, + "loss": 5.8081, + "step": 7454 + }, + { + "epoch": 0.09, + "learning_rate": 3.2976950846986946e-05, + "loss": 1.5859, + "step": 7456 + }, + { + "epoch": 0.09, + "learning_rate": 3.2972322503008425e-05, + "loss": 4.5346, + "step": 7458 + }, + { + "epoch": 0.09, + "learning_rate": 3.29676941590299e-05, + "loss": 2.6264, + "step": 7460 + }, + { + "epoch": 0.09, + "learning_rate": 3.2963065815051376e-05, + "loss": 1.763, + "step": 7462 + }, + { + "epoch": 0.09, + "learning_rate": 3.295843747107285e-05, + "loss": 1.2659, + "step": 7464 + }, + { + "epoch": 0.09, + "learning_rate": 3.2953809127094327e-05, + "loss": 3.7921, + "step": 7466 + }, + { + "epoch": 0.09, + "learning_rate": 3.29491807831158e-05, + "loss": 1.4024, + "step": 7468 + }, + { + "epoch": 0.09, + "learning_rate": 3.294455243913728e-05, + "loss": 3.1618, + "step": 7470 + }, + { + "epoch": 0.09, + "learning_rate": 3.2939924095158756e-05, + "loss": 0.4153, + "step": 7472 + }, + { + "epoch": 0.09, + "learning_rate": 3.293529575118023e-05, + "loss": 2.1999, + "step": 7474 + }, + { + "epoch": 0.09, + "learning_rate": 3.293066740720171e-05, + "loss": 2.9299, + "step": 7476 + }, + { + "epoch": 0.09, + "learning_rate": 3.292603906322318e-05, + "loss": 1.7345, + "step": 7478 + }, + { + "epoch": 0.09, + "learning_rate": 3.292141071924466e-05, + "loss": 0.0303, + "step": 7480 + }, + { + "epoch": 0.09, + "learning_rate": 3.291678237526613e-05, + "loss": 1.5046, + "step": 7482 + }, + { + "epoch": 0.09, + "learning_rate": 3.291215403128761e-05, + "loss": 0.1274, + "step": 7484 + }, + { + "epoch": 0.09, + "learning_rate": 3.290752568730908e-05, + "loss": 3.2658, + "step": 7486 + }, + { + "epoch": 0.09, + "learning_rate": 3.290289734333056e-05, + "loss": 0.0738, + "step": 7488 + }, + { + "epoch": 0.09, + "learning_rate": 3.289826899935203e-05, + "loss": 4.0224, + "step": 7490 + }, + { + "epoch": 0.09, + "learning_rate": 3.289364065537351e-05, + "loss": 4.4324, + "step": 7492 + }, + { + "epoch": 0.09, + "learning_rate": 3.288901231139498e-05, + "loss": 0.0003, + "step": 7494 + }, + { + "epoch": 0.09, + "learning_rate": 3.288438396741646e-05, + "loss": 3.4167, + "step": 7496 + }, + { + "epoch": 0.09, + "learning_rate": 3.287975562343793e-05, + "loss": 0.9708, + "step": 7498 + }, + { + "epoch": 0.09, + "learning_rate": 3.287512727945941e-05, + "loss": 2.3268, + "step": 7500 + }, + { + "epoch": 0.09, + "learning_rate": 3.2870498935480884e-05, + "loss": 0.0002, + "step": 7502 + }, + { + "epoch": 0.09, + "learning_rate": 3.286587059150236e-05, + "loss": 0.0009, + "step": 7504 + }, + { + "epoch": 0.09, + "learning_rate": 3.2861242247523835e-05, + "loss": 0.9279, + "step": 7506 + }, + { + "epoch": 0.09, + "learning_rate": 3.2856613903545314e-05, + "loss": 5.5095, + "step": 7508 + }, + { + "epoch": 0.09, + "learning_rate": 3.2851985559566786e-05, + "loss": 0.5624, + "step": 7510 + }, + { + "epoch": 0.09, + "learning_rate": 3.2847357215588265e-05, + "loss": 2.5446, + "step": 7512 + }, + { + "epoch": 0.09, + "learning_rate": 3.2842728871609743e-05, + "loss": 1.4876, + "step": 7514 + }, + { + "epoch": 0.09, + "learning_rate": 3.2838100527631216e-05, + "loss": 1.4801, + "step": 7516 + }, + { + "epoch": 0.09, + "learning_rate": 3.2833472183652694e-05, + "loss": 5.8203, + "step": 7518 + }, + { + "epoch": 0.09, + "learning_rate": 3.2828843839674166e-05, + "loss": 6.354, + "step": 7520 + }, + { + "epoch": 0.09, + "learning_rate": 3.2824215495695645e-05, + "loss": 7.8923, + "step": 7522 + }, + { + "epoch": 0.09, + "learning_rate": 3.281958715171712e-05, + "loss": 3.2861, + "step": 7524 + }, + { + "epoch": 0.09, + "learning_rate": 3.2814958807738596e-05, + "loss": 5.3633, + "step": 7526 + }, + { + "epoch": 0.09, + "learning_rate": 3.281033046376007e-05, + "loss": 1.9322, + "step": 7528 + }, + { + "epoch": 0.09, + "learning_rate": 3.280570211978155e-05, + "loss": 1.6802, + "step": 7530 + }, + { + "epoch": 0.09, + "learning_rate": 3.280107377580302e-05, + "loss": 0.0897, + "step": 7532 + }, + { + "epoch": 0.09, + "learning_rate": 3.27964454318245e-05, + "loss": 0.7123, + "step": 7534 + }, + { + "epoch": 0.09, + "learning_rate": 3.279181708784597e-05, + "loss": 3.0438, + "step": 7536 + }, + { + "epoch": 0.09, + "learning_rate": 3.278718874386745e-05, + "loss": 2.4621, + "step": 7538 + }, + { + "epoch": 0.09, + "learning_rate": 3.278256039988892e-05, + "loss": 2.2974, + "step": 7540 + }, + { + "epoch": 0.09, + "learning_rate": 3.27779320559104e-05, + "loss": 2.7314, + "step": 7542 + }, + { + "epoch": 0.09, + "learning_rate": 3.277330371193187e-05, + "loss": 2.5775, + "step": 7544 + }, + { + "epoch": 0.09, + "learning_rate": 3.276867536795335e-05, + "loss": 1.257, + "step": 7546 + }, + { + "epoch": 0.09, + "learning_rate": 3.276404702397482e-05, + "loss": 1.7973, + "step": 7548 + }, + { + "epoch": 0.09, + "learning_rate": 3.27594186799963e-05, + "loss": 1.2464, + "step": 7550 + }, + { + "epoch": 0.09, + "learning_rate": 3.275479033601778e-05, + "loss": 0.8818, + "step": 7552 + }, + { + "epoch": 0.09, + "learning_rate": 3.275016199203925e-05, + "loss": 2.5286, + "step": 7554 + }, + { + "epoch": 0.09, + "learning_rate": 3.274553364806073e-05, + "loss": 3.3742, + "step": 7556 + }, + { + "epoch": 0.09, + "learning_rate": 3.27409053040822e-05, + "loss": 7.9011, + "step": 7558 + }, + { + "epoch": 0.09, + "learning_rate": 3.273627696010368e-05, + "loss": 0.0036, + "step": 7560 + }, + { + "epoch": 0.09, + "learning_rate": 3.2731648616125154e-05, + "loss": 1.2091, + "step": 7562 + }, + { + "epoch": 0.09, + "learning_rate": 3.272702027214663e-05, + "loss": 3.9492, + "step": 7564 + }, + { + "epoch": 0.09, + "learning_rate": 3.2722391928168105e-05, + "loss": 1.1728, + "step": 7566 + }, + { + "epoch": 0.09, + "learning_rate": 3.2717763584189577e-05, + "loss": 1.2549, + "step": 7568 + }, + { + "epoch": 0.09, + "learning_rate": 3.271313524021105e-05, + "loss": 2.2042, + "step": 7570 + }, + { + "epoch": 0.09, + "learning_rate": 3.270850689623253e-05, + "loss": 1.6174, + "step": 7572 + }, + { + "epoch": 0.09, + "learning_rate": 3.2703878552254e-05, + "loss": 6.3657, + "step": 7574 + }, + { + "epoch": 0.09, + "learning_rate": 3.269925020827548e-05, + "loss": 6.7479, + "step": 7576 + }, + { + "epoch": 0.09, + "learning_rate": 3.269462186429696e-05, + "loss": 3.0452, + "step": 7578 + }, + { + "epoch": 0.09, + "learning_rate": 3.268999352031843e-05, + "loss": 0.9804, + "step": 7580 + }, + { + "epoch": 0.09, + "learning_rate": 3.268536517633991e-05, + "loss": 0.1887, + "step": 7582 + }, + { + "epoch": 0.09, + "learning_rate": 3.268073683236138e-05, + "loss": 0.4592, + "step": 7584 + }, + { + "epoch": 0.09, + "learning_rate": 3.267610848838286e-05, + "loss": 4.3856, + "step": 7586 + }, + { + "epoch": 0.09, + "learning_rate": 3.267148014440433e-05, + "loss": 0.8874, + "step": 7588 + }, + { + "epoch": 0.09, + "learning_rate": 3.266685180042581e-05, + "loss": 6.2125, + "step": 7590 + }, + { + "epoch": 0.09, + "learning_rate": 3.266222345644728e-05, + "loss": 2.9394, + "step": 7592 + }, + { + "epoch": 0.09, + "learning_rate": 3.265759511246876e-05, + "loss": 2.6411, + "step": 7594 + }, + { + "epoch": 0.09, + "learning_rate": 3.265296676849023e-05, + "loss": 2.8189, + "step": 7596 + }, + { + "epoch": 0.09, + "learning_rate": 3.264833842451171e-05, + "loss": 2.3853, + "step": 7598 + }, + { + "epoch": 0.09, + "learning_rate": 3.264371008053318e-05, + "loss": 2.3411, + "step": 7600 + }, + { + "epoch": 0.09, + "learning_rate": 3.263908173655466e-05, + "loss": 2.846, + "step": 7602 + }, + { + "epoch": 0.09, + "learning_rate": 3.2634453392576134e-05, + "loss": 2.1812, + "step": 7604 + }, + { + "epoch": 0.09, + "learning_rate": 3.262982504859761e-05, + "loss": 3.8997, + "step": 7606 + }, + { + "epoch": 0.09, + "learning_rate": 3.2625196704619085e-05, + "loss": 6.9646, + "step": 7608 + }, + { + "epoch": 0.09, + "learning_rate": 3.2620568360640564e-05, + "loss": 6.6754, + "step": 7610 + }, + { + "epoch": 0.09, + "learning_rate": 3.2615940016662036e-05, + "loss": 6.8442, + "step": 7612 + }, + { + "epoch": 0.09, + "learning_rate": 3.2611311672683515e-05, + "loss": 0.9615, + "step": 7614 + }, + { + "epoch": 0.09, + "learning_rate": 3.260668332870499e-05, + "loss": 1.3182, + "step": 7616 + }, + { + "epoch": 0.09, + "learning_rate": 3.2602054984726466e-05, + "loss": 0.1915, + "step": 7618 + }, + { + "epoch": 0.09, + "learning_rate": 3.2597426640747944e-05, + "loss": 0.0016, + "step": 7620 + }, + { + "epoch": 0.09, + "learning_rate": 3.2592798296769416e-05, + "loss": 0.049, + "step": 7622 + }, + { + "epoch": 0.09, + "learning_rate": 3.2588169952790895e-05, + "loss": 0.4228, + "step": 7624 + }, + { + "epoch": 0.09, + "learning_rate": 3.258354160881237e-05, + "loss": 0.2418, + "step": 7626 + }, + { + "epoch": 0.09, + "learning_rate": 3.2578913264833846e-05, + "loss": 2.0256, + "step": 7628 + }, + { + "epoch": 0.09, + "learning_rate": 3.257428492085532e-05, + "loss": 3.1484, + "step": 7630 + }, + { + "epoch": 0.09, + "learning_rate": 3.25696565768768e-05, + "loss": 8.1448, + "step": 7632 + }, + { + "epoch": 0.09, + "learning_rate": 3.256502823289827e-05, + "loss": 0.9573, + "step": 7634 + }, + { + "epoch": 0.09, + "learning_rate": 3.256039988891975e-05, + "loss": 3.5413, + "step": 7636 + }, + { + "epoch": 0.09, + "learning_rate": 3.255577154494122e-05, + "loss": 3.1948, + "step": 7638 + }, + { + "epoch": 0.09, + "learning_rate": 3.25511432009627e-05, + "loss": 0.003, + "step": 7640 + }, + { + "epoch": 0.09, + "learning_rate": 3.254651485698417e-05, + "loss": 0.0518, + "step": 7642 + }, + { + "epoch": 0.09, + "learning_rate": 3.254188651300565e-05, + "loss": 0.0551, + "step": 7644 + }, + { + "epoch": 0.09, + "learning_rate": 3.253725816902712e-05, + "loss": 1.1148, + "step": 7646 + }, + { + "epoch": 0.09, + "learning_rate": 3.25326298250486e-05, + "loss": 7.6741, + "step": 7648 + }, + { + "epoch": 0.09, + "learning_rate": 3.252800148107007e-05, + "loss": 3.7683, + "step": 7650 + }, + { + "epoch": 0.09, + "learning_rate": 3.252337313709155e-05, + "loss": 0.7159, + "step": 7652 + }, + { + "epoch": 0.09, + "learning_rate": 3.251874479311302e-05, + "loss": 5.7249, + "step": 7654 + }, + { + "epoch": 0.09, + "learning_rate": 3.25141164491345e-05, + "loss": 0.0016, + "step": 7656 + }, + { + "epoch": 0.09, + "learning_rate": 3.2509488105155974e-05, + "loss": 6.2266, + "step": 7658 + }, + { + "epoch": 0.09, + "learning_rate": 3.250485976117745e-05, + "loss": 0.8546, + "step": 7660 + }, + { + "epoch": 0.09, + "learning_rate": 3.250023141719893e-05, + "loss": 1.9061, + "step": 7662 + }, + { + "epoch": 0.09, + "learning_rate": 3.2495603073220404e-05, + "loss": 0.6941, + "step": 7664 + }, + { + "epoch": 0.09, + "learning_rate": 3.249097472924188e-05, + "loss": 0.907, + "step": 7666 + }, + { + "epoch": 0.09, + "learning_rate": 3.2486346385263355e-05, + "loss": 2.7791, + "step": 7668 + }, + { + "epoch": 0.09, + "learning_rate": 3.248171804128483e-05, + "loss": 2.7647, + "step": 7670 + }, + { + "epoch": 0.09, + "learning_rate": 3.2477089697306305e-05, + "loss": 2.8938, + "step": 7672 + }, + { + "epoch": 0.09, + "learning_rate": 3.2472461353327784e-05, + "loss": 6.2322, + "step": 7674 + }, + { + "epoch": 0.09, + "learning_rate": 3.2467833009349256e-05, + "loss": 4.5319, + "step": 7676 + }, + { + "epoch": 0.09, + "learning_rate": 3.2463204665370735e-05, + "loss": 1.3444, + "step": 7678 + }, + { + "epoch": 0.09, + "learning_rate": 3.245857632139221e-05, + "loss": 3.1963, + "step": 7680 + }, + { + "epoch": 0.09, + "learning_rate": 3.2453947977413686e-05, + "loss": 1.0827, + "step": 7682 + }, + { + "epoch": 0.09, + "learning_rate": 3.244931963343516e-05, + "loss": 0.4566, + "step": 7684 + }, + { + "epoch": 0.09, + "learning_rate": 3.244469128945664e-05, + "loss": 3.8082, + "step": 7686 + }, + { + "epoch": 0.09, + "learning_rate": 3.244006294547811e-05, + "loss": 6.3307, + "step": 7688 + }, + { + "epoch": 0.09, + "learning_rate": 3.243543460149959e-05, + "loss": 0.8508, + "step": 7690 + }, + { + "epoch": 0.09, + "learning_rate": 3.243080625752106e-05, + "loss": 1.761, + "step": 7692 + }, + { + "epoch": 0.09, + "learning_rate": 3.242617791354254e-05, + "loss": 1.5045, + "step": 7694 + }, + { + "epoch": 0.09, + "learning_rate": 3.242154956956401e-05, + "loss": 1.7304, + "step": 7696 + }, + { + "epoch": 0.09, + "learning_rate": 3.241692122558549e-05, + "loss": 4.739, + "step": 7698 + }, + { + "epoch": 0.09, + "learning_rate": 3.241229288160697e-05, + "loss": 1.3505, + "step": 7700 + }, + { + "epoch": 0.09, + "learning_rate": 3.240766453762844e-05, + "loss": 1.7968, + "step": 7702 + }, + { + "epoch": 0.09, + "learning_rate": 3.240303619364992e-05, + "loss": 2.1542, + "step": 7704 + }, + { + "epoch": 0.09, + "learning_rate": 3.239840784967139e-05, + "loss": 2.2459, + "step": 7706 + }, + { + "epoch": 0.09, + "learning_rate": 3.239377950569287e-05, + "loss": 0.0014, + "step": 7708 + }, + { + "epoch": 0.09, + "learning_rate": 3.238915116171434e-05, + "loss": 0.6826, + "step": 7710 + }, + { + "epoch": 0.09, + "learning_rate": 3.238452281773582e-05, + "loss": 1.0702, + "step": 7712 + }, + { + "epoch": 0.09, + "learning_rate": 3.2379894473757286e-05, + "loss": 1.6562, + "step": 7714 + }, + { + "epoch": 0.09, + "learning_rate": 3.2375266129778765e-05, + "loss": 0.0004, + "step": 7716 + }, + { + "epoch": 0.09, + "learning_rate": 3.237063778580024e-05, + "loss": 6.2955, + "step": 7718 + }, + { + "epoch": 0.09, + "learning_rate": 3.2366009441821716e-05, + "loss": 2.9639, + "step": 7720 + }, + { + "epoch": 0.09, + "learning_rate": 3.236138109784319e-05, + "loss": 0.2657, + "step": 7722 + }, + { + "epoch": 0.09, + "learning_rate": 3.2356752753864666e-05, + "loss": 3.5979, + "step": 7724 + }, + { + "epoch": 0.09, + "learning_rate": 3.2352124409886145e-05, + "loss": 3.7538, + "step": 7726 + }, + { + "epoch": 0.09, + "learning_rate": 3.234749606590762e-05, + "loss": 0.8819, + "step": 7728 + }, + { + "epoch": 0.09, + "learning_rate": 3.2342867721929096e-05, + "loss": 1.2886, + "step": 7730 + }, + { + "epoch": 0.09, + "learning_rate": 3.233823937795057e-05, + "loss": 2.257, + "step": 7732 + }, + { + "epoch": 0.09, + "learning_rate": 3.233361103397205e-05, + "loss": 1.3383, + "step": 7734 + }, + { + "epoch": 0.09, + "learning_rate": 3.232898268999352e-05, + "loss": 2.126, + "step": 7736 + }, + { + "epoch": 0.09, + "learning_rate": 3.2324354346015e-05, + "loss": 0.9785, + "step": 7738 + }, + { + "epoch": 0.09, + "learning_rate": 3.231972600203647e-05, + "loss": 3.6291, + "step": 7740 + }, + { + "epoch": 0.09, + "learning_rate": 3.231509765805795e-05, + "loss": 2.3421, + "step": 7742 + }, + { + "epoch": 0.09, + "learning_rate": 3.231046931407942e-05, + "loss": 6.215, + "step": 7744 + }, + { + "epoch": 0.09, + "learning_rate": 3.23058409701009e-05, + "loss": 0.2446, + "step": 7746 + }, + { + "epoch": 0.09, + "learning_rate": 3.230121262612237e-05, + "loss": 2.4229, + "step": 7748 + }, + { + "epoch": 0.09, + "learning_rate": 3.229658428214385e-05, + "loss": 1.3068, + "step": 7750 + }, + { + "epoch": 0.09, + "learning_rate": 3.229195593816532e-05, + "loss": 1.149, + "step": 7752 + }, + { + "epoch": 0.09, + "learning_rate": 3.22873275941868e-05, + "loss": 2.2274, + "step": 7754 + }, + { + "epoch": 0.09, + "learning_rate": 3.228269925020827e-05, + "loss": 1.0459, + "step": 7756 + }, + { + "epoch": 0.09, + "learning_rate": 3.227807090622975e-05, + "loss": 1.5083, + "step": 7758 + }, + { + "epoch": 0.09, + "learning_rate": 3.2273442562251224e-05, + "loss": 3.5236, + "step": 7760 + }, + { + "epoch": 0.09, + "learning_rate": 3.22688142182727e-05, + "loss": 4.633, + "step": 7762 + }, + { + "epoch": 0.09, + "learning_rate": 3.2264185874294175e-05, + "loss": 1.9962, + "step": 7764 + }, + { + "epoch": 0.09, + "learning_rate": 3.2259557530315654e-05, + "loss": 1.2083, + "step": 7766 + }, + { + "epoch": 0.09, + "learning_rate": 3.225492918633713e-05, + "loss": 0.0458, + "step": 7768 + }, + { + "epoch": 0.09, + "learning_rate": 3.2250300842358605e-05, + "loss": 0.5635, + "step": 7770 + }, + { + "epoch": 0.09, + "learning_rate": 3.2245672498380083e-05, + "loss": 4.6251, + "step": 7772 + }, + { + "epoch": 0.09, + "learning_rate": 3.2241044154401555e-05, + "loss": 0.3963, + "step": 7774 + }, + { + "epoch": 0.09, + "learning_rate": 3.2236415810423034e-05, + "loss": 1.3559, + "step": 7776 + }, + { + "epoch": 0.09, + "learning_rate": 3.2231787466444506e-05, + "loss": 6.4317, + "step": 7778 + }, + { + "epoch": 0.09, + "learning_rate": 3.2227159122465985e-05, + "loss": 6.3656, + "step": 7780 + }, + { + "epoch": 0.09, + "learning_rate": 3.222253077848746e-05, + "loss": 2.3569, + "step": 7782 + }, + { + "epoch": 0.09, + "learning_rate": 3.2217902434508936e-05, + "loss": 0.0869, + "step": 7784 + }, + { + "epoch": 0.09, + "learning_rate": 3.221327409053041e-05, + "loss": 1.3258, + "step": 7786 + }, + { + "epoch": 0.09, + "learning_rate": 3.220864574655189e-05, + "loss": 3.016, + "step": 7788 + }, + { + "epoch": 0.09, + "learning_rate": 3.220401740257336e-05, + "loss": 1.9786, + "step": 7790 + }, + { + "epoch": 0.09, + "learning_rate": 3.219938905859484e-05, + "loss": 0.078, + "step": 7792 + }, + { + "epoch": 0.09, + "learning_rate": 3.219476071461631e-05, + "loss": 3.2488, + "step": 7794 + }, + { + "epoch": 0.09, + "learning_rate": 3.219013237063779e-05, + "loss": 3.1158, + "step": 7796 + }, + { + "epoch": 0.09, + "learning_rate": 3.218550402665926e-05, + "loss": 0.369, + "step": 7798 + }, + { + "epoch": 0.09, + "learning_rate": 3.218087568268074e-05, + "loss": 3.4031, + "step": 7800 + }, + { + "epoch": 0.09, + "learning_rate": 3.217624733870221e-05, + "loss": 2.4213, + "step": 7802 + }, + { + "epoch": 0.09, + "learning_rate": 3.217161899472369e-05, + "loss": 1.7498, + "step": 7804 + }, + { + "epoch": 0.09, + "learning_rate": 3.216699065074517e-05, + "loss": 1.4593, + "step": 7806 + }, + { + "epoch": 0.09, + "learning_rate": 3.216236230676664e-05, + "loss": 0.0629, + "step": 7808 + }, + { + "epoch": 0.09, + "learning_rate": 3.215773396278812e-05, + "loss": 0.0774, + "step": 7810 + }, + { + "epoch": 0.09, + "learning_rate": 3.215310561880959e-05, + "loss": 1.5424, + "step": 7812 + }, + { + "epoch": 0.09, + "learning_rate": 3.214847727483107e-05, + "loss": 3.7143, + "step": 7814 + }, + { + "epoch": 0.09, + "learning_rate": 3.214384893085254e-05, + "loss": 0.0011, + "step": 7816 + }, + { + "epoch": 0.09, + "learning_rate": 3.213922058687402e-05, + "loss": 3.8993, + "step": 7818 + }, + { + "epoch": 0.09, + "learning_rate": 3.2134592242895494e-05, + "loss": 2.0086, + "step": 7820 + }, + { + "epoch": 0.09, + "learning_rate": 3.212996389891697e-05, + "loss": 0.0676, + "step": 7822 + }, + { + "epoch": 0.09, + "learning_rate": 3.2125335554938444e-05, + "loss": 3.4925, + "step": 7824 + }, + { + "epoch": 0.09, + "learning_rate": 3.212070721095992e-05, + "loss": 3.1057, + "step": 7826 + }, + { + "epoch": 0.09, + "learning_rate": 3.2116078866981395e-05, + "loss": 2.3035, + "step": 7828 + }, + { + "epoch": 0.09, + "learning_rate": 3.2111450523002874e-05, + "loss": 0.0057, + "step": 7830 + }, + { + "epoch": 0.09, + "learning_rate": 3.2106822179024346e-05, + "loss": 0.0014, + "step": 7832 + }, + { + "epoch": 0.09, + "learning_rate": 3.2102193835045825e-05, + "loss": 2.5449, + "step": 7834 + }, + { + "epoch": 0.09, + "learning_rate": 3.20975654910673e-05, + "loss": 7.0482, + "step": 7836 + }, + { + "epoch": 0.09, + "learning_rate": 3.2092937147088776e-05, + "loss": 1.299, + "step": 7838 + }, + { + "epoch": 0.09, + "learning_rate": 3.208830880311025e-05, + "loss": 3.0205, + "step": 7840 + }, + { + "epoch": 0.09, + "learning_rate": 3.208368045913173e-05, + "loss": 1.9514, + "step": 7842 + }, + { + "epoch": 0.09, + "learning_rate": 3.20790521151532e-05, + "loss": 0.0586, + "step": 7844 + }, + { + "epoch": 0.09, + "learning_rate": 3.207442377117468e-05, + "loss": 4.9048, + "step": 7846 + }, + { + "epoch": 0.09, + "learning_rate": 3.2069795427196156e-05, + "loss": 3.5076, + "step": 7848 + }, + { + "epoch": 0.09, + "learning_rate": 3.206516708321763e-05, + "loss": 2.3847, + "step": 7850 + }, + { + "epoch": 0.09, + "learning_rate": 3.206053873923911e-05, + "loss": 1.0066, + "step": 7852 + }, + { + "epoch": 0.09, + "learning_rate": 3.205591039526058e-05, + "loss": 0.3838, + "step": 7854 + }, + { + "epoch": 0.09, + "learning_rate": 3.205128205128206e-05, + "loss": 2.1757, + "step": 7856 + }, + { + "epoch": 0.09, + "learning_rate": 3.204665370730352e-05, + "loss": 6.9891, + "step": 7858 + }, + { + "epoch": 0.09, + "learning_rate": 3.2042025363325e-05, + "loss": 5.0924, + "step": 7860 + }, + { + "epoch": 0.09, + "learning_rate": 3.2037397019346474e-05, + "loss": 2.6594, + "step": 7862 + }, + { + "epoch": 0.09, + "learning_rate": 3.203276867536795e-05, + "loss": 4.203, + "step": 7864 + }, + { + "epoch": 0.09, + "learning_rate": 3.2028140331389425e-05, + "loss": 0.2276, + "step": 7866 + }, + { + "epoch": 0.09, + "learning_rate": 3.2023511987410904e-05, + "loss": 0.8025, + "step": 7868 + }, + { + "epoch": 0.09, + "learning_rate": 3.2018883643432376e-05, + "loss": 2.8032, + "step": 7870 + }, + { + "epoch": 0.09, + "learning_rate": 3.2014255299453855e-05, + "loss": 4.0629, + "step": 7872 + }, + { + "epoch": 0.09, + "learning_rate": 3.2009626955475333e-05, + "loss": 1.5313, + "step": 7874 + }, + { + "epoch": 0.09, + "learning_rate": 3.2004998611496805e-05, + "loss": 4.9594, + "step": 7876 + }, + { + "epoch": 0.09, + "learning_rate": 3.2000370267518284e-05, + "loss": 1.0184, + "step": 7878 + }, + { + "epoch": 0.09, + "learning_rate": 3.1995741923539756e-05, + "loss": 0.0308, + "step": 7880 + }, + { + "epoch": 0.09, + "learning_rate": 3.1991113579561235e-05, + "loss": 0.8684, + "step": 7882 + }, + { + "epoch": 0.09, + "learning_rate": 3.198648523558271e-05, + "loss": 0.8938, + "step": 7884 + }, + { + "epoch": 0.09, + "learning_rate": 3.1981856891604186e-05, + "loss": 0.9181, + "step": 7886 + }, + { + "epoch": 0.09, + "learning_rate": 3.197722854762566e-05, + "loss": 1.4285, + "step": 7888 + }, + { + "epoch": 0.09, + "learning_rate": 3.197260020364714e-05, + "loss": 5.1826, + "step": 7890 + }, + { + "epoch": 0.09, + "learning_rate": 3.196797185966861e-05, + "loss": 0.0998, + "step": 7892 + }, + { + "epoch": 0.09, + "learning_rate": 3.196334351569009e-05, + "loss": 1.434, + "step": 7894 + }, + { + "epoch": 0.09, + "learning_rate": 3.195871517171156e-05, + "loss": 0.3821, + "step": 7896 + }, + { + "epoch": 0.09, + "learning_rate": 3.195408682773304e-05, + "loss": 0.0026, + "step": 7898 + }, + { + "epoch": 0.09, + "learning_rate": 3.194945848375451e-05, + "loss": 1.3475, + "step": 7900 + }, + { + "epoch": 0.09, + "learning_rate": 3.194483013977599e-05, + "loss": 2.1203, + "step": 7902 + }, + { + "epoch": 0.09, + "learning_rate": 3.194020179579746e-05, + "loss": 2.4074, + "step": 7904 + }, + { + "epoch": 0.09, + "learning_rate": 3.193557345181894e-05, + "loss": 4.6178, + "step": 7906 + }, + { + "epoch": 0.09, + "learning_rate": 3.193094510784041e-05, + "loss": 0.5448, + "step": 7908 + }, + { + "epoch": 0.09, + "learning_rate": 3.192631676386189e-05, + "loss": 1.4482, + "step": 7910 + }, + { + "epoch": 0.09, + "learning_rate": 3.192168841988337e-05, + "loss": 3.7371, + "step": 7912 + }, + { + "epoch": 0.09, + "learning_rate": 3.191706007590484e-05, + "loss": 2.1748, + "step": 7914 + }, + { + "epoch": 0.09, + "learning_rate": 3.191243173192632e-05, + "loss": 0.9842, + "step": 7916 + }, + { + "epoch": 0.09, + "learning_rate": 3.190780338794779e-05, + "loss": 3.1469, + "step": 7918 + }, + { + "epoch": 0.09, + "learning_rate": 3.190317504396927e-05, + "loss": 2.5506, + "step": 7920 + }, + { + "epoch": 0.09, + "learning_rate": 3.1898546699990744e-05, + "loss": 0.2038, + "step": 7922 + }, + { + "epoch": 0.09, + "learning_rate": 3.189391835601222e-05, + "loss": 5.0057, + "step": 7924 + }, + { + "epoch": 0.09, + "learning_rate": 3.1889290012033695e-05, + "loss": 0.0036, + "step": 7926 + }, + { + "epoch": 0.09, + "learning_rate": 3.188466166805517e-05, + "loss": 2.5364, + "step": 7928 + }, + { + "epoch": 0.09, + "learning_rate": 3.1880033324076645e-05, + "loss": 0.0564, + "step": 7930 + }, + { + "epoch": 0.09, + "learning_rate": 3.1875404980098124e-05, + "loss": 2.7278, + "step": 7932 + }, + { + "epoch": 0.09, + "learning_rate": 3.1870776636119596e-05, + "loss": 1.5835, + "step": 7934 + }, + { + "epoch": 0.09, + "learning_rate": 3.1866148292141075e-05, + "loss": 1.9478, + "step": 7936 + }, + { + "epoch": 0.09, + "learning_rate": 3.186151994816255e-05, + "loss": 5.7761, + "step": 7938 + }, + { + "epoch": 0.09, + "learning_rate": 3.1856891604184026e-05, + "loss": 0.8422, + "step": 7940 + }, + { + "epoch": 0.09, + "learning_rate": 3.18522632602055e-05, + "loss": 0.3377, + "step": 7942 + }, + { + "epoch": 0.09, + "learning_rate": 3.184763491622698e-05, + "loss": 0.9974, + "step": 7944 + }, + { + "epoch": 0.09, + "learning_rate": 3.184300657224845e-05, + "loss": 4.1441, + "step": 7946 + }, + { + "epoch": 0.09, + "learning_rate": 3.183837822826993e-05, + "loss": 4.9168, + "step": 7948 + }, + { + "epoch": 0.09, + "learning_rate": 3.18337498842914e-05, + "loss": 0.0353, + "step": 7950 + }, + { + "epoch": 0.09, + "learning_rate": 3.182912154031288e-05, + "loss": 4.4639, + "step": 7952 + }, + { + "epoch": 0.09, + "learning_rate": 3.182449319633436e-05, + "loss": 2.6699, + "step": 7954 + }, + { + "epoch": 0.09, + "learning_rate": 3.181986485235583e-05, + "loss": 3.1607, + "step": 7956 + }, + { + "epoch": 0.09, + "learning_rate": 3.181523650837731e-05, + "loss": 0.0173, + "step": 7958 + }, + { + "epoch": 0.09, + "learning_rate": 3.181060816439878e-05, + "loss": 4.8739, + "step": 7960 + }, + { + "epoch": 0.09, + "learning_rate": 3.180597982042026e-05, + "loss": 3.1121, + "step": 7962 + }, + { + "epoch": 0.09, + "learning_rate": 3.180135147644173e-05, + "loss": 4.6104, + "step": 7964 + }, + { + "epoch": 0.09, + "learning_rate": 3.179672313246321e-05, + "loss": 3.0082, + "step": 7966 + }, + { + "epoch": 0.09, + "learning_rate": 3.179209478848468e-05, + "loss": 4.4288, + "step": 7968 + }, + { + "epoch": 0.09, + "learning_rate": 3.178746644450616e-05, + "loss": 0.5654, + "step": 7970 + }, + { + "epoch": 0.09, + "learning_rate": 3.178283810052763e-05, + "loss": 0.0025, + "step": 7972 + }, + { + "epoch": 0.09, + "learning_rate": 3.177820975654911e-05, + "loss": 0.8769, + "step": 7974 + }, + { + "epoch": 0.09, + "learning_rate": 3.1773581412570584e-05, + "loss": 0.7051, + "step": 7976 + }, + { + "epoch": 0.09, + "learning_rate": 3.176895306859206e-05, + "loss": 2.7188, + "step": 7978 + }, + { + "epoch": 0.09, + "learning_rate": 3.1764324724613534e-05, + "loss": 1.4885, + "step": 7980 + }, + { + "epoch": 0.09, + "learning_rate": 3.175969638063501e-05, + "loss": 1.6505, + "step": 7982 + }, + { + "epoch": 0.09, + "learning_rate": 3.1755068036656485e-05, + "loss": 4.7467, + "step": 7984 + }, + { + "epoch": 0.09, + "learning_rate": 3.1750439692677964e-05, + "loss": 0.0204, + "step": 7986 + }, + { + "epoch": 0.09, + "learning_rate": 3.1745811348699436e-05, + "loss": 7.0076, + "step": 7988 + }, + { + "epoch": 0.09, + "learning_rate": 3.1741183004720915e-05, + "loss": 2.5548, + "step": 7990 + }, + { + "epoch": 0.09, + "learning_rate": 3.173655466074239e-05, + "loss": 2.8503, + "step": 7992 + }, + { + "epoch": 0.09, + "learning_rate": 3.1731926316763866e-05, + "loss": 2.3065, + "step": 7994 + }, + { + "epoch": 0.09, + "learning_rate": 3.1727297972785345e-05, + "loss": 2.2659, + "step": 7996 + }, + { + "epoch": 0.09, + "learning_rate": 3.1722669628806817e-05, + "loss": 2.9816, + "step": 7998 + }, + { + "epoch": 0.09, + "learning_rate": 3.1718041284828295e-05, + "loss": 4.486, + "step": 8000 + }, + { + "epoch": 0.09, + "learning_rate": 3.171341294084977e-05, + "loss": 1.2234, + "step": 8002 + }, + { + "epoch": 0.09, + "learning_rate": 3.170878459687124e-05, + "loss": 0.0257, + "step": 8004 + }, + { + "epoch": 0.09, + "learning_rate": 3.170415625289271e-05, + "loss": 1.828, + "step": 8006 + }, + { + "epoch": 0.09, + "learning_rate": 3.169952790891419e-05, + "loss": 1.32, + "step": 8008 + }, + { + "epoch": 0.09, + "learning_rate": 3.169489956493566e-05, + "loss": 3.9426, + "step": 8010 + }, + { + "epoch": 0.09, + "learning_rate": 3.169027122095714e-05, + "loss": 2.2307, + "step": 8012 + }, + { + "epoch": 0.09, + "learning_rate": 3.168564287697861e-05, + "loss": 0.0018, + "step": 8014 + }, + { + "epoch": 0.09, + "learning_rate": 3.168101453300009e-05, + "loss": 0.0017, + "step": 8016 + }, + { + "epoch": 0.09, + "learning_rate": 3.1676386189021564e-05, + "loss": 1.6364, + "step": 8018 + }, + { + "epoch": 0.09, + "learning_rate": 3.167175784504304e-05, + "loss": 4.2082, + "step": 8020 + }, + { + "epoch": 0.09, + "learning_rate": 3.166712950106452e-05, + "loss": 1.4615, + "step": 8022 + }, + { + "epoch": 0.09, + "learning_rate": 3.1662501157085994e-05, + "loss": 1.9671, + "step": 8024 + }, + { + "epoch": 0.09, + "learning_rate": 3.165787281310747e-05, + "loss": 0.0108, + "step": 8026 + }, + { + "epoch": 0.09, + "learning_rate": 3.1653244469128945e-05, + "loss": 1.3288, + "step": 8028 + }, + { + "epoch": 0.09, + "learning_rate": 3.164861612515042e-05, + "loss": 1.6305, + "step": 8030 + }, + { + "epoch": 0.09, + "learning_rate": 3.1643987781171895e-05, + "loss": 2.206, + "step": 8032 + }, + { + "epoch": 0.09, + "learning_rate": 3.1639359437193374e-05, + "loss": 0.8777, + "step": 8034 + }, + { + "epoch": 0.09, + "learning_rate": 3.1634731093214846e-05, + "loss": 4.2899, + "step": 8036 + }, + { + "epoch": 0.09, + "learning_rate": 3.1630102749236325e-05, + "loss": 4.7501, + "step": 8038 + }, + { + "epoch": 0.09, + "learning_rate": 3.16254744052578e-05, + "loss": 1.5673, + "step": 8040 + }, + { + "epoch": 0.09, + "learning_rate": 3.1620846061279276e-05, + "loss": 6.0869, + "step": 8042 + }, + { + "epoch": 0.09, + "learning_rate": 3.161621771730075e-05, + "loss": 0.703, + "step": 8044 + }, + { + "epoch": 0.09, + "learning_rate": 3.161158937332223e-05, + "loss": 7.0084, + "step": 8046 + }, + { + "epoch": 0.09, + "learning_rate": 3.16069610293437e-05, + "loss": 1.9958, + "step": 8048 + }, + { + "epoch": 0.09, + "learning_rate": 3.160233268536518e-05, + "loss": 3.9442, + "step": 8050 + }, + { + "epoch": 0.09, + "learning_rate": 3.159770434138665e-05, + "loss": 0.0002, + "step": 8052 + }, + { + "epoch": 0.09, + "learning_rate": 3.159307599740813e-05, + "loss": 1.7507, + "step": 8054 + }, + { + "epoch": 0.09, + "learning_rate": 3.15884476534296e-05, + "loss": 0.0896, + "step": 8056 + }, + { + "epoch": 0.09, + "learning_rate": 3.158381930945108e-05, + "loss": 0.0474, + "step": 8058 + }, + { + "epoch": 0.09, + "learning_rate": 3.157919096547256e-05, + "loss": 0.4296, + "step": 8060 + }, + { + "epoch": 0.09, + "learning_rate": 3.157456262149403e-05, + "loss": 1.231, + "step": 8062 + }, + { + "epoch": 0.09, + "learning_rate": 3.156993427751551e-05, + "loss": 3.2459, + "step": 8064 + }, + { + "epoch": 0.09, + "learning_rate": 3.156530593353698e-05, + "loss": 2.3049, + "step": 8066 + }, + { + "epoch": 0.09, + "learning_rate": 3.156067758955846e-05, + "loss": 2.2557, + "step": 8068 + }, + { + "epoch": 0.09, + "learning_rate": 3.155604924557993e-05, + "loss": 0.0422, + "step": 8070 + }, + { + "epoch": 0.09, + "learning_rate": 3.155142090160141e-05, + "loss": 5.9104, + "step": 8072 + }, + { + "epoch": 0.09, + "learning_rate": 3.154679255762288e-05, + "loss": 2.7001, + "step": 8074 + }, + { + "epoch": 0.09, + "learning_rate": 3.154216421364436e-05, + "loss": 2.5937, + "step": 8076 + }, + { + "epoch": 0.09, + "learning_rate": 3.1537535869665834e-05, + "loss": 2.3223, + "step": 8078 + }, + { + "epoch": 0.09, + "learning_rate": 3.153290752568731e-05, + "loss": 2.2062, + "step": 8080 + }, + { + "epoch": 0.09, + "learning_rate": 3.1528279181708784e-05, + "loss": 4.7185, + "step": 8082 + }, + { + "epoch": 0.09, + "learning_rate": 3.152365083773026e-05, + "loss": 0.5392, + "step": 8084 + }, + { + "epoch": 0.09, + "learning_rate": 3.1519022493751735e-05, + "loss": 7.2309, + "step": 8086 + }, + { + "epoch": 0.09, + "learning_rate": 3.1514394149773214e-05, + "loss": 0.977, + "step": 8088 + }, + { + "epoch": 0.09, + "learning_rate": 3.1509765805794686e-05, + "loss": 1.7152, + "step": 8090 + }, + { + "epoch": 0.09, + "learning_rate": 3.1505137461816165e-05, + "loss": 5.0554, + "step": 8092 + }, + { + "epoch": 0.09, + "learning_rate": 3.150050911783764e-05, + "loss": 1.8791, + "step": 8094 + }, + { + "epoch": 0.09, + "learning_rate": 3.1495880773859116e-05, + "loss": 0.8723, + "step": 8096 + }, + { + "epoch": 0.09, + "learning_rate": 3.149125242988059e-05, + "loss": 1.4803, + "step": 8098 + }, + { + "epoch": 0.09, + "learning_rate": 3.1486624085902067e-05, + "loss": 1.5806, + "step": 8100 + }, + { + "epoch": 0.09, + "learning_rate": 3.1481995741923545e-05, + "loss": 0.0017, + "step": 8102 + }, + { + "epoch": 0.09, + "learning_rate": 3.147736739794502e-05, + "loss": 3.2618, + "step": 8104 + }, + { + "epoch": 0.09, + "learning_rate": 3.1472739053966496e-05, + "loss": 2.5934, + "step": 8106 + }, + { + "epoch": 0.09, + "learning_rate": 3.146811070998797e-05, + "loss": 0.0591, + "step": 8108 + }, + { + "epoch": 0.09, + "learning_rate": 3.146348236600945e-05, + "loss": 3.1082, + "step": 8110 + }, + { + "epoch": 0.09, + "learning_rate": 3.145885402203092e-05, + "loss": 0.0131, + "step": 8112 + }, + { + "epoch": 0.09, + "learning_rate": 3.14542256780524e-05, + "loss": 0.4143, + "step": 8114 + }, + { + "epoch": 0.09, + "learning_rate": 3.144959733407387e-05, + "loss": 2.0981, + "step": 8116 + }, + { + "epoch": 0.09, + "learning_rate": 3.144496899009535e-05, + "loss": 0.4566, + "step": 8118 + }, + { + "epoch": 0.09, + "learning_rate": 3.144034064611682e-05, + "loss": 3.3129, + "step": 8120 + }, + { + "epoch": 0.09, + "learning_rate": 3.14357123021383e-05, + "loss": 3.7437, + "step": 8122 + }, + { + "epoch": 0.09, + "learning_rate": 3.143108395815977e-05, + "loss": 2.561, + "step": 8124 + }, + { + "epoch": 0.09, + "learning_rate": 3.142645561418125e-05, + "loss": 0.3355, + "step": 8126 + }, + { + "epoch": 0.09, + "learning_rate": 3.142182727020272e-05, + "loss": 0.2487, + "step": 8128 + }, + { + "epoch": 0.09, + "learning_rate": 3.14171989262242e-05, + "loss": 5.1461, + "step": 8130 + }, + { + "epoch": 0.09, + "learning_rate": 3.1412570582245673e-05, + "loss": 5.9181, + "step": 8132 + }, + { + "epoch": 0.09, + "learning_rate": 3.140794223826715e-05, + "loss": 0.0027, + "step": 8134 + }, + { + "epoch": 0.09, + "learning_rate": 3.1403313894288624e-05, + "loss": 0.9831, + "step": 8136 + }, + { + "epoch": 0.09, + "learning_rate": 3.13986855503101e-05, + "loss": 3.2638, + "step": 8138 + }, + { + "epoch": 0.09, + "learning_rate": 3.139405720633158e-05, + "loss": 1.8449, + "step": 8140 + }, + { + "epoch": 0.09, + "learning_rate": 3.1389428862353054e-05, + "loss": 5.4895, + "step": 8142 + }, + { + "epoch": 0.09, + "learning_rate": 3.138480051837453e-05, + "loss": 3.4913, + "step": 8144 + }, + { + "epoch": 0.09, + "learning_rate": 3.1380172174396005e-05, + "loss": 4.3024, + "step": 8146 + }, + { + "epoch": 0.09, + "learning_rate": 3.137554383041748e-05, + "loss": 2.2776, + "step": 8148 + }, + { + "epoch": 0.09, + "learning_rate": 3.137091548643895e-05, + "loss": 0.5822, + "step": 8150 + }, + { + "epoch": 0.09, + "learning_rate": 3.136628714246043e-05, + "loss": 0.7959, + "step": 8152 + }, + { + "epoch": 0.09, + "learning_rate": 3.13616587984819e-05, + "loss": 0.5333, + "step": 8154 + }, + { + "epoch": 0.09, + "learning_rate": 3.135703045450338e-05, + "loss": 2.6373, + "step": 8156 + }, + { + "epoch": 0.09, + "learning_rate": 3.135240211052485e-05, + "loss": 1.6578, + "step": 8158 + }, + { + "epoch": 0.09, + "learning_rate": 3.134777376654633e-05, + "loss": 0.0388, + "step": 8160 + }, + { + "epoch": 0.09, + "learning_rate": 3.13431454225678e-05, + "loss": 0.0395, + "step": 8162 + }, + { + "epoch": 0.09, + "learning_rate": 3.133851707858928e-05, + "loss": 0.0823, + "step": 8164 + }, + { + "epoch": 0.09, + "learning_rate": 3.133388873461076e-05, + "loss": 1.3693, + "step": 8166 + }, + { + "epoch": 0.09, + "learning_rate": 3.132926039063223e-05, + "loss": 0.0003, + "step": 8168 + }, + { + "epoch": 0.09, + "learning_rate": 3.132463204665371e-05, + "loss": 2.4051, + "step": 8170 + }, + { + "epoch": 0.09, + "learning_rate": 3.132000370267518e-05, + "loss": 1.7586, + "step": 8172 + }, + { + "epoch": 0.09, + "learning_rate": 3.131537535869666e-05, + "loss": 8.9745, + "step": 8174 + }, + { + "epoch": 0.09, + "learning_rate": 3.131074701471813e-05, + "loss": 2.4175, + "step": 8176 + }, + { + "epoch": 0.09, + "learning_rate": 3.130611867073961e-05, + "loss": 3.6859, + "step": 8178 + }, + { + "epoch": 0.09, + "learning_rate": 3.1301490326761084e-05, + "loss": 0.1477, + "step": 8180 + }, + { + "epoch": 0.09, + "learning_rate": 3.129686198278256e-05, + "loss": 1.0666, + "step": 8182 + }, + { + "epoch": 0.09, + "learning_rate": 3.1292233638804034e-05, + "loss": 0.5813, + "step": 8184 + }, + { + "epoch": 0.09, + "learning_rate": 3.128760529482551e-05, + "loss": 0.2749, + "step": 8186 + }, + { + "epoch": 0.09, + "learning_rate": 3.1282976950846985e-05, + "loss": 5.8948, + "step": 8188 + }, + { + "epoch": 0.09, + "learning_rate": 3.1278348606868464e-05, + "loss": 5.304, + "step": 8190 + }, + { + "epoch": 0.09, + "learning_rate": 3.1273720262889936e-05, + "loss": 3.0641, + "step": 8192 + }, + { + "epoch": 0.09, + "learning_rate": 3.1269091918911415e-05, + "loss": 0.0006, + "step": 8194 + }, + { + "epoch": 0.09, + "learning_rate": 3.126446357493289e-05, + "loss": 2.217, + "step": 8196 + }, + { + "epoch": 0.09, + "learning_rate": 3.1259835230954366e-05, + "loss": 2.808, + "step": 8198 + }, + { + "epoch": 0.09, + "learning_rate": 3.125520688697584e-05, + "loss": 0.1423, + "step": 8200 + }, + { + "epoch": 0.09, + "learning_rate": 3.125057854299732e-05, + "loss": 5.0532, + "step": 8202 + }, + { + "epoch": 0.09, + "learning_rate": 3.124595019901879e-05, + "loss": 0.0093, + "step": 8204 + }, + { + "epoch": 0.09, + "learning_rate": 3.124132185504027e-05, + "loss": 1.9001, + "step": 8206 + }, + { + "epoch": 0.09, + "learning_rate": 3.1236693511061746e-05, + "loss": 1.2191, + "step": 8208 + }, + { + "epoch": 0.09, + "learning_rate": 3.123206516708322e-05, + "loss": 7.868, + "step": 8210 + }, + { + "epoch": 0.09, + "learning_rate": 3.12274368231047e-05, + "loss": 4.8843, + "step": 8212 + }, + { + "epoch": 0.09, + "learning_rate": 3.122280847912617e-05, + "loss": 0.1155, + "step": 8214 + }, + { + "epoch": 0.09, + "learning_rate": 3.121818013514765e-05, + "loss": 0.2419, + "step": 8216 + }, + { + "epoch": 0.09, + "learning_rate": 3.121355179116912e-05, + "loss": 0.7358, + "step": 8218 + }, + { + "epoch": 0.09, + "learning_rate": 3.12089234471906e-05, + "loss": 3.0502, + "step": 8220 + }, + { + "epoch": 0.09, + "learning_rate": 3.120429510321207e-05, + "loss": 0.1567, + "step": 8222 + }, + { + "epoch": 0.09, + "learning_rate": 3.119966675923355e-05, + "loss": 5.584, + "step": 8224 + }, + { + "epoch": 0.09, + "learning_rate": 3.119503841525502e-05, + "loss": 0.4278, + "step": 8226 + }, + { + "epoch": 0.09, + "learning_rate": 3.11904100712765e-05, + "loss": 0.0003, + "step": 8228 + }, + { + "epoch": 0.09, + "learning_rate": 3.118578172729797e-05, + "loss": 5.1811, + "step": 8230 + }, + { + "epoch": 0.09, + "learning_rate": 3.118115338331945e-05, + "loss": 2.6023, + "step": 8232 + }, + { + "epoch": 0.09, + "learning_rate": 3.1176525039340923e-05, + "loss": 0.0322, + "step": 8234 + }, + { + "epoch": 0.09, + "learning_rate": 3.11718966953624e-05, + "loss": 1.6061, + "step": 8236 + }, + { + "epoch": 0.09, + "learning_rate": 3.1167268351383874e-05, + "loss": 2.189, + "step": 8238 + }, + { + "epoch": 0.09, + "learning_rate": 3.116264000740535e-05, + "loss": 0.0011, + "step": 8240 + }, + { + "epoch": 0.09, + "learning_rate": 3.1158011663426825e-05, + "loss": 1.8465, + "step": 8242 + }, + { + "epoch": 0.09, + "learning_rate": 3.1153383319448304e-05, + "loss": 0.4413, + "step": 8244 + }, + { + "epoch": 0.09, + "learning_rate": 3.114875497546978e-05, + "loss": 4.1425, + "step": 8246 + }, + { + "epoch": 0.1, + "learning_rate": 3.1144126631491255e-05, + "loss": 0.0013, + "step": 8248 + }, + { + "epoch": 0.1, + "learning_rate": 3.1139498287512734e-05, + "loss": 0.1166, + "step": 8250 + }, + { + "epoch": 0.1, + "learning_rate": 3.1134869943534206e-05, + "loss": 3.2647, + "step": 8252 + }, + { + "epoch": 0.1, + "learning_rate": 3.1130241599555684e-05, + "loss": 0.5009, + "step": 8254 + }, + { + "epoch": 0.1, + "learning_rate": 3.1125613255577156e-05, + "loss": 0.0479, + "step": 8256 + }, + { + "epoch": 0.1, + "learning_rate": 3.1120984911598635e-05, + "loss": 1.7526, + "step": 8258 + }, + { + "epoch": 0.1, + "learning_rate": 3.111635656762011e-05, + "loss": 5.7401, + "step": 8260 + }, + { + "epoch": 0.1, + "learning_rate": 3.1111728223641586e-05, + "loss": 2.0462, + "step": 8262 + }, + { + "epoch": 0.1, + "learning_rate": 3.110709987966306e-05, + "loss": 2.2026, + "step": 8264 + }, + { + "epoch": 0.1, + "learning_rate": 3.110247153568454e-05, + "loss": 0.2488, + "step": 8266 + }, + { + "epoch": 0.1, + "learning_rate": 3.109784319170601e-05, + "loss": 3.4769, + "step": 8268 + }, + { + "epoch": 0.1, + "learning_rate": 3.109321484772749e-05, + "loss": 5.2134, + "step": 8270 + }, + { + "epoch": 0.1, + "learning_rate": 3.108858650374896e-05, + "loss": 1.5184, + "step": 8272 + }, + { + "epoch": 0.1, + "learning_rate": 3.108395815977044e-05, + "loss": 0.9441, + "step": 8274 + }, + { + "epoch": 0.1, + "learning_rate": 3.107932981579191e-05, + "loss": 0.2435, + "step": 8276 + }, + { + "epoch": 0.1, + "learning_rate": 3.107470147181339e-05, + "loss": 2.262, + "step": 8278 + }, + { + "epoch": 0.1, + "learning_rate": 3.107007312783486e-05, + "loss": 1.4654, + "step": 8280 + }, + { + "epoch": 0.1, + "learning_rate": 3.106544478385634e-05, + "loss": 2.0017, + "step": 8282 + }, + { + "epoch": 0.1, + "learning_rate": 3.106081643987781e-05, + "loss": 4.1003, + "step": 8284 + }, + { + "epoch": 0.1, + "learning_rate": 3.105618809589929e-05, + "loss": 6.4489, + "step": 8286 + }, + { + "epoch": 0.1, + "learning_rate": 3.105155975192077e-05, + "loss": 0.7611, + "step": 8288 + }, + { + "epoch": 0.1, + "learning_rate": 3.104693140794224e-05, + "loss": 1.8103, + "step": 8290 + }, + { + "epoch": 0.1, + "learning_rate": 3.104230306396372e-05, + "loss": 0.6592, + "step": 8292 + }, + { + "epoch": 0.1, + "learning_rate": 3.1037674719985186e-05, + "loss": 6.3593, + "step": 8294 + }, + { + "epoch": 0.1, + "learning_rate": 3.1033046376006665e-05, + "loss": 1.6152, + "step": 8296 + }, + { + "epoch": 0.1, + "learning_rate": 3.102841803202814e-05, + "loss": 3.1923, + "step": 8298 + }, + { + "epoch": 0.1, + "learning_rate": 3.1023789688049616e-05, + "loss": 5.0264, + "step": 8300 + }, + { + "epoch": 0.1, + "learning_rate": 3.101916134407109e-05, + "loss": 1.48, + "step": 8302 + }, + { + "epoch": 0.1, + "learning_rate": 3.101453300009257e-05, + "loss": 3.8563, + "step": 8304 + }, + { + "epoch": 0.1, + "learning_rate": 3.100990465611404e-05, + "loss": 2.5706, + "step": 8306 + }, + { + "epoch": 0.1, + "learning_rate": 3.100527631213552e-05, + "loss": 0.5912, + "step": 8308 + }, + { + "epoch": 0.1, + "learning_rate": 3.100064796815699e-05, + "loss": 1.1973, + "step": 8310 + }, + { + "epoch": 0.1, + "learning_rate": 3.099601962417847e-05, + "loss": 1.9169, + "step": 8312 + }, + { + "epoch": 0.1, + "learning_rate": 3.099139128019995e-05, + "loss": 1.4483, + "step": 8314 + }, + { + "epoch": 0.1, + "learning_rate": 3.098676293622142e-05, + "loss": 5.5765, + "step": 8316 + }, + { + "epoch": 0.1, + "learning_rate": 3.09821345922429e-05, + "loss": 3.1531, + "step": 8318 + }, + { + "epoch": 0.1, + "learning_rate": 3.097750624826437e-05, + "loss": 0.3868, + "step": 8320 + }, + { + "epoch": 0.1, + "learning_rate": 3.097287790428585e-05, + "loss": 3.3188, + "step": 8322 + }, + { + "epoch": 0.1, + "learning_rate": 3.096824956030732e-05, + "loss": 0.0283, + "step": 8324 + }, + { + "epoch": 0.1, + "learning_rate": 3.09636212163288e-05, + "loss": 0.3323, + "step": 8326 + }, + { + "epoch": 0.1, + "learning_rate": 3.095899287235027e-05, + "loss": 4.2733, + "step": 8328 + }, + { + "epoch": 0.1, + "learning_rate": 3.095436452837175e-05, + "loss": 4.0463, + "step": 8330 + }, + { + "epoch": 0.1, + "learning_rate": 3.094973618439322e-05, + "loss": 0.3585, + "step": 8332 + }, + { + "epoch": 0.1, + "learning_rate": 3.09451078404147e-05, + "loss": 5.7527, + "step": 8334 + }, + { + "epoch": 0.1, + "learning_rate": 3.0940479496436173e-05, + "loss": 2.2224, + "step": 8336 + }, + { + "epoch": 0.1, + "learning_rate": 3.093585115245765e-05, + "loss": 3.3552, + "step": 8338 + }, + { + "epoch": 0.1, + "learning_rate": 3.0931222808479124e-05, + "loss": 2.3438, + "step": 8340 + }, + { + "epoch": 0.1, + "learning_rate": 3.09265944645006e-05, + "loss": 1.8997, + "step": 8342 + }, + { + "epoch": 0.1, + "learning_rate": 3.0921966120522075e-05, + "loss": 0.9363, + "step": 8344 + }, + { + "epoch": 0.1, + "learning_rate": 3.0917337776543554e-05, + "loss": 2.2877, + "step": 8346 + }, + { + "epoch": 0.1, + "learning_rate": 3.0912709432565026e-05, + "loss": 1.2183, + "step": 8348 + }, + { + "epoch": 0.1, + "learning_rate": 3.0908081088586505e-05, + "loss": 0.8037, + "step": 8350 + }, + { + "epoch": 0.1, + "learning_rate": 3.090345274460798e-05, + "loss": 1.7496, + "step": 8352 + }, + { + "epoch": 0.1, + "learning_rate": 3.0898824400629456e-05, + "loss": 6.7742, + "step": 8354 + }, + { + "epoch": 0.1, + "learning_rate": 3.0894196056650935e-05, + "loss": 2.9634, + "step": 8356 + }, + { + "epoch": 0.1, + "learning_rate": 3.0889567712672407e-05, + "loss": 4.8109, + "step": 8358 + }, + { + "epoch": 0.1, + "learning_rate": 3.0884939368693885e-05, + "loss": 1.7785, + "step": 8360 + }, + { + "epoch": 0.1, + "learning_rate": 3.088031102471536e-05, + "loss": 4.4072, + "step": 8362 + }, + { + "epoch": 0.1, + "learning_rate": 3.0875682680736836e-05, + "loss": 2.5084, + "step": 8364 + }, + { + "epoch": 0.1, + "learning_rate": 3.087105433675831e-05, + "loss": 1.679, + "step": 8366 + }, + { + "epoch": 0.1, + "learning_rate": 3.086642599277979e-05, + "loss": 2.9228, + "step": 8368 + }, + { + "epoch": 0.1, + "learning_rate": 3.086179764880126e-05, + "loss": 4.0702, + "step": 8370 + }, + { + "epoch": 0.1, + "learning_rate": 3.085716930482274e-05, + "loss": 0.9584, + "step": 8372 + }, + { + "epoch": 0.1, + "learning_rate": 3.085254096084421e-05, + "loss": 2.5312, + "step": 8374 + }, + { + "epoch": 0.1, + "learning_rate": 3.084791261686569e-05, + "loss": 2.9919, + "step": 8376 + }, + { + "epoch": 0.1, + "learning_rate": 3.084328427288716e-05, + "loss": 2.0623, + "step": 8378 + }, + { + "epoch": 0.1, + "learning_rate": 3.083865592890864e-05, + "loss": 0.3563, + "step": 8380 + }, + { + "epoch": 0.1, + "learning_rate": 3.083402758493011e-05, + "loss": 0.9352, + "step": 8382 + }, + { + "epoch": 0.1, + "learning_rate": 3.082939924095159e-05, + "loss": 0.0363, + "step": 8384 + }, + { + "epoch": 0.1, + "learning_rate": 3.082477089697306e-05, + "loss": 4.4284, + "step": 8386 + }, + { + "epoch": 0.1, + "learning_rate": 3.082014255299454e-05, + "loss": 6.0555, + "step": 8388 + }, + { + "epoch": 0.1, + "learning_rate": 3.081551420901601e-05, + "loss": 1.7491, + "step": 8390 + }, + { + "epoch": 0.1, + "learning_rate": 3.081088586503749e-05, + "loss": 4.2498, + "step": 8392 + }, + { + "epoch": 0.1, + "learning_rate": 3.080625752105897e-05, + "loss": 6.2035, + "step": 8394 + }, + { + "epoch": 0.1, + "learning_rate": 3.080162917708044e-05, + "loss": 0.2129, + "step": 8396 + }, + { + "epoch": 0.1, + "learning_rate": 3.079700083310192e-05, + "loss": 1.7508, + "step": 8398 + }, + { + "epoch": 0.1, + "learning_rate": 3.0792372489123394e-05, + "loss": 1.4388, + "step": 8400 + }, + { + "epoch": 0.1, + "learning_rate": 3.078774414514487e-05, + "loss": 1.2908, + "step": 8402 + }, + { + "epoch": 0.1, + "learning_rate": 3.0783115801166345e-05, + "loss": 0.7245, + "step": 8404 + }, + { + "epoch": 0.1, + "learning_rate": 3.0778487457187824e-05, + "loss": 0.912, + "step": 8406 + }, + { + "epoch": 0.1, + "learning_rate": 3.0773859113209296e-05, + "loss": 5.0618, + "step": 8408 + }, + { + "epoch": 0.1, + "learning_rate": 3.0769230769230774e-05, + "loss": 0.0153, + "step": 8410 + }, + { + "epoch": 0.1, + "learning_rate": 3.0764602425252246e-05, + "loss": 0.2026, + "step": 8412 + }, + { + "epoch": 0.1, + "learning_rate": 3.0759974081273725e-05, + "loss": 2.4413, + "step": 8414 + }, + { + "epoch": 0.1, + "learning_rate": 3.07553457372952e-05, + "loss": 2.3155, + "step": 8416 + }, + { + "epoch": 0.1, + "learning_rate": 3.0750717393316676e-05, + "loss": 2.0893, + "step": 8418 + }, + { + "epoch": 0.1, + "learning_rate": 3.074608904933815e-05, + "loss": 5.1146, + "step": 8420 + }, + { + "epoch": 0.1, + "learning_rate": 3.074146070535963e-05, + "loss": 6.236, + "step": 8422 + }, + { + "epoch": 0.1, + "learning_rate": 3.07368323613811e-05, + "loss": 3.1347, + "step": 8424 + }, + { + "epoch": 0.1, + "learning_rate": 3.073220401740258e-05, + "loss": 2.2774, + "step": 8426 + }, + { + "epoch": 0.1, + "learning_rate": 3.072757567342405e-05, + "loss": 4.4451, + "step": 8428 + }, + { + "epoch": 0.1, + "learning_rate": 3.072294732944553e-05, + "loss": 6.6853, + "step": 8430 + }, + { + "epoch": 0.1, + "learning_rate": 3.0718318985467e-05, + "loss": 1.6249, + "step": 8432 + }, + { + "epoch": 0.1, + "learning_rate": 3.071369064148848e-05, + "loss": 1.3372, + "step": 8434 + }, + { + "epoch": 0.1, + "learning_rate": 3.070906229750996e-05, + "loss": 0.5749, + "step": 8436 + }, + { + "epoch": 0.1, + "learning_rate": 3.0704433953531424e-05, + "loss": 0.4462, + "step": 8438 + }, + { + "epoch": 0.1, + "learning_rate": 3.06998056095529e-05, + "loss": 0.9555, + "step": 8440 + }, + { + "epoch": 0.1, + "learning_rate": 3.0695177265574374e-05, + "loss": 0.7869, + "step": 8442 + }, + { + "epoch": 0.1, + "learning_rate": 3.069054892159585e-05, + "loss": 3.6525, + "step": 8444 + }, + { + "epoch": 0.1, + "learning_rate": 3.0685920577617325e-05, + "loss": 3.6249, + "step": 8446 + }, + { + "epoch": 0.1, + "learning_rate": 3.0681292233638804e-05, + "loss": 0.0006, + "step": 8448 + }, + { + "epoch": 0.1, + "learning_rate": 3.0676663889660276e-05, + "loss": 1.2922, + "step": 8450 + }, + { + "epoch": 0.1, + "learning_rate": 3.0672035545681755e-05, + "loss": 1.0289, + "step": 8452 + }, + { + "epoch": 0.1, + "learning_rate": 3.066740720170323e-05, + "loss": 6.1991, + "step": 8454 + }, + { + "epoch": 0.1, + "learning_rate": 3.0662778857724706e-05, + "loss": 0.3071, + "step": 8456 + }, + { + "epoch": 0.1, + "learning_rate": 3.065815051374618e-05, + "loss": 2.1744, + "step": 8458 + }, + { + "epoch": 0.1, + "learning_rate": 3.0653522169767657e-05, + "loss": 1.9806, + "step": 8460 + }, + { + "epoch": 0.1, + "learning_rate": 3.0648893825789135e-05, + "loss": 0.5833, + "step": 8462 + }, + { + "epoch": 0.1, + "learning_rate": 3.064426548181061e-05, + "loss": 3.3545, + "step": 8464 + }, + { + "epoch": 0.1, + "learning_rate": 3.0639637137832086e-05, + "loss": 1.5395, + "step": 8466 + }, + { + "epoch": 0.1, + "learning_rate": 3.063500879385356e-05, + "loss": 2.1006, + "step": 8468 + }, + { + "epoch": 0.1, + "learning_rate": 3.063038044987504e-05, + "loss": 0.1467, + "step": 8470 + }, + { + "epoch": 0.1, + "learning_rate": 3.062575210589651e-05, + "loss": 2.8177, + "step": 8472 + }, + { + "epoch": 0.1, + "learning_rate": 3.062112376191799e-05, + "loss": 1.004, + "step": 8474 + }, + { + "epoch": 0.1, + "learning_rate": 3.061649541793946e-05, + "loss": 1.4883, + "step": 8476 + }, + { + "epoch": 0.1, + "learning_rate": 3.061186707396094e-05, + "loss": 0.5784, + "step": 8478 + }, + { + "epoch": 0.1, + "learning_rate": 3.060723872998241e-05, + "loss": 0.006, + "step": 8480 + }, + { + "epoch": 0.1, + "learning_rate": 3.060261038600389e-05, + "loss": 0.0005, + "step": 8482 + }, + { + "epoch": 0.1, + "learning_rate": 3.059798204202536e-05, + "loss": 0.0816, + "step": 8484 + }, + { + "epoch": 0.1, + "learning_rate": 3.059335369804684e-05, + "loss": 0.0325, + "step": 8486 + }, + { + "epoch": 0.1, + "learning_rate": 3.058872535406831e-05, + "loss": 1.665, + "step": 8488 + }, + { + "epoch": 0.1, + "learning_rate": 3.058409701008979e-05, + "loss": 6.3894, + "step": 8490 + }, + { + "epoch": 0.1, + "learning_rate": 3.057946866611126e-05, + "loss": 0.0003, + "step": 8492 + }, + { + "epoch": 0.1, + "learning_rate": 3.057484032213274e-05, + "loss": 2.6149, + "step": 8494 + }, + { + "epoch": 0.1, + "learning_rate": 3.0570211978154214e-05, + "loss": 0.9966, + "step": 8496 + }, + { + "epoch": 0.1, + "learning_rate": 3.056558363417569e-05, + "loss": 4.2451, + "step": 8498 + }, + { + "epoch": 0.1, + "learning_rate": 3.056095529019717e-05, + "loss": 2.0741, + "step": 8500 + }, + { + "epoch": 0.1, + "learning_rate": 3.0556326946218644e-05, + "loss": 0.0004, + "step": 8502 + }, + { + "epoch": 0.1, + "learning_rate": 3.055169860224012e-05, + "loss": 1.3691, + "step": 8504 + }, + { + "epoch": 0.1, + "learning_rate": 3.0547070258261595e-05, + "loss": 2.3849, + "step": 8506 + }, + { + "epoch": 0.1, + "learning_rate": 3.0542441914283074e-05, + "loss": 1.4019, + "step": 8508 + }, + { + "epoch": 0.1, + "learning_rate": 3.0537813570304546e-05, + "loss": 0.0646, + "step": 8510 + }, + { + "epoch": 0.1, + "learning_rate": 3.0533185226326024e-05, + "loss": 2.5545, + "step": 8512 + }, + { + "epoch": 0.1, + "learning_rate": 3.0528556882347496e-05, + "loss": 1.0982, + "step": 8514 + }, + { + "epoch": 0.1, + "learning_rate": 3.0523928538368975e-05, + "loss": 7.2217, + "step": 8516 + }, + { + "epoch": 0.1, + "learning_rate": 3.051930019439045e-05, + "loss": 1.1519, + "step": 8518 + }, + { + "epoch": 0.1, + "learning_rate": 3.0514671850411926e-05, + "loss": 0.9134, + "step": 8520 + }, + { + "epoch": 0.1, + "learning_rate": 3.05100435064334e-05, + "loss": 2.7636, + "step": 8522 + }, + { + "epoch": 0.1, + "learning_rate": 3.0505415162454877e-05, + "loss": 0.0745, + "step": 8524 + }, + { + "epoch": 0.1, + "learning_rate": 3.0500786818476352e-05, + "loss": 3.6174, + "step": 8526 + }, + { + "epoch": 0.1, + "learning_rate": 3.0496158474497828e-05, + "loss": 4.757, + "step": 8528 + }, + { + "epoch": 0.1, + "learning_rate": 3.0491530130519303e-05, + "loss": 2.1492, + "step": 8530 + }, + { + "epoch": 0.1, + "learning_rate": 3.048690178654078e-05, + "loss": 0.0002, + "step": 8532 + }, + { + "epoch": 0.1, + "learning_rate": 3.0482273442562254e-05, + "loss": 3.0494, + "step": 8534 + }, + { + "epoch": 0.1, + "learning_rate": 3.047764509858373e-05, + "loss": 3.8122, + "step": 8536 + }, + { + "epoch": 0.1, + "learning_rate": 3.0473016754605205e-05, + "loss": 1.2335, + "step": 8538 + }, + { + "epoch": 0.1, + "learning_rate": 3.046838841062668e-05, + "loss": 2.2388, + "step": 8540 + }, + { + "epoch": 0.1, + "learning_rate": 3.0463760066648156e-05, + "loss": 0.8869, + "step": 8542 + }, + { + "epoch": 0.1, + "learning_rate": 3.045913172266963e-05, + "loss": 3.7308, + "step": 8544 + }, + { + "epoch": 0.1, + "learning_rate": 3.0454503378691107e-05, + "loss": 0.2315, + "step": 8546 + }, + { + "epoch": 0.1, + "learning_rate": 3.0449875034712582e-05, + "loss": 3.8689, + "step": 8548 + }, + { + "epoch": 0.1, + "learning_rate": 3.0445246690734057e-05, + "loss": 2.5855, + "step": 8550 + }, + { + "epoch": 0.1, + "learning_rate": 3.0440618346755533e-05, + "loss": 3.0806, + "step": 8552 + }, + { + "epoch": 0.1, + "learning_rate": 3.043599000277701e-05, + "loss": 1.4234, + "step": 8554 + }, + { + "epoch": 0.1, + "learning_rate": 3.0431361658798484e-05, + "loss": 3.9798, + "step": 8556 + }, + { + "epoch": 0.1, + "learning_rate": 3.042673331481996e-05, + "loss": 4.0061, + "step": 8558 + }, + { + "epoch": 0.1, + "learning_rate": 3.0422104970841438e-05, + "loss": 4.0273, + "step": 8560 + }, + { + "epoch": 0.1, + "learning_rate": 3.0417476626862913e-05, + "loss": 1.7658, + "step": 8562 + }, + { + "epoch": 0.1, + "learning_rate": 3.041284828288439e-05, + "loss": 1.1159, + "step": 8564 + }, + { + "epoch": 0.1, + "learning_rate": 3.0408219938905864e-05, + "loss": 1.5655, + "step": 8566 + }, + { + "epoch": 0.1, + "learning_rate": 3.040359159492734e-05, + "loss": 4.1492, + "step": 8568 + }, + { + "epoch": 0.1, + "learning_rate": 3.0398963250948815e-05, + "loss": 1.8282, + "step": 8570 + }, + { + "epoch": 0.1, + "learning_rate": 3.039433490697029e-05, + "loss": 3.0555, + "step": 8572 + }, + { + "epoch": 0.1, + "learning_rate": 3.0389706562991766e-05, + "loss": 2.3157, + "step": 8574 + }, + { + "epoch": 0.1, + "learning_rate": 3.038507821901324e-05, + "loss": 2.9472, + "step": 8576 + }, + { + "epoch": 0.1, + "learning_rate": 3.0380449875034717e-05, + "loss": 1.0291, + "step": 8578 + }, + { + "epoch": 0.1, + "learning_rate": 3.0375821531056192e-05, + "loss": 0.8681, + "step": 8580 + }, + { + "epoch": 0.1, + "learning_rate": 3.0371193187077668e-05, + "loss": 2.9629, + "step": 8582 + }, + { + "epoch": 0.1, + "learning_rate": 3.0366564843099136e-05, + "loss": 3.7028, + "step": 8584 + }, + { + "epoch": 0.1, + "learning_rate": 3.0361936499120615e-05, + "loss": 1.1608, + "step": 8586 + }, + { + "epoch": 0.1, + "learning_rate": 3.035730815514209e-05, + "loss": 1.9605, + "step": 8588 + }, + { + "epoch": 0.1, + "learning_rate": 3.0352679811163566e-05, + "loss": 4.3788, + "step": 8590 + }, + { + "epoch": 0.1, + "learning_rate": 3.034805146718504e-05, + "loss": 2.5573, + "step": 8592 + }, + { + "epoch": 0.1, + "learning_rate": 3.0343423123206517e-05, + "loss": 5.0749, + "step": 8594 + }, + { + "epoch": 0.1, + "learning_rate": 3.0338794779227992e-05, + "loss": 2.3817, + "step": 8596 + }, + { + "epoch": 0.1, + "learning_rate": 3.0334166435249468e-05, + "loss": 1.3812, + "step": 8598 + }, + { + "epoch": 0.1, + "learning_rate": 3.0329538091270943e-05, + "loss": 1.0008, + "step": 8600 + }, + { + "epoch": 0.1, + "learning_rate": 3.032490974729242e-05, + "loss": 0.776, + "step": 8602 + }, + { + "epoch": 0.1, + "learning_rate": 3.0320281403313894e-05, + "loss": 0.9474, + "step": 8604 + }, + { + "epoch": 0.1, + "learning_rate": 3.031565305933537e-05, + "loss": 1.5358, + "step": 8606 + }, + { + "epoch": 0.1, + "learning_rate": 3.0311024715356845e-05, + "loss": 0.4011, + "step": 8608 + }, + { + "epoch": 0.1, + "learning_rate": 3.030639637137832e-05, + "loss": 0.6522, + "step": 8610 + }, + { + "epoch": 0.1, + "learning_rate": 3.0301768027399796e-05, + "loss": 0.2305, + "step": 8612 + }, + { + "epoch": 0.1, + "learning_rate": 3.029713968342127e-05, + "loss": 0.5493, + "step": 8614 + }, + { + "epoch": 0.1, + "learning_rate": 3.0292511339442746e-05, + "loss": 6.2125, + "step": 8616 + }, + { + "epoch": 0.1, + "learning_rate": 3.0287882995464222e-05, + "loss": 1.9507, + "step": 8618 + }, + { + "epoch": 0.1, + "learning_rate": 3.0283254651485697e-05, + "loss": 0.006, + "step": 8620 + }, + { + "epoch": 0.1, + "learning_rate": 3.0278626307507173e-05, + "loss": 0.5366, + "step": 8622 + }, + { + "epoch": 0.1, + "learning_rate": 3.0273997963528648e-05, + "loss": 2.3751, + "step": 8624 + }, + { + "epoch": 0.1, + "learning_rate": 3.0269369619550127e-05, + "loss": 0.0048, + "step": 8626 + }, + { + "epoch": 0.1, + "learning_rate": 3.0264741275571602e-05, + "loss": 6.6239, + "step": 8628 + }, + { + "epoch": 0.1, + "learning_rate": 3.0260112931593078e-05, + "loss": 0.0148, + "step": 8630 + }, + { + "epoch": 0.1, + "learning_rate": 3.0255484587614553e-05, + "loss": 3.9663, + "step": 8632 + }, + { + "epoch": 0.1, + "learning_rate": 3.025085624363603e-05, + "loss": 8.3482, + "step": 8634 + }, + { + "epoch": 0.1, + "learning_rate": 3.0246227899657504e-05, + "loss": 4.7238, + "step": 8636 + }, + { + "epoch": 0.1, + "learning_rate": 3.024159955567898e-05, + "loss": 7.1077, + "step": 8638 + }, + { + "epoch": 0.1, + "learning_rate": 3.0236971211700455e-05, + "loss": 2.7613, + "step": 8640 + }, + { + "epoch": 0.1, + "learning_rate": 3.023234286772193e-05, + "loss": 5.9056, + "step": 8642 + }, + { + "epoch": 0.1, + "learning_rate": 3.0227714523743406e-05, + "loss": 0.002, + "step": 8644 + }, + { + "epoch": 0.1, + "learning_rate": 3.022308617976488e-05, + "loss": 0.5234, + "step": 8646 + }, + { + "epoch": 0.1, + "learning_rate": 3.0218457835786357e-05, + "loss": 2.3803, + "step": 8648 + }, + { + "epoch": 0.1, + "learning_rate": 3.0213829491807832e-05, + "loss": 1.6532, + "step": 8650 + }, + { + "epoch": 0.1, + "learning_rate": 3.0209201147829308e-05, + "loss": 2.0159, + "step": 8652 + }, + { + "epoch": 0.1, + "learning_rate": 3.0204572803850783e-05, + "loss": 1.7929, + "step": 8654 + }, + { + "epoch": 0.1, + "learning_rate": 3.019994445987226e-05, + "loss": 0.2551, + "step": 8656 + }, + { + "epoch": 0.1, + "learning_rate": 3.0195316115893734e-05, + "loss": 1.0449, + "step": 8658 + }, + { + "epoch": 0.1, + "learning_rate": 3.019068777191521e-05, + "loss": 1.9425, + "step": 8660 + }, + { + "epoch": 0.1, + "learning_rate": 3.0186059427936685e-05, + "loss": 1.2776, + "step": 8662 + }, + { + "epoch": 0.1, + "learning_rate": 3.018143108395816e-05, + "loss": 0.2275, + "step": 8664 + }, + { + "epoch": 0.1, + "learning_rate": 3.0176802739979635e-05, + "loss": 0.6151, + "step": 8666 + }, + { + "epoch": 0.1, + "learning_rate": 3.0172174396001114e-05, + "loss": 5.2266, + "step": 8668 + }, + { + "epoch": 0.1, + "learning_rate": 3.016754605202259e-05, + "loss": 2.0167, + "step": 8670 + }, + { + "epoch": 0.1, + "learning_rate": 3.0162917708044065e-05, + "loss": 4.5322, + "step": 8672 + }, + { + "epoch": 0.1, + "learning_rate": 3.015828936406554e-05, + "loss": 2.6235, + "step": 8674 + }, + { + "epoch": 0.1, + "learning_rate": 3.0153661020087016e-05, + "loss": 0.0141, + "step": 8676 + }, + { + "epoch": 0.1, + "learning_rate": 3.014903267610849e-05, + "loss": 4.3783, + "step": 8678 + }, + { + "epoch": 0.1, + "learning_rate": 3.0144404332129967e-05, + "loss": 7.0674, + "step": 8680 + }, + { + "epoch": 0.1, + "learning_rate": 3.0139775988151442e-05, + "loss": 2.242, + "step": 8682 + }, + { + "epoch": 0.1, + "learning_rate": 3.0135147644172918e-05, + "loss": 2.026, + "step": 8684 + }, + { + "epoch": 0.1, + "learning_rate": 3.0130519300194393e-05, + "loss": 0.5642, + "step": 8686 + }, + { + "epoch": 0.1, + "learning_rate": 3.012589095621587e-05, + "loss": 3.2346, + "step": 8688 + }, + { + "epoch": 0.1, + "learning_rate": 3.0121262612237344e-05, + "loss": 1.7129, + "step": 8690 + }, + { + "epoch": 0.1, + "learning_rate": 3.011663426825882e-05, + "loss": 0.848, + "step": 8692 + }, + { + "epoch": 0.1, + "learning_rate": 3.0112005924280295e-05, + "loss": 0.137, + "step": 8694 + }, + { + "epoch": 0.1, + "learning_rate": 3.010737758030177e-05, + "loss": 0.8701, + "step": 8696 + }, + { + "epoch": 0.1, + "learning_rate": 3.0102749236323246e-05, + "loss": 4.2081, + "step": 8698 + }, + { + "epoch": 0.1, + "learning_rate": 3.009812089234472e-05, + "loss": 1.0684, + "step": 8700 + }, + { + "epoch": 0.1, + "learning_rate": 3.0093492548366197e-05, + "loss": 0.9675, + "step": 8702 + }, + { + "epoch": 0.1, + "learning_rate": 3.0088864204387672e-05, + "loss": 1.0502, + "step": 8704 + }, + { + "epoch": 0.1, + "learning_rate": 3.0084235860409147e-05, + "loss": 3.2765, + "step": 8706 + }, + { + "epoch": 0.1, + "learning_rate": 3.0079607516430626e-05, + "loss": 2.1821, + "step": 8708 + }, + { + "epoch": 0.1, + "learning_rate": 3.00749791724521e-05, + "loss": 1.1262, + "step": 8710 + }, + { + "epoch": 0.1, + "learning_rate": 3.0070350828473577e-05, + "loss": 0.045, + "step": 8712 + }, + { + "epoch": 0.1, + "learning_rate": 3.0065722484495052e-05, + "loss": 0.0179, + "step": 8714 + }, + { + "epoch": 0.1, + "learning_rate": 3.0061094140516528e-05, + "loss": 0.9384, + "step": 8716 + }, + { + "epoch": 0.1, + "learning_rate": 3.0056465796538003e-05, + "loss": 1.9537, + "step": 8718 + }, + { + "epoch": 0.1, + "learning_rate": 3.005183745255948e-05, + "loss": 3.5294, + "step": 8720 + }, + { + "epoch": 0.1, + "learning_rate": 3.0047209108580954e-05, + "loss": 2.6424, + "step": 8722 + }, + { + "epoch": 0.1, + "learning_rate": 3.004258076460243e-05, + "loss": 1.122, + "step": 8724 + }, + { + "epoch": 0.1, + "learning_rate": 3.0037952420623905e-05, + "loss": 4.2658, + "step": 8726 + }, + { + "epoch": 0.1, + "learning_rate": 3.0033324076645374e-05, + "loss": 3.839, + "step": 8728 + }, + { + "epoch": 0.1, + "learning_rate": 3.002869573266685e-05, + "loss": 0.0003, + "step": 8730 + }, + { + "epoch": 0.1, + "learning_rate": 3.0024067388688324e-05, + "loss": 0.5661, + "step": 8732 + }, + { + "epoch": 0.1, + "learning_rate": 3.0019439044709803e-05, + "loss": 5.1147, + "step": 8734 + }, + { + "epoch": 0.1, + "learning_rate": 3.001481070073128e-05, + "loss": 4.5178, + "step": 8736 + }, + { + "epoch": 0.1, + "learning_rate": 3.0010182356752754e-05, + "loss": 1.9922, + "step": 8738 + }, + { + "epoch": 0.1, + "learning_rate": 3.000555401277423e-05, + "loss": 2.6919, + "step": 8740 + }, + { + "epoch": 0.1, + "learning_rate": 3.0000925668795705e-05, + "loss": 6.1803, + "step": 8742 + }, + { + "epoch": 0.1, + "learning_rate": 2.999629732481718e-05, + "loss": 4.4818, + "step": 8744 + }, + { + "epoch": 0.1, + "learning_rate": 2.9991668980838656e-05, + "loss": 0.92, + "step": 8746 + }, + { + "epoch": 0.1, + "learning_rate": 2.998704063686013e-05, + "loss": 2.3221, + "step": 8748 + }, + { + "epoch": 0.1, + "learning_rate": 2.9982412292881607e-05, + "loss": 4.035, + "step": 8750 + }, + { + "epoch": 0.1, + "learning_rate": 2.9977783948903082e-05, + "loss": 2.1704, + "step": 8752 + }, + { + "epoch": 0.1, + "learning_rate": 2.9973155604924558e-05, + "loss": 0.3782, + "step": 8754 + }, + { + "epoch": 0.1, + "learning_rate": 2.9968527260946033e-05, + "loss": 3.8531, + "step": 8756 + }, + { + "epoch": 0.1, + "learning_rate": 2.996389891696751e-05, + "loss": 3.9853, + "step": 8758 + }, + { + "epoch": 0.1, + "learning_rate": 2.9959270572988984e-05, + "loss": 1.3531, + "step": 8760 + }, + { + "epoch": 0.1, + "learning_rate": 2.995464222901046e-05, + "loss": 2.8588, + "step": 8762 + }, + { + "epoch": 0.1, + "learning_rate": 2.9950013885031935e-05, + "loss": 0.8375, + "step": 8764 + }, + { + "epoch": 0.1, + "learning_rate": 2.994538554105341e-05, + "loss": 0.0221, + "step": 8766 + }, + { + "epoch": 0.1, + "learning_rate": 2.9940757197074886e-05, + "loss": 1.1433, + "step": 8768 + }, + { + "epoch": 0.1, + "learning_rate": 2.993612885309636e-05, + "loss": 1.965, + "step": 8770 + }, + { + "epoch": 0.1, + "learning_rate": 2.9931500509117836e-05, + "loss": 2.3925, + "step": 8772 + }, + { + "epoch": 0.1, + "learning_rate": 2.9926872165139315e-05, + "loss": 2.5807, + "step": 8774 + }, + { + "epoch": 0.1, + "learning_rate": 2.992224382116079e-05, + "loss": 2.6032, + "step": 8776 + }, + { + "epoch": 0.1, + "learning_rate": 2.9917615477182266e-05, + "loss": 5.8284, + "step": 8778 + }, + { + "epoch": 0.1, + "learning_rate": 2.991298713320374e-05, + "loss": 2.644, + "step": 8780 + }, + { + "epoch": 0.1, + "learning_rate": 2.9908358789225217e-05, + "loss": 0.5209, + "step": 8782 + }, + { + "epoch": 0.1, + "learning_rate": 2.9903730445246692e-05, + "loss": 4.0014, + "step": 8784 + }, + { + "epoch": 0.1, + "learning_rate": 2.9899102101268168e-05, + "loss": 2.3262, + "step": 8786 + }, + { + "epoch": 0.1, + "learning_rate": 2.9894473757289643e-05, + "loss": 2.6605, + "step": 8788 + }, + { + "epoch": 0.1, + "learning_rate": 2.988984541331112e-05, + "loss": 0.9207, + "step": 8790 + }, + { + "epoch": 0.1, + "learning_rate": 2.9885217069332594e-05, + "loss": 1.551, + "step": 8792 + }, + { + "epoch": 0.1, + "learning_rate": 2.988058872535407e-05, + "loss": 0.1716, + "step": 8794 + }, + { + "epoch": 0.1, + "learning_rate": 2.9875960381375545e-05, + "loss": 0.086, + "step": 8796 + }, + { + "epoch": 0.1, + "learning_rate": 2.987133203739702e-05, + "loss": 2.2918, + "step": 8798 + }, + { + "epoch": 0.1, + "learning_rate": 2.9866703693418496e-05, + "loss": 0.9051, + "step": 8800 + }, + { + "epoch": 0.1, + "learning_rate": 2.986207534943997e-05, + "loss": 1.5911, + "step": 8802 + }, + { + "epoch": 0.1, + "learning_rate": 2.9857447005461447e-05, + "loss": 0.2114, + "step": 8804 + }, + { + "epoch": 0.1, + "learning_rate": 2.9852818661482922e-05, + "loss": 1.3804, + "step": 8806 + }, + { + "epoch": 0.1, + "learning_rate": 2.9848190317504397e-05, + "loss": 0.0005, + "step": 8808 + }, + { + "epoch": 0.1, + "learning_rate": 2.9843561973525873e-05, + "loss": 0.5363, + "step": 8810 + }, + { + "epoch": 0.1, + "learning_rate": 2.9838933629547348e-05, + "loss": 1.3634, + "step": 8812 + }, + { + "epoch": 0.1, + "learning_rate": 2.9834305285568827e-05, + "loss": 4.1716, + "step": 8814 + }, + { + "epoch": 0.1, + "learning_rate": 2.9829676941590302e-05, + "loss": 0.9799, + "step": 8816 + }, + { + "epoch": 0.1, + "learning_rate": 2.9825048597611778e-05, + "loss": 0.2878, + "step": 8818 + }, + { + "epoch": 0.1, + "learning_rate": 2.9820420253633253e-05, + "loss": 0.9264, + "step": 8820 + }, + { + "epoch": 0.1, + "learning_rate": 2.981579190965473e-05, + "loss": 0.0007, + "step": 8822 + }, + { + "epoch": 0.1, + "learning_rate": 2.9811163565676204e-05, + "loss": 5.956, + "step": 8824 + }, + { + "epoch": 0.1, + "learning_rate": 2.980653522169768e-05, + "loss": 5.9671, + "step": 8826 + }, + { + "epoch": 0.1, + "learning_rate": 2.9801906877719155e-05, + "loss": 0.0964, + "step": 8828 + }, + { + "epoch": 0.1, + "learning_rate": 2.979727853374063e-05, + "loss": 0.0004, + "step": 8830 + }, + { + "epoch": 0.1, + "learning_rate": 2.9792650189762106e-05, + "loss": 0.6169, + "step": 8832 + }, + { + "epoch": 0.1, + "learning_rate": 2.978802184578358e-05, + "loss": 0.0013, + "step": 8834 + }, + { + "epoch": 0.1, + "learning_rate": 2.9783393501805057e-05, + "loss": 1.0167, + "step": 8836 + }, + { + "epoch": 0.1, + "learning_rate": 2.9778765157826532e-05, + "loss": 2.9469, + "step": 8838 + }, + { + "epoch": 0.1, + "learning_rate": 2.9774136813848008e-05, + "loss": 0.0006, + "step": 8840 + }, + { + "epoch": 0.1, + "learning_rate": 2.9769508469869483e-05, + "loss": 1.0241, + "step": 8842 + }, + { + "epoch": 0.1, + "learning_rate": 2.976488012589096e-05, + "loss": 8.7453, + "step": 8844 + }, + { + "epoch": 0.1, + "learning_rate": 2.9760251781912434e-05, + "loss": 1.7243, + "step": 8846 + }, + { + "epoch": 0.1, + "learning_rate": 2.975562343793391e-05, + "loss": 2.9527, + "step": 8848 + }, + { + "epoch": 0.1, + "learning_rate": 2.9750995093955385e-05, + "loss": 1.2472, + "step": 8850 + }, + { + "epoch": 0.1, + "learning_rate": 2.974636674997686e-05, + "loss": 0.0023, + "step": 8852 + }, + { + "epoch": 0.1, + "learning_rate": 2.974173840599834e-05, + "loss": 3.3608, + "step": 8854 + }, + { + "epoch": 0.1, + "learning_rate": 2.9737110062019814e-05, + "loss": 0.2277, + "step": 8856 + }, + { + "epoch": 0.1, + "learning_rate": 2.973248171804129e-05, + "loss": 5.9623, + "step": 8858 + }, + { + "epoch": 0.1, + "learning_rate": 2.9727853374062765e-05, + "loss": 0.0256, + "step": 8860 + }, + { + "epoch": 0.1, + "learning_rate": 2.972322503008424e-05, + "loss": 1.715, + "step": 8862 + }, + { + "epoch": 0.1, + "learning_rate": 2.9718596686105716e-05, + "loss": 0.1588, + "step": 8864 + }, + { + "epoch": 0.1, + "learning_rate": 2.971396834212719e-05, + "loss": 1.5474, + "step": 8866 + }, + { + "epoch": 0.1, + "learning_rate": 2.9709339998148667e-05, + "loss": 4.8032, + "step": 8868 + }, + { + "epoch": 0.1, + "learning_rate": 2.9704711654170142e-05, + "loss": 4.3789, + "step": 8870 + }, + { + "epoch": 0.1, + "learning_rate": 2.9700083310191618e-05, + "loss": 0.0108, + "step": 8872 + }, + { + "epoch": 0.1, + "learning_rate": 2.9695454966213086e-05, + "loss": 1.2991, + "step": 8874 + }, + { + "epoch": 0.1, + "learning_rate": 2.9690826622234562e-05, + "loss": 0.1839, + "step": 8876 + }, + { + "epoch": 0.1, + "learning_rate": 2.9686198278256037e-05, + "loss": 0.3276, + "step": 8878 + }, + { + "epoch": 0.1, + "learning_rate": 2.9681569934277516e-05, + "loss": 2.9986, + "step": 8880 + }, + { + "epoch": 0.1, + "learning_rate": 2.967694159029899e-05, + "loss": 2.5504, + "step": 8882 + }, + { + "epoch": 0.1, + "learning_rate": 2.9672313246320467e-05, + "loss": 1.7614, + "step": 8884 + }, + { + "epoch": 0.1, + "learning_rate": 2.9667684902341942e-05, + "loss": 0.9056, + "step": 8886 + }, + { + "epoch": 0.1, + "learning_rate": 2.9663056558363418e-05, + "loss": 3.0661, + "step": 8888 + }, + { + "epoch": 0.1, + "learning_rate": 2.9658428214384893e-05, + "loss": 0.0546, + "step": 8890 + }, + { + "epoch": 0.1, + "learning_rate": 2.965379987040637e-05, + "loss": 0.6598, + "step": 8892 + }, + { + "epoch": 0.1, + "learning_rate": 2.9649171526427844e-05, + "loss": 1.5218, + "step": 8894 + }, + { + "epoch": 0.1, + "learning_rate": 2.964454318244932e-05, + "loss": 0.5795, + "step": 8896 + }, + { + "epoch": 0.1, + "learning_rate": 2.9639914838470795e-05, + "loss": 5.3184, + "step": 8898 + }, + { + "epoch": 0.1, + "learning_rate": 2.963528649449227e-05, + "loss": 2.5297, + "step": 8900 + }, + { + "epoch": 0.1, + "learning_rate": 2.9630658150513746e-05, + "loss": 1.7257, + "step": 8902 + }, + { + "epoch": 0.1, + "learning_rate": 2.962602980653522e-05, + "loss": 1.3044, + "step": 8904 + }, + { + "epoch": 0.1, + "learning_rate": 2.9621401462556697e-05, + "loss": 1.6708, + "step": 8906 + }, + { + "epoch": 0.1, + "learning_rate": 2.9616773118578172e-05, + "loss": 0.9601, + "step": 8908 + }, + { + "epoch": 0.1, + "learning_rate": 2.9612144774599647e-05, + "loss": 1.4011, + "step": 8910 + }, + { + "epoch": 0.1, + "learning_rate": 2.9607516430621123e-05, + "loss": 1.8843, + "step": 8912 + }, + { + "epoch": 0.1, + "learning_rate": 2.9602888086642598e-05, + "loss": 2.9137, + "step": 8914 + }, + { + "epoch": 0.1, + "learning_rate": 2.9598259742664074e-05, + "loss": 2.0101, + "step": 8916 + }, + { + "epoch": 0.1, + "learning_rate": 2.959363139868555e-05, + "loss": 2.5016, + "step": 8918 + }, + { + "epoch": 0.1, + "learning_rate": 2.9589003054707028e-05, + "loss": 4.2511, + "step": 8920 + }, + { + "epoch": 0.1, + "learning_rate": 2.9584374710728503e-05, + "loss": 4.6908, + "step": 8922 + }, + { + "epoch": 0.1, + "learning_rate": 2.957974636674998e-05, + "loss": 5.4428, + "step": 8924 + }, + { + "epoch": 0.1, + "learning_rate": 2.9575118022771454e-05, + "loss": 0.1247, + "step": 8926 + }, + { + "epoch": 0.1, + "learning_rate": 2.957048967879293e-05, + "loss": 1.5471, + "step": 8928 + }, + { + "epoch": 0.1, + "learning_rate": 2.9565861334814405e-05, + "loss": 1.0406, + "step": 8930 + }, + { + "epoch": 0.1, + "learning_rate": 2.956123299083588e-05, + "loss": 0.2986, + "step": 8932 + }, + { + "epoch": 0.1, + "learning_rate": 2.9556604646857356e-05, + "loss": 1.3935, + "step": 8934 + }, + { + "epoch": 0.1, + "learning_rate": 2.955197630287883e-05, + "loss": 3.3782, + "step": 8936 + }, + { + "epoch": 0.1, + "learning_rate": 2.9547347958900307e-05, + "loss": 1.3272, + "step": 8938 + }, + { + "epoch": 0.1, + "learning_rate": 2.9542719614921782e-05, + "loss": 2.7043, + "step": 8940 + }, + { + "epoch": 0.1, + "learning_rate": 2.9538091270943258e-05, + "loss": 2.0393, + "step": 8942 + }, + { + "epoch": 0.1, + "learning_rate": 2.9533462926964733e-05, + "loss": 0.0067, + "step": 8944 + }, + { + "epoch": 0.1, + "learning_rate": 2.952883458298621e-05, + "loss": 6.0855, + "step": 8946 + }, + { + "epoch": 0.1, + "learning_rate": 2.9524206239007684e-05, + "loss": 3.7209, + "step": 8948 + }, + { + "epoch": 0.1, + "learning_rate": 2.951957789502916e-05, + "loss": 2.4002, + "step": 8950 + }, + { + "epoch": 0.1, + "learning_rate": 2.9514949551050635e-05, + "loss": 2.6307, + "step": 8952 + }, + { + "epoch": 0.1, + "learning_rate": 2.951032120707211e-05, + "loss": 3.7952, + "step": 8954 + }, + { + "epoch": 0.1, + "learning_rate": 2.9505692863093586e-05, + "loss": 0.002, + "step": 8956 + }, + { + "epoch": 0.1, + "learning_rate": 2.950106451911506e-05, + "loss": 3.7827, + "step": 8958 + }, + { + "epoch": 0.1, + "learning_rate": 2.9496436175136536e-05, + "loss": 0.3493, + "step": 8960 + }, + { + "epoch": 0.1, + "learning_rate": 2.9491807831158015e-05, + "loss": 0.4392, + "step": 8962 + }, + { + "epoch": 0.1, + "learning_rate": 2.948717948717949e-05, + "loss": 1.1464, + "step": 8964 + }, + { + "epoch": 0.1, + "learning_rate": 2.9482551143200966e-05, + "loss": 0.0856, + "step": 8966 + }, + { + "epoch": 0.1, + "learning_rate": 2.947792279922244e-05, + "loss": 6.4099, + "step": 8968 + }, + { + "epoch": 0.1, + "learning_rate": 2.9473294455243917e-05, + "loss": 2.5856, + "step": 8970 + }, + { + "epoch": 0.1, + "learning_rate": 2.9468666111265392e-05, + "loss": 5.3615, + "step": 8972 + }, + { + "epoch": 0.1, + "learning_rate": 2.9464037767286868e-05, + "loss": 0.7539, + "step": 8974 + }, + { + "epoch": 0.1, + "learning_rate": 2.9459409423308343e-05, + "loss": 4.4653, + "step": 8976 + }, + { + "epoch": 0.1, + "learning_rate": 2.945478107932982e-05, + "loss": 1.0239, + "step": 8978 + }, + { + "epoch": 0.1, + "learning_rate": 2.9450152735351294e-05, + "loss": 0.0162, + "step": 8980 + }, + { + "epoch": 0.1, + "learning_rate": 2.944552439137277e-05, + "loss": 1.1272, + "step": 8982 + }, + { + "epoch": 0.1, + "learning_rate": 2.9440896047394245e-05, + "loss": 1.8814, + "step": 8984 + }, + { + "epoch": 0.1, + "learning_rate": 2.943626770341572e-05, + "loss": 3.061, + "step": 8986 + }, + { + "epoch": 0.1, + "learning_rate": 2.9431639359437196e-05, + "loss": 1.3125, + "step": 8988 + }, + { + "epoch": 0.1, + "learning_rate": 2.942701101545867e-05, + "loss": 0.6489, + "step": 8990 + }, + { + "epoch": 0.1, + "learning_rate": 2.9422382671480147e-05, + "loss": 4.5921, + "step": 8992 + }, + { + "epoch": 0.1, + "learning_rate": 2.9417754327501622e-05, + "loss": 1.4663, + "step": 8994 + }, + { + "epoch": 0.1, + "learning_rate": 2.9413125983523097e-05, + "loss": 2.4804, + "step": 8996 + }, + { + "epoch": 0.1, + "learning_rate": 2.9408497639544573e-05, + "loss": 1.4259, + "step": 8998 + }, + { + "epoch": 0.1, + "learning_rate": 2.940386929556605e-05, + "loss": 5.108, + "step": 9000 + }, + { + "epoch": 0.1, + "learning_rate": 2.9399240951587527e-05, + "loss": 2.2584, + "step": 9002 + }, + { + "epoch": 0.1, + "learning_rate": 2.9394612607609003e-05, + "loss": 2.4195, + "step": 9004 + }, + { + "epoch": 0.1, + "learning_rate": 2.9389984263630478e-05, + "loss": 1.5064, + "step": 9006 + }, + { + "epoch": 0.1, + "learning_rate": 2.9385355919651953e-05, + "loss": 2.1599, + "step": 9008 + }, + { + "epoch": 0.1, + "learning_rate": 2.938072757567343e-05, + "loss": 4.3946, + "step": 9010 + }, + { + "epoch": 0.1, + "learning_rate": 2.9376099231694904e-05, + "loss": 1.8067, + "step": 9012 + }, + { + "epoch": 0.1, + "learning_rate": 2.937147088771638e-05, + "loss": 1.6209, + "step": 9014 + }, + { + "epoch": 0.1, + "learning_rate": 2.9366842543737855e-05, + "loss": 2.39, + "step": 9016 + }, + { + "epoch": 0.1, + "learning_rate": 2.9362214199759324e-05, + "loss": 1.8602, + "step": 9018 + }, + { + "epoch": 0.1, + "learning_rate": 2.93575858557808e-05, + "loss": 0.3118, + "step": 9020 + }, + { + "epoch": 0.1, + "learning_rate": 2.9352957511802275e-05, + "loss": 2.661, + "step": 9022 + }, + { + "epoch": 0.1, + "learning_rate": 2.934832916782375e-05, + "loss": 1.5692, + "step": 9024 + }, + { + "epoch": 0.1, + "learning_rate": 2.9343700823845225e-05, + "loss": 3.7413, + "step": 9026 + }, + { + "epoch": 0.1, + "learning_rate": 2.9339072479866704e-05, + "loss": 0.7714, + "step": 9028 + }, + { + "epoch": 0.1, + "learning_rate": 2.933444413588818e-05, + "loss": 0.0004, + "step": 9030 + }, + { + "epoch": 0.1, + "learning_rate": 2.9329815791909655e-05, + "loss": 2.6258, + "step": 9032 + }, + { + "epoch": 0.1, + "learning_rate": 2.932518744793113e-05, + "loss": 0.2485, + "step": 9034 + }, + { + "epoch": 0.1, + "learning_rate": 2.9320559103952606e-05, + "loss": 2.1368, + "step": 9036 + }, + { + "epoch": 0.1, + "learning_rate": 2.931593075997408e-05, + "loss": 2.1491, + "step": 9038 + }, + { + "epoch": 0.1, + "learning_rate": 2.9311302415995557e-05, + "loss": 1.2984, + "step": 9040 + }, + { + "epoch": 0.1, + "learning_rate": 2.9306674072017032e-05, + "loss": 0.317, + "step": 9042 + }, + { + "epoch": 0.1, + "learning_rate": 2.9302045728038508e-05, + "loss": 0.468, + "step": 9044 + }, + { + "epoch": 0.1, + "learning_rate": 2.9297417384059983e-05, + "loss": 0.1246, + "step": 9046 + }, + { + "epoch": 0.1, + "learning_rate": 2.929278904008146e-05, + "loss": 4.8425, + "step": 9048 + }, + { + "epoch": 0.1, + "learning_rate": 2.9288160696102934e-05, + "loss": 4.982, + "step": 9050 + }, + { + "epoch": 0.1, + "learning_rate": 2.928353235212441e-05, + "loss": 2.0127, + "step": 9052 + }, + { + "epoch": 0.1, + "learning_rate": 2.9278904008145885e-05, + "loss": 3.79, + "step": 9054 + }, + { + "epoch": 0.1, + "learning_rate": 2.927427566416736e-05, + "loss": 2.6994, + "step": 9056 + }, + { + "epoch": 0.1, + "learning_rate": 2.9269647320188836e-05, + "loss": 0.4193, + "step": 9058 + }, + { + "epoch": 0.1, + "learning_rate": 2.926501897621031e-05, + "loss": 5.9747, + "step": 9060 + }, + { + "epoch": 0.1, + "learning_rate": 2.9260390632231786e-05, + "loss": 1.2185, + "step": 9062 + }, + { + "epoch": 0.1, + "learning_rate": 2.9255762288253262e-05, + "loss": 2.3477, + "step": 9064 + }, + { + "epoch": 0.1, + "learning_rate": 2.9251133944274737e-05, + "loss": 1.6976, + "step": 9066 + }, + { + "epoch": 0.1, + "learning_rate": 2.9246505600296216e-05, + "loss": 1.2334, + "step": 9068 + }, + { + "epoch": 0.1, + "learning_rate": 2.924187725631769e-05, + "loss": 1.019, + "step": 9070 + }, + { + "epoch": 0.1, + "learning_rate": 2.9237248912339167e-05, + "loss": 1.6281, + "step": 9072 + }, + { + "epoch": 0.1, + "learning_rate": 2.9232620568360642e-05, + "loss": 3.9281, + "step": 9074 + }, + { + "epoch": 0.1, + "learning_rate": 2.9227992224382118e-05, + "loss": 5.6147, + "step": 9076 + }, + { + "epoch": 0.1, + "learning_rate": 2.9223363880403593e-05, + "loss": 1.2166, + "step": 9078 + }, + { + "epoch": 0.1, + "learning_rate": 2.921873553642507e-05, + "loss": 1.6301, + "step": 9080 + }, + { + "epoch": 0.1, + "learning_rate": 2.9214107192446544e-05, + "loss": 2.411, + "step": 9082 + }, + { + "epoch": 0.1, + "learning_rate": 2.920947884846802e-05, + "loss": 4.9103, + "step": 9084 + }, + { + "epoch": 0.1, + "learning_rate": 2.9204850504489495e-05, + "loss": 2.6835, + "step": 9086 + }, + { + "epoch": 0.1, + "learning_rate": 2.920022216051097e-05, + "loss": 2.8274, + "step": 9088 + }, + { + "epoch": 0.1, + "learning_rate": 2.9195593816532446e-05, + "loss": 1.2848, + "step": 9090 + }, + { + "epoch": 0.1, + "learning_rate": 2.919096547255392e-05, + "loss": 2.9705, + "step": 9092 + }, + { + "epoch": 0.1, + "learning_rate": 2.9186337128575397e-05, + "loss": 1.4791, + "step": 9094 + }, + { + "epoch": 0.1, + "learning_rate": 2.9181708784596872e-05, + "loss": 1.8438, + "step": 9096 + }, + { + "epoch": 0.1, + "learning_rate": 2.9177080440618348e-05, + "loss": 2.6168, + "step": 9098 + }, + { + "epoch": 0.1, + "learning_rate": 2.9172452096639823e-05, + "loss": 0.4934, + "step": 9100 + }, + { + "epoch": 0.1, + "learning_rate": 2.91678237526613e-05, + "loss": 1.5785, + "step": 9102 + }, + { + "epoch": 0.1, + "learning_rate": 2.9163195408682774e-05, + "loss": 2.1452, + "step": 9104 + }, + { + "epoch": 0.1, + "learning_rate": 2.915856706470425e-05, + "loss": 2.3342, + "step": 9106 + }, + { + "epoch": 0.1, + "learning_rate": 2.9153938720725728e-05, + "loss": 1.5154, + "step": 9108 + }, + { + "epoch": 0.1, + "learning_rate": 2.9149310376747203e-05, + "loss": 2.5178, + "step": 9110 + }, + { + "epoch": 0.1, + "learning_rate": 2.914468203276868e-05, + "loss": 3.6622, + "step": 9112 + }, + { + "epoch": 0.1, + "learning_rate": 2.9140053688790154e-05, + "loss": 0.9573, + "step": 9114 + }, + { + "epoch": 0.1, + "learning_rate": 2.913542534481163e-05, + "loss": 1.0877, + "step": 9116 + }, + { + "epoch": 0.11, + "learning_rate": 2.9130797000833105e-05, + "loss": 3.2407, + "step": 9118 + }, + { + "epoch": 0.11, + "learning_rate": 2.912616865685458e-05, + "loss": 1.8821, + "step": 9120 + }, + { + "epoch": 0.11, + "learning_rate": 2.9121540312876056e-05, + "loss": 1.68, + "step": 9122 + }, + { + "epoch": 0.11, + "learning_rate": 2.911691196889753e-05, + "loss": 5.2975, + "step": 9124 + }, + { + "epoch": 0.11, + "learning_rate": 2.9112283624919007e-05, + "loss": 8.881, + "step": 9126 + }, + { + "epoch": 0.11, + "learning_rate": 2.9107655280940482e-05, + "loss": 5.7818, + "step": 9128 + }, + { + "epoch": 0.11, + "learning_rate": 2.9103026936961958e-05, + "loss": 0.1531, + "step": 9130 + }, + { + "epoch": 0.11, + "learning_rate": 2.9098398592983433e-05, + "loss": 4.9098, + "step": 9132 + }, + { + "epoch": 0.11, + "learning_rate": 2.909377024900491e-05, + "loss": 1.1368, + "step": 9134 + }, + { + "epoch": 0.11, + "learning_rate": 2.9089141905026384e-05, + "loss": 2.4192, + "step": 9136 + }, + { + "epoch": 0.11, + "learning_rate": 2.908451356104786e-05, + "loss": 1.3409, + "step": 9138 + }, + { + "epoch": 0.11, + "learning_rate": 2.9079885217069335e-05, + "loss": 1.8571, + "step": 9140 + }, + { + "epoch": 0.11, + "learning_rate": 2.907525687309081e-05, + "loss": 1.8307, + "step": 9142 + }, + { + "epoch": 0.11, + "learning_rate": 2.9070628529112286e-05, + "loss": 0.0022, + "step": 9144 + }, + { + "epoch": 0.11, + "learning_rate": 2.906600018513376e-05, + "loss": 2.6491, + "step": 9146 + }, + { + "epoch": 0.11, + "learning_rate": 2.906137184115524e-05, + "loss": 0.1569, + "step": 9148 + }, + { + "epoch": 0.11, + "learning_rate": 2.9056743497176715e-05, + "loss": 0.0622, + "step": 9150 + }, + { + "epoch": 0.11, + "learning_rate": 2.905211515319819e-05, + "loss": 0.0025, + "step": 9152 + }, + { + "epoch": 0.11, + "learning_rate": 2.9047486809219666e-05, + "loss": 1.4535, + "step": 9154 + }, + { + "epoch": 0.11, + "learning_rate": 2.904285846524114e-05, + "loss": 1.0994, + "step": 9156 + }, + { + "epoch": 0.11, + "learning_rate": 2.9038230121262617e-05, + "loss": 3.8168, + "step": 9158 + }, + { + "epoch": 0.11, + "learning_rate": 2.9033601777284092e-05, + "loss": 2.6239, + "step": 9160 + }, + { + "epoch": 0.11, + "learning_rate": 2.9028973433305568e-05, + "loss": 4.714, + "step": 9162 + }, + { + "epoch": 0.11, + "learning_rate": 2.9024345089327037e-05, + "loss": 3.7696, + "step": 9164 + }, + { + "epoch": 0.11, + "learning_rate": 2.9019716745348512e-05, + "loss": 0.0001, + "step": 9166 + }, + { + "epoch": 0.11, + "learning_rate": 2.9015088401369987e-05, + "loss": 0.3128, + "step": 9168 + }, + { + "epoch": 0.11, + "learning_rate": 2.9010460057391463e-05, + "loss": 4.1992, + "step": 9170 + }, + { + "epoch": 0.11, + "learning_rate": 2.9005831713412938e-05, + "loss": 6.061, + "step": 9172 + }, + { + "epoch": 0.11, + "learning_rate": 2.9001203369434417e-05, + "loss": 1.3614, + "step": 9174 + }, + { + "epoch": 0.11, + "learning_rate": 2.8996575025455892e-05, + "loss": 2.1659, + "step": 9176 + }, + { + "epoch": 0.11, + "learning_rate": 2.8991946681477368e-05, + "loss": 0.8769, + "step": 9178 + }, + { + "epoch": 0.11, + "learning_rate": 2.8987318337498843e-05, + "loss": 0.0429, + "step": 9180 + }, + { + "epoch": 0.11, + "learning_rate": 2.898268999352032e-05, + "loss": 2.3563, + "step": 9182 + }, + { + "epoch": 0.11, + "learning_rate": 2.8978061649541794e-05, + "loss": 2.3059, + "step": 9184 + }, + { + "epoch": 0.11, + "learning_rate": 2.897343330556327e-05, + "loss": 0.525, + "step": 9186 + }, + { + "epoch": 0.11, + "learning_rate": 2.8968804961584745e-05, + "loss": 1.9252, + "step": 9188 + }, + { + "epoch": 0.11, + "learning_rate": 2.896417661760622e-05, + "loss": 1.6974, + "step": 9190 + }, + { + "epoch": 0.11, + "learning_rate": 2.8959548273627696e-05, + "loss": 0.9021, + "step": 9192 + }, + { + "epoch": 0.11, + "learning_rate": 2.895491992964917e-05, + "loss": 5.7722, + "step": 9194 + }, + { + "epoch": 0.11, + "learning_rate": 2.8950291585670647e-05, + "loss": 0.4352, + "step": 9196 + }, + { + "epoch": 0.11, + "learning_rate": 2.8945663241692122e-05, + "loss": 5.0691, + "step": 9198 + }, + { + "epoch": 0.11, + "learning_rate": 2.8941034897713598e-05, + "loss": 0.5461, + "step": 9200 + }, + { + "epoch": 0.11, + "learning_rate": 2.8936406553735073e-05, + "loss": 1.4365, + "step": 9202 + }, + { + "epoch": 0.11, + "learning_rate": 2.893177820975655e-05, + "loss": 0.0015, + "step": 9204 + }, + { + "epoch": 0.11, + "learning_rate": 2.8927149865778024e-05, + "loss": 2.5322, + "step": 9206 + }, + { + "epoch": 0.11, + "learning_rate": 2.89225215217995e-05, + "loss": 0.0903, + "step": 9208 + }, + { + "epoch": 0.11, + "learning_rate": 2.8917893177820975e-05, + "loss": 1.2638, + "step": 9210 + }, + { + "epoch": 0.11, + "learning_rate": 2.891326483384245e-05, + "loss": 2.2032, + "step": 9212 + }, + { + "epoch": 0.11, + "learning_rate": 2.890863648986393e-05, + "loss": 5.3867, + "step": 9214 + }, + { + "epoch": 0.11, + "learning_rate": 2.8904008145885404e-05, + "loss": 1.7568, + "step": 9216 + }, + { + "epoch": 0.11, + "learning_rate": 2.889937980190688e-05, + "loss": 3.533, + "step": 9218 + }, + { + "epoch": 0.11, + "learning_rate": 2.8894751457928355e-05, + "loss": 6.6463, + "step": 9220 + }, + { + "epoch": 0.11, + "learning_rate": 2.889012311394983e-05, + "loss": 3.0191, + "step": 9222 + }, + { + "epoch": 0.11, + "learning_rate": 2.8885494769971306e-05, + "loss": 1.9215, + "step": 9224 + }, + { + "epoch": 0.11, + "learning_rate": 2.888086642599278e-05, + "loss": 1.2494, + "step": 9226 + }, + { + "epoch": 0.11, + "learning_rate": 2.8876238082014257e-05, + "loss": 1.9135, + "step": 9228 + }, + { + "epoch": 0.11, + "learning_rate": 2.8871609738035732e-05, + "loss": 1.7806, + "step": 9230 + }, + { + "epoch": 0.11, + "learning_rate": 2.8866981394057208e-05, + "loss": 2.6866, + "step": 9232 + }, + { + "epoch": 0.11, + "learning_rate": 2.8862353050078683e-05, + "loss": 2.7495, + "step": 9234 + }, + { + "epoch": 0.11, + "learning_rate": 2.885772470610016e-05, + "loss": 2.1765, + "step": 9236 + }, + { + "epoch": 0.11, + "learning_rate": 2.8853096362121634e-05, + "loss": 1.2448, + "step": 9238 + }, + { + "epoch": 0.11, + "learning_rate": 2.884846801814311e-05, + "loss": 0.0092, + "step": 9240 + }, + { + "epoch": 0.11, + "learning_rate": 2.8843839674164585e-05, + "loss": 0.4041, + "step": 9242 + }, + { + "epoch": 0.11, + "learning_rate": 2.883921133018606e-05, + "loss": 0.1657, + "step": 9244 + }, + { + "epoch": 0.11, + "learning_rate": 2.8834582986207536e-05, + "loss": 0.425, + "step": 9246 + }, + { + "epoch": 0.11, + "learning_rate": 2.882995464222901e-05, + "loss": 0.0588, + "step": 9248 + }, + { + "epoch": 0.11, + "learning_rate": 2.8825326298250487e-05, + "loss": 2.918, + "step": 9250 + }, + { + "epoch": 0.11, + "learning_rate": 2.8820697954271962e-05, + "loss": 1.7307, + "step": 9252 + }, + { + "epoch": 0.11, + "learning_rate": 2.881606961029344e-05, + "loss": 0.1562, + "step": 9254 + }, + { + "epoch": 0.11, + "learning_rate": 2.8811441266314916e-05, + "loss": 1.6641, + "step": 9256 + }, + { + "epoch": 0.11, + "learning_rate": 2.880681292233639e-05, + "loss": 2.4512, + "step": 9258 + }, + { + "epoch": 0.11, + "learning_rate": 2.8802184578357867e-05, + "loss": 0.9938, + "step": 9260 + }, + { + "epoch": 0.11, + "learning_rate": 2.8797556234379342e-05, + "loss": 0.0002, + "step": 9262 + }, + { + "epoch": 0.11, + "learning_rate": 2.8792927890400818e-05, + "loss": 3.3522, + "step": 9264 + }, + { + "epoch": 0.11, + "learning_rate": 2.8788299546422293e-05, + "loss": 0.0061, + "step": 9266 + }, + { + "epoch": 0.11, + "learning_rate": 2.878367120244377e-05, + "loss": 0.0002, + "step": 9268 + }, + { + "epoch": 0.11, + "learning_rate": 2.8779042858465244e-05, + "loss": 0.0054, + "step": 9270 + }, + { + "epoch": 0.11, + "learning_rate": 2.877441451448672e-05, + "loss": 0.5236, + "step": 9272 + }, + { + "epoch": 0.11, + "learning_rate": 2.8769786170508195e-05, + "loss": 1.187, + "step": 9274 + }, + { + "epoch": 0.11, + "learning_rate": 2.876515782652967e-05, + "loss": 2.3937, + "step": 9276 + }, + { + "epoch": 0.11, + "learning_rate": 2.8760529482551146e-05, + "loss": 4.5175, + "step": 9278 + }, + { + "epoch": 0.11, + "learning_rate": 2.875590113857262e-05, + "loss": 4.9882, + "step": 9280 + }, + { + "epoch": 0.11, + "learning_rate": 2.8751272794594097e-05, + "loss": 7.7477, + "step": 9282 + }, + { + "epoch": 0.11, + "learning_rate": 2.8746644450615572e-05, + "loss": 5.8341, + "step": 9284 + }, + { + "epoch": 0.11, + "learning_rate": 2.8742016106637048e-05, + "loss": 1.3651, + "step": 9286 + }, + { + "epoch": 0.11, + "learning_rate": 2.8737387762658523e-05, + "loss": 0.8056, + "step": 9288 + }, + { + "epoch": 0.11, + "learning_rate": 2.873275941868e-05, + "loss": 1.4267, + "step": 9290 + }, + { + "epoch": 0.11, + "learning_rate": 2.8728131074701474e-05, + "loss": 3.9288, + "step": 9292 + }, + { + "epoch": 0.11, + "learning_rate": 2.872350273072295e-05, + "loss": 0.8057, + "step": 9294 + }, + { + "epoch": 0.11, + "learning_rate": 2.8718874386744428e-05, + "loss": 5.9239, + "step": 9296 + }, + { + "epoch": 0.11, + "learning_rate": 2.8714246042765904e-05, + "loss": 2.5635, + "step": 9298 + }, + { + "epoch": 0.11, + "learning_rate": 2.870961769878738e-05, + "loss": 0.0034, + "step": 9300 + }, + { + "epoch": 0.11, + "learning_rate": 2.8704989354808854e-05, + "loss": 3.5664, + "step": 9302 + }, + { + "epoch": 0.11, + "learning_rate": 2.870036101083033e-05, + "loss": 0.2119, + "step": 9304 + }, + { + "epoch": 0.11, + "learning_rate": 2.8695732666851805e-05, + "loss": 1.9195, + "step": 9306 + }, + { + "epoch": 0.11, + "learning_rate": 2.8691104322873274e-05, + "loss": 1.6738, + "step": 9308 + }, + { + "epoch": 0.11, + "learning_rate": 2.868647597889475e-05, + "loss": 6.7344, + "step": 9310 + }, + { + "epoch": 0.11, + "learning_rate": 2.8681847634916225e-05, + "loss": 0.2258, + "step": 9312 + }, + { + "epoch": 0.11, + "learning_rate": 2.86772192909377e-05, + "loss": 1.8337, + "step": 9314 + }, + { + "epoch": 0.11, + "learning_rate": 2.8672590946959176e-05, + "loss": 3.2966, + "step": 9316 + }, + { + "epoch": 0.11, + "learning_rate": 2.866796260298065e-05, + "loss": 6.8161, + "step": 9318 + }, + { + "epoch": 0.11, + "learning_rate": 2.8663334259002126e-05, + "loss": 1.1347, + "step": 9320 + }, + { + "epoch": 0.11, + "learning_rate": 2.8658705915023605e-05, + "loss": 4.8397, + "step": 9322 + }, + { + "epoch": 0.11, + "learning_rate": 2.865407757104508e-05, + "loss": 0.7547, + "step": 9324 + }, + { + "epoch": 0.11, + "learning_rate": 2.8649449227066556e-05, + "loss": 1.6403, + "step": 9326 + }, + { + "epoch": 0.11, + "learning_rate": 2.864482088308803e-05, + "loss": 1.6045, + "step": 9328 + }, + { + "epoch": 0.11, + "learning_rate": 2.8640192539109507e-05, + "loss": 1.1348, + "step": 9330 + }, + { + "epoch": 0.11, + "learning_rate": 2.8635564195130982e-05, + "loss": 2.474, + "step": 9332 + }, + { + "epoch": 0.11, + "learning_rate": 2.8630935851152458e-05, + "loss": 1.4309, + "step": 9334 + }, + { + "epoch": 0.11, + "learning_rate": 2.8626307507173933e-05, + "loss": 2.5192, + "step": 9336 + }, + { + "epoch": 0.11, + "learning_rate": 2.862167916319541e-05, + "loss": 1.2003, + "step": 9338 + }, + { + "epoch": 0.11, + "learning_rate": 2.8617050819216884e-05, + "loss": 0.2768, + "step": 9340 + }, + { + "epoch": 0.11, + "learning_rate": 2.861242247523836e-05, + "loss": 2.0428, + "step": 9342 + }, + { + "epoch": 0.11, + "learning_rate": 2.8607794131259835e-05, + "loss": 4.3979, + "step": 9344 + }, + { + "epoch": 0.11, + "learning_rate": 2.860316578728131e-05, + "loss": 0.0649, + "step": 9346 + }, + { + "epoch": 0.11, + "learning_rate": 2.8598537443302786e-05, + "loss": 3.661, + "step": 9348 + }, + { + "epoch": 0.11, + "learning_rate": 2.859390909932426e-05, + "loss": 0.3039, + "step": 9350 + }, + { + "epoch": 0.11, + "learning_rate": 2.8589280755345737e-05, + "loss": 0.3838, + "step": 9352 + }, + { + "epoch": 0.11, + "learning_rate": 2.8584652411367212e-05, + "loss": 0.7316, + "step": 9354 + }, + { + "epoch": 0.11, + "learning_rate": 2.8580024067388687e-05, + "loss": 6.011, + "step": 9356 + }, + { + "epoch": 0.11, + "learning_rate": 2.8575395723410163e-05, + "loss": 3.3309, + "step": 9358 + }, + { + "epoch": 0.11, + "learning_rate": 2.8570767379431638e-05, + "loss": 2.4937, + "step": 9360 + }, + { + "epoch": 0.11, + "learning_rate": 2.8566139035453117e-05, + "loss": 0.0009, + "step": 9362 + }, + { + "epoch": 0.11, + "learning_rate": 2.8561510691474593e-05, + "loss": 1.7528, + "step": 9364 + }, + { + "epoch": 0.11, + "learning_rate": 2.8556882347496068e-05, + "loss": 4.3861, + "step": 9366 + }, + { + "epoch": 0.11, + "learning_rate": 2.8552254003517543e-05, + "loss": 0.9031, + "step": 9368 + }, + { + "epoch": 0.11, + "learning_rate": 2.854762565953902e-05, + "loss": 2.1664, + "step": 9370 + }, + { + "epoch": 0.11, + "learning_rate": 2.8542997315560494e-05, + "loss": 2.4013, + "step": 9372 + }, + { + "epoch": 0.11, + "learning_rate": 2.853836897158197e-05, + "loss": 7.9108, + "step": 9374 + }, + { + "epoch": 0.11, + "learning_rate": 2.8533740627603445e-05, + "loss": 5.1065, + "step": 9376 + }, + { + "epoch": 0.11, + "learning_rate": 2.852911228362492e-05, + "loss": 0.0186, + "step": 9378 + }, + { + "epoch": 0.11, + "learning_rate": 2.8524483939646396e-05, + "loss": 0.1255, + "step": 9380 + }, + { + "epoch": 0.11, + "learning_rate": 2.851985559566787e-05, + "loss": 7.1159, + "step": 9382 + }, + { + "epoch": 0.11, + "learning_rate": 2.8515227251689347e-05, + "loss": 2.1793, + "step": 9384 + }, + { + "epoch": 0.11, + "learning_rate": 2.8510598907710822e-05, + "loss": 2.1917, + "step": 9386 + }, + { + "epoch": 0.11, + "learning_rate": 2.8505970563732298e-05, + "loss": 0.5336, + "step": 9388 + }, + { + "epoch": 0.11, + "learning_rate": 2.8501342219753773e-05, + "loss": 0.0655, + "step": 9390 + }, + { + "epoch": 0.11, + "learning_rate": 2.849671387577525e-05, + "loss": 1.3722, + "step": 9392 + }, + { + "epoch": 0.11, + "learning_rate": 2.8492085531796724e-05, + "loss": 2.5299, + "step": 9394 + }, + { + "epoch": 0.11, + "learning_rate": 2.84874571878182e-05, + "loss": 1.273, + "step": 9396 + }, + { + "epoch": 0.11, + "learning_rate": 2.8482828843839675e-05, + "loss": 2.2456, + "step": 9398 + }, + { + "epoch": 0.11, + "learning_rate": 2.847820049986115e-05, + "loss": 0.2581, + "step": 9400 + }, + { + "epoch": 0.11, + "learning_rate": 2.847357215588263e-05, + "loss": 0.0082, + "step": 9402 + }, + { + "epoch": 0.11, + "learning_rate": 2.8468943811904104e-05, + "loss": 3.5253, + "step": 9404 + }, + { + "epoch": 0.11, + "learning_rate": 2.846431546792558e-05, + "loss": 1.4943, + "step": 9406 + }, + { + "epoch": 0.11, + "learning_rate": 2.8459687123947055e-05, + "loss": 1.9791, + "step": 9408 + }, + { + "epoch": 0.11, + "learning_rate": 2.845505877996853e-05, + "loss": 0.0008, + "step": 9410 + }, + { + "epoch": 0.11, + "learning_rate": 2.8450430435990006e-05, + "loss": 0.1347, + "step": 9412 + }, + { + "epoch": 0.11, + "learning_rate": 2.844580209201148e-05, + "loss": 0.0006, + "step": 9414 + }, + { + "epoch": 0.11, + "learning_rate": 2.8441173748032957e-05, + "loss": 0.1223, + "step": 9416 + }, + { + "epoch": 0.11, + "learning_rate": 2.8436545404054432e-05, + "loss": 1.8343, + "step": 9418 + }, + { + "epoch": 0.11, + "learning_rate": 2.8431917060075908e-05, + "loss": 0.4689, + "step": 9420 + }, + { + "epoch": 0.11, + "learning_rate": 2.8427288716097383e-05, + "loss": 2.1567, + "step": 9422 + }, + { + "epoch": 0.11, + "learning_rate": 2.842266037211886e-05, + "loss": 3.5205, + "step": 9424 + }, + { + "epoch": 0.11, + "learning_rate": 2.8418032028140334e-05, + "loss": 6.0939, + "step": 9426 + }, + { + "epoch": 0.11, + "learning_rate": 2.841340368416181e-05, + "loss": 3.9867, + "step": 9428 + }, + { + "epoch": 0.11, + "learning_rate": 2.8408775340183285e-05, + "loss": 1.5591, + "step": 9430 + }, + { + "epoch": 0.11, + "learning_rate": 2.840414699620476e-05, + "loss": 0.8507, + "step": 9432 + }, + { + "epoch": 0.11, + "learning_rate": 2.8399518652226236e-05, + "loss": 2.7592, + "step": 9434 + }, + { + "epoch": 0.11, + "learning_rate": 2.839489030824771e-05, + "loss": 4.1077, + "step": 9436 + }, + { + "epoch": 0.11, + "learning_rate": 2.8390261964269187e-05, + "loss": 2.3314, + "step": 9438 + }, + { + "epoch": 0.11, + "learning_rate": 2.8385633620290662e-05, + "loss": 2.6472, + "step": 9440 + }, + { + "epoch": 0.11, + "learning_rate": 2.838100527631214e-05, + "loss": 1.3439, + "step": 9442 + }, + { + "epoch": 0.11, + "learning_rate": 2.8376376932333616e-05, + "loss": 0.9454, + "step": 9444 + }, + { + "epoch": 0.11, + "learning_rate": 2.8371748588355092e-05, + "loss": 4.5745, + "step": 9446 + }, + { + "epoch": 0.11, + "learning_rate": 2.8367120244376567e-05, + "loss": 0.9706, + "step": 9448 + }, + { + "epoch": 0.11, + "learning_rate": 2.8362491900398043e-05, + "loss": 0.3948, + "step": 9450 + }, + { + "epoch": 0.11, + "learning_rate": 2.8357863556419518e-05, + "loss": 2.9797, + "step": 9452 + }, + { + "epoch": 0.11, + "learning_rate": 2.8353235212440987e-05, + "loss": 1.9442, + "step": 9454 + }, + { + "epoch": 0.11, + "learning_rate": 2.8348606868462462e-05, + "loss": 2.6336, + "step": 9456 + }, + { + "epoch": 0.11, + "learning_rate": 2.8343978524483937e-05, + "loss": 0.2801, + "step": 9458 + }, + { + "epoch": 0.11, + "learning_rate": 2.8339350180505413e-05, + "loss": 0.7259, + "step": 9460 + }, + { + "epoch": 0.11, + "learning_rate": 2.833472183652689e-05, + "loss": 1.482, + "step": 9462 + }, + { + "epoch": 0.11, + "learning_rate": 2.8330093492548364e-05, + "loss": 2.2079, + "step": 9464 + }, + { + "epoch": 0.11, + "learning_rate": 2.832546514856984e-05, + "loss": 0.3025, + "step": 9466 + }, + { + "epoch": 0.11, + "learning_rate": 2.8320836804591318e-05, + "loss": 4.4891, + "step": 9468 + }, + { + "epoch": 0.11, + "learning_rate": 2.8316208460612793e-05, + "loss": 0.0009, + "step": 9470 + }, + { + "epoch": 0.11, + "learning_rate": 2.831158011663427e-05, + "loss": 0.3547, + "step": 9472 + }, + { + "epoch": 0.11, + "learning_rate": 2.8306951772655744e-05, + "loss": 4.8508, + "step": 9474 + }, + { + "epoch": 0.11, + "learning_rate": 2.830232342867722e-05, + "loss": 2.8805, + "step": 9476 + }, + { + "epoch": 0.11, + "learning_rate": 2.8297695084698695e-05, + "loss": 5.0117, + "step": 9478 + }, + { + "epoch": 0.11, + "learning_rate": 2.829306674072017e-05, + "loss": 1.8361, + "step": 9480 + }, + { + "epoch": 0.11, + "learning_rate": 2.8288438396741646e-05, + "loss": 2.4493, + "step": 9482 + }, + { + "epoch": 0.11, + "learning_rate": 2.828381005276312e-05, + "loss": 1.9518, + "step": 9484 + }, + { + "epoch": 0.11, + "learning_rate": 2.8279181708784597e-05, + "loss": 1.2089, + "step": 9486 + }, + { + "epoch": 0.11, + "learning_rate": 2.8274553364806072e-05, + "loss": 3.7116, + "step": 9488 + }, + { + "epoch": 0.11, + "learning_rate": 2.8269925020827548e-05, + "loss": 0.9006, + "step": 9490 + }, + { + "epoch": 0.11, + "learning_rate": 2.8265296676849023e-05, + "loss": 1.6565, + "step": 9492 + }, + { + "epoch": 0.11, + "learning_rate": 2.82606683328705e-05, + "loss": 2.5707, + "step": 9494 + }, + { + "epoch": 0.11, + "learning_rate": 2.8256039988891974e-05, + "loss": 0.2237, + "step": 9496 + }, + { + "epoch": 0.11, + "learning_rate": 2.825141164491345e-05, + "loss": 5.3712, + "step": 9498 + }, + { + "epoch": 0.11, + "learning_rate": 2.8246783300934925e-05, + "loss": 1.2796, + "step": 9500 + }, + { + "epoch": 0.11, + "learning_rate": 2.82421549569564e-05, + "loss": 2.1942, + "step": 9502 + }, + { + "epoch": 0.11, + "learning_rate": 2.8237526612977876e-05, + "loss": 1.2594, + "step": 9504 + }, + { + "epoch": 0.11, + "learning_rate": 2.823289826899935e-05, + "loss": 0.3462, + "step": 9506 + }, + { + "epoch": 0.11, + "learning_rate": 2.822826992502083e-05, + "loss": 2.4427, + "step": 9508 + }, + { + "epoch": 0.11, + "learning_rate": 2.8223641581042305e-05, + "loss": 1.4827, + "step": 9510 + }, + { + "epoch": 0.11, + "learning_rate": 2.821901323706378e-05, + "loss": 2.0479, + "step": 9512 + }, + { + "epoch": 0.11, + "learning_rate": 2.8214384893085256e-05, + "loss": 1.7865, + "step": 9514 + }, + { + "epoch": 0.11, + "learning_rate": 2.820975654910673e-05, + "loss": 1.7387, + "step": 9516 + }, + { + "epoch": 0.11, + "learning_rate": 2.8205128205128207e-05, + "loss": 2.3466, + "step": 9518 + }, + { + "epoch": 0.11, + "learning_rate": 2.8200499861149682e-05, + "loss": 0.1736, + "step": 9520 + }, + { + "epoch": 0.11, + "learning_rate": 2.8195871517171158e-05, + "loss": 1.1938, + "step": 9522 + }, + { + "epoch": 0.11, + "learning_rate": 2.8191243173192633e-05, + "loss": 0.1916, + "step": 9524 + }, + { + "epoch": 0.11, + "learning_rate": 2.818661482921411e-05, + "loss": 1.0319, + "step": 9526 + }, + { + "epoch": 0.11, + "learning_rate": 2.8181986485235584e-05, + "loss": 2.2709, + "step": 9528 + }, + { + "epoch": 0.11, + "learning_rate": 2.817735814125706e-05, + "loss": 2.6727, + "step": 9530 + }, + { + "epoch": 0.11, + "learning_rate": 2.8172729797278535e-05, + "loss": 3.49, + "step": 9532 + }, + { + "epoch": 0.11, + "learning_rate": 2.816810145330001e-05, + "loss": 3.937, + "step": 9534 + }, + { + "epoch": 0.11, + "learning_rate": 2.8163473109321486e-05, + "loss": 8.0554, + "step": 9536 + }, + { + "epoch": 0.11, + "learning_rate": 2.815884476534296e-05, + "loss": 2.6918, + "step": 9538 + }, + { + "epoch": 0.11, + "learning_rate": 2.8154216421364437e-05, + "loss": 3.1106, + "step": 9540 + }, + { + "epoch": 0.11, + "learning_rate": 2.8149588077385912e-05, + "loss": 0.8753, + "step": 9542 + }, + { + "epoch": 0.11, + "learning_rate": 2.8144959733407388e-05, + "loss": 0.0437, + "step": 9544 + }, + { + "epoch": 0.11, + "learning_rate": 2.8140331389428863e-05, + "loss": 2.1921, + "step": 9546 + }, + { + "epoch": 0.11, + "learning_rate": 2.8135703045450342e-05, + "loss": 2.6359, + "step": 9548 + }, + { + "epoch": 0.11, + "learning_rate": 2.8131074701471817e-05, + "loss": 4.0934, + "step": 9550 + }, + { + "epoch": 0.11, + "learning_rate": 2.8126446357493293e-05, + "loss": 3.782, + "step": 9552 + }, + { + "epoch": 0.11, + "learning_rate": 2.8121818013514768e-05, + "loss": 2.957, + "step": 9554 + }, + { + "epoch": 0.11, + "learning_rate": 2.8117189669536243e-05, + "loss": 0.0753, + "step": 9556 + }, + { + "epoch": 0.11, + "learning_rate": 2.811256132555772e-05, + "loss": 0.677, + "step": 9558 + }, + { + "epoch": 0.11, + "learning_rate": 2.8107932981579194e-05, + "loss": 1.9879, + "step": 9560 + }, + { + "epoch": 0.11, + "learning_rate": 2.810330463760067e-05, + "loss": 2.704, + "step": 9562 + }, + { + "epoch": 0.11, + "learning_rate": 2.8098676293622145e-05, + "loss": 2.9169, + "step": 9564 + }, + { + "epoch": 0.11, + "learning_rate": 2.809404794964362e-05, + "loss": 4.9003, + "step": 9566 + }, + { + "epoch": 0.11, + "learning_rate": 2.8089419605665096e-05, + "loss": 0.9307, + "step": 9568 + }, + { + "epoch": 0.11, + "learning_rate": 2.808479126168657e-05, + "loss": 0.5483, + "step": 9570 + }, + { + "epoch": 0.11, + "learning_rate": 2.8080162917708047e-05, + "loss": 4.3179, + "step": 9572 + }, + { + "epoch": 0.11, + "learning_rate": 2.8075534573729522e-05, + "loss": 4.3144, + "step": 9574 + }, + { + "epoch": 0.11, + "learning_rate": 2.8070906229750998e-05, + "loss": 0.04, + "step": 9576 + }, + { + "epoch": 0.11, + "learning_rate": 2.8066277885772473e-05, + "loss": 0.9758, + "step": 9578 + }, + { + "epoch": 0.11, + "learning_rate": 2.806164954179395e-05, + "loss": 3.2048, + "step": 9580 + }, + { + "epoch": 0.11, + "learning_rate": 2.8057021197815424e-05, + "loss": 3.7609, + "step": 9582 + }, + { + "epoch": 0.11, + "learning_rate": 2.80523928538369e-05, + "loss": 3.0001, + "step": 9584 + }, + { + "epoch": 0.11, + "learning_rate": 2.8047764509858375e-05, + "loss": 0.3774, + "step": 9586 + }, + { + "epoch": 0.11, + "learning_rate": 2.804313616587985e-05, + "loss": 3.8379, + "step": 9588 + }, + { + "epoch": 0.11, + "learning_rate": 2.803850782190133e-05, + "loss": 3.1955, + "step": 9590 + }, + { + "epoch": 0.11, + "learning_rate": 2.8033879477922804e-05, + "loss": 2.2981, + "step": 9592 + }, + { + "epoch": 0.11, + "learning_rate": 2.802925113394428e-05, + "loss": 0.4182, + "step": 9594 + }, + { + "epoch": 0.11, + "learning_rate": 2.8024622789965755e-05, + "loss": 0.5038, + "step": 9596 + }, + { + "epoch": 0.11, + "learning_rate": 2.8019994445987224e-05, + "loss": 2.1631, + "step": 9598 + }, + { + "epoch": 0.11, + "learning_rate": 2.80153661020087e-05, + "loss": 0.005, + "step": 9600 + }, + { + "epoch": 0.11, + "learning_rate": 2.8010737758030175e-05, + "loss": 0.27, + "step": 9602 + }, + { + "epoch": 0.11, + "learning_rate": 2.800610941405165e-05, + "loss": 0.4997, + "step": 9604 + }, + { + "epoch": 0.11, + "learning_rate": 2.8001481070073126e-05, + "loss": 0.849, + "step": 9606 + }, + { + "epoch": 0.11, + "learning_rate": 2.79968527260946e-05, + "loss": 0.0005, + "step": 9608 + }, + { + "epoch": 0.11, + "learning_rate": 2.7992224382116077e-05, + "loss": 3.8108, + "step": 9610 + }, + { + "epoch": 0.11, + "learning_rate": 2.7987596038137552e-05, + "loss": 0.3782, + "step": 9612 + }, + { + "epoch": 0.11, + "learning_rate": 2.798296769415903e-05, + "loss": 3.6155, + "step": 9614 + }, + { + "epoch": 0.11, + "learning_rate": 2.7978339350180506e-05, + "loss": 1.7357, + "step": 9616 + }, + { + "epoch": 0.11, + "learning_rate": 2.797371100620198e-05, + "loss": 4.0513, + "step": 9618 + }, + { + "epoch": 0.11, + "learning_rate": 2.7969082662223457e-05, + "loss": 0.3143, + "step": 9620 + }, + { + "epoch": 0.11, + "learning_rate": 2.7964454318244932e-05, + "loss": 3.141, + "step": 9622 + }, + { + "epoch": 0.11, + "learning_rate": 2.7959825974266408e-05, + "loss": 2.8876, + "step": 9624 + }, + { + "epoch": 0.11, + "learning_rate": 2.7955197630287883e-05, + "loss": 3.5034, + "step": 9626 + }, + { + "epoch": 0.11, + "learning_rate": 2.795056928630936e-05, + "loss": 1.4211, + "step": 9628 + }, + { + "epoch": 0.11, + "learning_rate": 2.7945940942330834e-05, + "loss": 0.4753, + "step": 9630 + }, + { + "epoch": 0.11, + "learning_rate": 2.794131259835231e-05, + "loss": 1.6033, + "step": 9632 + }, + { + "epoch": 0.11, + "learning_rate": 2.7936684254373785e-05, + "loss": 0.1687, + "step": 9634 + }, + { + "epoch": 0.11, + "learning_rate": 2.793205591039526e-05, + "loss": 0.2104, + "step": 9636 + }, + { + "epoch": 0.11, + "learning_rate": 2.7927427566416736e-05, + "loss": 2.9397, + "step": 9638 + }, + { + "epoch": 0.11, + "learning_rate": 2.792279922243821e-05, + "loss": 2.9196, + "step": 9640 + }, + { + "epoch": 0.11, + "learning_rate": 2.7918170878459687e-05, + "loss": 0.4265, + "step": 9642 + }, + { + "epoch": 0.11, + "learning_rate": 2.7913542534481162e-05, + "loss": 1.0265, + "step": 9644 + }, + { + "epoch": 0.11, + "learning_rate": 2.7908914190502638e-05, + "loss": 5.5529, + "step": 9646 + }, + { + "epoch": 0.11, + "learning_rate": 2.7904285846524113e-05, + "loss": 0.1232, + "step": 9648 + }, + { + "epoch": 0.11, + "learning_rate": 2.789965750254559e-05, + "loss": 4.4074, + "step": 9650 + }, + { + "epoch": 0.11, + "learning_rate": 2.7895029158567064e-05, + "loss": 0.2816, + "step": 9652 + }, + { + "epoch": 0.11, + "learning_rate": 2.789040081458854e-05, + "loss": 0.0003, + "step": 9654 + }, + { + "epoch": 0.11, + "learning_rate": 2.7885772470610018e-05, + "loss": 0.0012, + "step": 9656 + }, + { + "epoch": 0.11, + "learning_rate": 2.7881144126631493e-05, + "loss": 3.6426, + "step": 9658 + }, + { + "epoch": 0.11, + "learning_rate": 2.787651578265297e-05, + "loss": 4.7657, + "step": 9660 + }, + { + "epoch": 0.11, + "learning_rate": 2.7871887438674444e-05, + "loss": 0.0009, + "step": 9662 + }, + { + "epoch": 0.11, + "learning_rate": 2.786725909469592e-05, + "loss": 4.2075, + "step": 9664 + }, + { + "epoch": 0.11, + "learning_rate": 2.7862630750717395e-05, + "loss": 1.7408, + "step": 9666 + }, + { + "epoch": 0.11, + "learning_rate": 2.785800240673887e-05, + "loss": 2.6668, + "step": 9668 + }, + { + "epoch": 0.11, + "learning_rate": 2.7853374062760346e-05, + "loss": 2.3333, + "step": 9670 + }, + { + "epoch": 0.11, + "learning_rate": 2.784874571878182e-05, + "loss": 2.598, + "step": 9672 + }, + { + "epoch": 0.11, + "learning_rate": 2.7844117374803297e-05, + "loss": 1.7312, + "step": 9674 + }, + { + "epoch": 0.11, + "learning_rate": 2.7839489030824772e-05, + "loss": 0.1376, + "step": 9676 + }, + { + "epoch": 0.11, + "learning_rate": 2.7834860686846248e-05, + "loss": 3.4575, + "step": 9678 + }, + { + "epoch": 0.11, + "learning_rate": 2.7830232342867723e-05, + "loss": 2.3636, + "step": 9680 + }, + { + "epoch": 0.11, + "learning_rate": 2.78256039988892e-05, + "loss": 3.7814, + "step": 9682 + }, + { + "epoch": 0.11, + "learning_rate": 2.7820975654910674e-05, + "loss": 3.0637, + "step": 9684 + }, + { + "epoch": 0.11, + "learning_rate": 2.781634731093215e-05, + "loss": 1.0514, + "step": 9686 + }, + { + "epoch": 0.11, + "learning_rate": 2.7811718966953625e-05, + "loss": 1.4827, + "step": 9688 + }, + { + "epoch": 0.11, + "learning_rate": 2.78070906229751e-05, + "loss": 3.6337, + "step": 9690 + }, + { + "epoch": 0.11, + "learning_rate": 2.7802462278996576e-05, + "loss": 0.7528, + "step": 9692 + }, + { + "epoch": 0.11, + "learning_rate": 2.779783393501805e-05, + "loss": 1.1515, + "step": 9694 + }, + { + "epoch": 0.11, + "learning_rate": 2.779320559103953e-05, + "loss": 0.2627, + "step": 9696 + }, + { + "epoch": 0.11, + "learning_rate": 2.7788577247061005e-05, + "loss": 2.9235, + "step": 9698 + }, + { + "epoch": 0.11, + "learning_rate": 2.778394890308248e-05, + "loss": 4.729, + "step": 9700 + }, + { + "epoch": 0.11, + "learning_rate": 2.7779320559103956e-05, + "loss": 3.2889, + "step": 9702 + }, + { + "epoch": 0.11, + "learning_rate": 2.777469221512543e-05, + "loss": 5.1229, + "step": 9704 + }, + { + "epoch": 0.11, + "learning_rate": 2.7770063871146907e-05, + "loss": 0.458, + "step": 9706 + }, + { + "epoch": 0.11, + "learning_rate": 2.7765435527168382e-05, + "loss": 0.0043, + "step": 9708 + }, + { + "epoch": 0.11, + "learning_rate": 2.7760807183189858e-05, + "loss": 1.3344, + "step": 9710 + }, + { + "epoch": 0.11, + "learning_rate": 2.7756178839211333e-05, + "loss": 3.7577, + "step": 9712 + }, + { + "epoch": 0.11, + "learning_rate": 2.775155049523281e-05, + "loss": 4.0129, + "step": 9714 + }, + { + "epoch": 0.11, + "learning_rate": 2.7746922151254284e-05, + "loss": 0.0063, + "step": 9716 + }, + { + "epoch": 0.11, + "learning_rate": 2.774229380727576e-05, + "loss": 0.8528, + "step": 9718 + }, + { + "epoch": 0.11, + "learning_rate": 2.7737665463297235e-05, + "loss": 1.8749, + "step": 9720 + }, + { + "epoch": 0.11, + "learning_rate": 2.773303711931871e-05, + "loss": 3.8136, + "step": 9722 + }, + { + "epoch": 0.11, + "learning_rate": 2.7728408775340186e-05, + "loss": 1.4091, + "step": 9724 + }, + { + "epoch": 0.11, + "learning_rate": 2.772378043136166e-05, + "loss": 5.1273, + "step": 9726 + }, + { + "epoch": 0.11, + "learning_rate": 2.7719152087383137e-05, + "loss": 5.8095, + "step": 9728 + }, + { + "epoch": 0.11, + "learning_rate": 2.7714523743404612e-05, + "loss": 6.819, + "step": 9730 + }, + { + "epoch": 0.11, + "learning_rate": 2.7709895399426088e-05, + "loss": 0.5044, + "step": 9732 + }, + { + "epoch": 0.11, + "learning_rate": 2.7705267055447563e-05, + "loss": 3.8536, + "step": 9734 + }, + { + "epoch": 0.11, + "learning_rate": 2.7700638711469042e-05, + "loss": 3.6736, + "step": 9736 + }, + { + "epoch": 0.11, + "learning_rate": 2.7696010367490517e-05, + "loss": 0.2552, + "step": 9738 + }, + { + "epoch": 0.11, + "learning_rate": 2.7691382023511993e-05, + "loss": 0.9061, + "step": 9740 + }, + { + "epoch": 0.11, + "learning_rate": 2.7686753679533468e-05, + "loss": 0.11, + "step": 9742 + }, + { + "epoch": 0.11, + "learning_rate": 2.7682125335554937e-05, + "loss": 3.1121, + "step": 9744 + }, + { + "epoch": 0.11, + "learning_rate": 2.7677496991576412e-05, + "loss": 0.852, + "step": 9746 + }, + { + "epoch": 0.11, + "learning_rate": 2.7672868647597888e-05, + "loss": 1.3867, + "step": 9748 + }, + { + "epoch": 0.11, + "learning_rate": 2.7668240303619363e-05, + "loss": 3.2169, + "step": 9750 + }, + { + "epoch": 0.11, + "learning_rate": 2.766361195964084e-05, + "loss": 3.408, + "step": 9752 + }, + { + "epoch": 0.11, + "learning_rate": 2.7658983615662314e-05, + "loss": 0.0163, + "step": 9754 + }, + { + "epoch": 0.11, + "learning_rate": 2.765435527168379e-05, + "loss": 3.2228, + "step": 9756 + }, + { + "epoch": 0.11, + "learning_rate": 2.7649726927705265e-05, + "loss": 0.6103, + "step": 9758 + }, + { + "epoch": 0.11, + "learning_rate": 2.764509858372674e-05, + "loss": 3.4852, + "step": 9760 + }, + { + "epoch": 0.11, + "learning_rate": 2.764047023974822e-05, + "loss": 3.7587, + "step": 9762 + }, + { + "epoch": 0.11, + "learning_rate": 2.7635841895769694e-05, + "loss": 2.9798, + "step": 9764 + }, + { + "epoch": 0.11, + "learning_rate": 2.763121355179117e-05, + "loss": 2.3992, + "step": 9766 + }, + { + "epoch": 0.11, + "learning_rate": 2.7626585207812645e-05, + "loss": 0.5459, + "step": 9768 + }, + { + "epoch": 0.11, + "learning_rate": 2.762195686383412e-05, + "loss": 4.9644, + "step": 9770 + }, + { + "epoch": 0.11, + "learning_rate": 2.7617328519855596e-05, + "loss": 2.9751, + "step": 9772 + }, + { + "epoch": 0.11, + "learning_rate": 2.761270017587707e-05, + "loss": 0.9456, + "step": 9774 + }, + { + "epoch": 0.11, + "learning_rate": 2.7608071831898547e-05, + "loss": 0.6434, + "step": 9776 + }, + { + "epoch": 0.11, + "learning_rate": 2.7603443487920022e-05, + "loss": 1.9834, + "step": 9778 + }, + { + "epoch": 0.11, + "learning_rate": 2.7598815143941498e-05, + "loss": 2.2792, + "step": 9780 + }, + { + "epoch": 0.11, + "learning_rate": 2.7594186799962973e-05, + "loss": 1.9386, + "step": 9782 + }, + { + "epoch": 0.11, + "learning_rate": 2.758955845598445e-05, + "loss": 1.0286, + "step": 9784 + }, + { + "epoch": 0.11, + "learning_rate": 2.7584930112005924e-05, + "loss": 1.9593, + "step": 9786 + }, + { + "epoch": 0.11, + "learning_rate": 2.75803017680274e-05, + "loss": 0.4412, + "step": 9788 + }, + { + "epoch": 0.11, + "learning_rate": 2.7575673424048875e-05, + "loss": 3.3144, + "step": 9790 + }, + { + "epoch": 0.11, + "learning_rate": 2.757104508007035e-05, + "loss": 1.1425, + "step": 9792 + }, + { + "epoch": 0.11, + "learning_rate": 2.7566416736091826e-05, + "loss": 2.6171, + "step": 9794 + }, + { + "epoch": 0.11, + "learning_rate": 2.75617883921133e-05, + "loss": 0.0107, + "step": 9796 + }, + { + "epoch": 0.11, + "learning_rate": 2.7557160048134777e-05, + "loss": 1.4387, + "step": 9798 + }, + { + "epoch": 0.11, + "learning_rate": 2.7552531704156252e-05, + "loss": 0.474, + "step": 9800 + }, + { + "epoch": 0.11, + "learning_rate": 2.754790336017773e-05, + "loss": 1.4048, + "step": 9802 + }, + { + "epoch": 0.11, + "learning_rate": 2.7543275016199206e-05, + "loss": 5.2465, + "step": 9804 + }, + { + "epoch": 0.11, + "learning_rate": 2.753864667222068e-05, + "loss": 1.7191, + "step": 9806 + }, + { + "epoch": 0.11, + "learning_rate": 2.7534018328242157e-05, + "loss": 1.6944, + "step": 9808 + }, + { + "epoch": 0.11, + "learning_rate": 2.7529389984263633e-05, + "loss": 0.2251, + "step": 9810 + }, + { + "epoch": 0.11, + "learning_rate": 2.7524761640285108e-05, + "loss": 1.6832, + "step": 9812 + }, + { + "epoch": 0.11, + "learning_rate": 2.7520133296306583e-05, + "loss": 1.275, + "step": 9814 + }, + { + "epoch": 0.11, + "learning_rate": 2.751550495232806e-05, + "loss": 2.7259, + "step": 9816 + }, + { + "epoch": 0.11, + "learning_rate": 2.7510876608349534e-05, + "loss": 3.7165, + "step": 9818 + }, + { + "epoch": 0.11, + "learning_rate": 2.750624826437101e-05, + "loss": 0.3067, + "step": 9820 + }, + { + "epoch": 0.11, + "learning_rate": 2.7501619920392485e-05, + "loss": 0.553, + "step": 9822 + }, + { + "epoch": 0.11, + "learning_rate": 2.749699157641396e-05, + "loss": 1.184, + "step": 9824 + }, + { + "epoch": 0.11, + "learning_rate": 2.7492363232435436e-05, + "loss": 4.8073, + "step": 9826 + }, + { + "epoch": 0.11, + "learning_rate": 2.748773488845691e-05, + "loss": 1.6833, + "step": 9828 + }, + { + "epoch": 0.11, + "learning_rate": 2.7483106544478387e-05, + "loss": 0.0097, + "step": 9830 + }, + { + "epoch": 0.11, + "learning_rate": 2.7478478200499862e-05, + "loss": 0.9655, + "step": 9832 + }, + { + "epoch": 0.11, + "learning_rate": 2.7473849856521338e-05, + "loss": 1.0422, + "step": 9834 + }, + { + "epoch": 0.11, + "learning_rate": 2.7469221512542813e-05, + "loss": 2.4773, + "step": 9836 + }, + { + "epoch": 0.11, + "learning_rate": 2.746459316856429e-05, + "loss": 2.4311, + "step": 9838 + }, + { + "epoch": 0.11, + "learning_rate": 2.7459964824585764e-05, + "loss": 0.0034, + "step": 9840 + }, + { + "epoch": 0.11, + "learning_rate": 2.7455336480607243e-05, + "loss": 4.1189, + "step": 9842 + }, + { + "epoch": 0.11, + "learning_rate": 2.7450708136628718e-05, + "loss": 3.2575, + "step": 9844 + }, + { + "epoch": 0.11, + "learning_rate": 2.7446079792650194e-05, + "loss": 3.8947, + "step": 9846 + }, + { + "epoch": 0.11, + "learning_rate": 2.744145144867167e-05, + "loss": 1.9695, + "step": 9848 + }, + { + "epoch": 0.11, + "learning_rate": 2.7436823104693144e-05, + "loss": 3.65, + "step": 9850 + }, + { + "epoch": 0.11, + "learning_rate": 2.743219476071462e-05, + "loss": 0.4789, + "step": 9852 + }, + { + "epoch": 0.11, + "learning_rate": 2.7427566416736095e-05, + "loss": 0.2113, + "step": 9854 + }, + { + "epoch": 0.11, + "learning_rate": 2.742293807275757e-05, + "loss": 4.6576, + "step": 9856 + }, + { + "epoch": 0.11, + "learning_rate": 2.7418309728779046e-05, + "loss": 0.9281, + "step": 9858 + }, + { + "epoch": 0.11, + "learning_rate": 2.741368138480052e-05, + "loss": 4.1118, + "step": 9860 + }, + { + "epoch": 0.11, + "learning_rate": 2.7409053040821997e-05, + "loss": 1.7478, + "step": 9862 + }, + { + "epoch": 0.11, + "learning_rate": 2.7404424696843472e-05, + "loss": 0.0012, + "step": 9864 + }, + { + "epoch": 0.11, + "learning_rate": 2.7399796352864948e-05, + "loss": 2.7279, + "step": 9866 + }, + { + "epoch": 0.11, + "learning_rate": 2.7395168008886423e-05, + "loss": 2.7854, + "step": 9868 + }, + { + "epoch": 0.11, + "learning_rate": 2.73905396649079e-05, + "loss": 1.2178, + "step": 9870 + }, + { + "epoch": 0.11, + "learning_rate": 2.7385911320929374e-05, + "loss": 4.0006, + "step": 9872 + }, + { + "epoch": 0.11, + "learning_rate": 2.738128297695085e-05, + "loss": 1.7411, + "step": 9874 + }, + { + "epoch": 0.11, + "learning_rate": 2.7376654632972325e-05, + "loss": 1.462, + "step": 9876 + }, + { + "epoch": 0.11, + "learning_rate": 2.73720262889938e-05, + "loss": 0.6637, + "step": 9878 + }, + { + "epoch": 0.11, + "learning_rate": 2.7367397945015276e-05, + "loss": 0.5077, + "step": 9880 + }, + { + "epoch": 0.11, + "learning_rate": 2.7362769601036755e-05, + "loss": 1.229, + "step": 9882 + }, + { + "epoch": 0.11, + "learning_rate": 2.735814125705823e-05, + "loss": 3.3321, + "step": 9884 + }, + { + "epoch": 0.11, + "learning_rate": 2.7353512913079705e-05, + "loss": 0.1546, + "step": 9886 + }, + { + "epoch": 0.11, + "learning_rate": 2.7348884569101174e-05, + "loss": 2.597, + "step": 9888 + }, + { + "epoch": 0.11, + "learning_rate": 2.734425622512265e-05, + "loss": 5.6346, + "step": 9890 + }, + { + "epoch": 0.11, + "learning_rate": 2.7339627881144125e-05, + "loss": 0.887, + "step": 9892 + }, + { + "epoch": 0.11, + "learning_rate": 2.73349995371656e-05, + "loss": 0.0004, + "step": 9894 + }, + { + "epoch": 0.11, + "learning_rate": 2.7330371193187076e-05, + "loss": 1.9226, + "step": 9896 + }, + { + "epoch": 0.11, + "learning_rate": 2.732574284920855e-05, + "loss": 2.2585, + "step": 9898 + }, + { + "epoch": 0.11, + "learning_rate": 2.7321114505230027e-05, + "loss": 4.8726, + "step": 9900 + }, + { + "epoch": 0.11, + "learning_rate": 2.7316486161251502e-05, + "loss": 8.3477, + "step": 9902 + }, + { + "epoch": 0.11, + "learning_rate": 2.7311857817272977e-05, + "loss": 0.0015, + "step": 9904 + }, + { + "epoch": 0.11, + "learning_rate": 2.7307229473294453e-05, + "loss": 1.7323, + "step": 9906 + }, + { + "epoch": 0.11, + "learning_rate": 2.7302601129315932e-05, + "loss": 2.5225, + "step": 9908 + }, + { + "epoch": 0.11, + "learning_rate": 2.7297972785337407e-05, + "loss": 3.0766, + "step": 9910 + }, + { + "epoch": 0.11, + "learning_rate": 2.7293344441358883e-05, + "loss": 1.5634, + "step": 9912 + }, + { + "epoch": 0.11, + "learning_rate": 2.7288716097380358e-05, + "loss": 1.568, + "step": 9914 + }, + { + "epoch": 0.11, + "learning_rate": 2.7284087753401833e-05, + "loss": 0.2167, + "step": 9916 + }, + { + "epoch": 0.11, + "learning_rate": 2.727945940942331e-05, + "loss": 2.3831, + "step": 9918 + }, + { + "epoch": 0.11, + "learning_rate": 2.7274831065444784e-05, + "loss": 0.6455, + "step": 9920 + }, + { + "epoch": 0.11, + "learning_rate": 2.727020272146626e-05, + "loss": 1.2424, + "step": 9922 + }, + { + "epoch": 0.11, + "learning_rate": 2.7265574377487735e-05, + "loss": 3.6001, + "step": 9924 + }, + { + "epoch": 0.11, + "learning_rate": 2.726094603350921e-05, + "loss": 1.0683, + "step": 9926 + }, + { + "epoch": 0.11, + "learning_rate": 2.7256317689530686e-05, + "loss": 2.2093, + "step": 9928 + }, + { + "epoch": 0.11, + "learning_rate": 2.725168934555216e-05, + "loss": 4.8441, + "step": 9930 + }, + { + "epoch": 0.11, + "learning_rate": 2.7247061001573637e-05, + "loss": 3.5826, + "step": 9932 + }, + { + "epoch": 0.11, + "learning_rate": 2.7242432657595112e-05, + "loss": 0.544, + "step": 9934 + }, + { + "epoch": 0.11, + "learning_rate": 2.7237804313616588e-05, + "loss": 1.5095, + "step": 9936 + }, + { + "epoch": 0.11, + "learning_rate": 2.7233175969638063e-05, + "loss": 1.7371, + "step": 9938 + }, + { + "epoch": 0.11, + "learning_rate": 2.722854762565954e-05, + "loss": 5.2835, + "step": 9940 + }, + { + "epoch": 0.11, + "learning_rate": 2.7223919281681014e-05, + "loss": 1.1688, + "step": 9942 + }, + { + "epoch": 0.11, + "learning_rate": 2.721929093770249e-05, + "loss": 6.0212, + "step": 9944 + }, + { + "epoch": 0.11, + "learning_rate": 2.7214662593723965e-05, + "loss": 3.1075, + "step": 9946 + }, + { + "epoch": 0.11, + "learning_rate": 2.721003424974544e-05, + "loss": 5.3276, + "step": 9948 + }, + { + "epoch": 0.11, + "learning_rate": 2.720540590576692e-05, + "loss": 0.0154, + "step": 9950 + }, + { + "epoch": 0.11, + "learning_rate": 2.7200777561788394e-05, + "loss": 2.2289, + "step": 9952 + }, + { + "epoch": 0.11, + "learning_rate": 2.719614921780987e-05, + "loss": 1.07, + "step": 9954 + }, + { + "epoch": 0.11, + "learning_rate": 2.7191520873831345e-05, + "loss": 0.3311, + "step": 9956 + }, + { + "epoch": 0.11, + "learning_rate": 2.718689252985282e-05, + "loss": 3.1961, + "step": 9958 + }, + { + "epoch": 0.11, + "learning_rate": 2.7182264185874296e-05, + "loss": 0.5719, + "step": 9960 + }, + { + "epoch": 0.11, + "learning_rate": 2.717763584189577e-05, + "loss": 2.1008, + "step": 9962 + }, + { + "epoch": 0.11, + "learning_rate": 2.7173007497917247e-05, + "loss": 6.252, + "step": 9964 + }, + { + "epoch": 0.11, + "learning_rate": 2.7168379153938722e-05, + "loss": 3.3756, + "step": 9966 + }, + { + "epoch": 0.11, + "learning_rate": 2.7163750809960198e-05, + "loss": 0.6846, + "step": 9968 + }, + { + "epoch": 0.11, + "learning_rate": 2.7159122465981673e-05, + "loss": 3.6695, + "step": 9970 + }, + { + "epoch": 0.11, + "learning_rate": 2.715449412200315e-05, + "loss": 2.0568, + "step": 9972 + }, + { + "epoch": 0.11, + "learning_rate": 2.7149865778024624e-05, + "loss": 0.5983, + "step": 9974 + }, + { + "epoch": 0.11, + "learning_rate": 2.71452374340461e-05, + "loss": 1.7116, + "step": 9976 + }, + { + "epoch": 0.11, + "learning_rate": 2.7140609090067575e-05, + "loss": 2.5836, + "step": 9978 + }, + { + "epoch": 0.11, + "learning_rate": 2.713598074608905e-05, + "loss": 1.2019, + "step": 9980 + }, + { + "epoch": 0.11, + "learning_rate": 2.7131352402110526e-05, + "loss": 0.7982, + "step": 9982 + }, + { + "epoch": 0.11, + "learning_rate": 2.7126724058132e-05, + "loss": 2.6695, + "step": 9984 + }, + { + "epoch": 0.12, + "learning_rate": 2.7122095714153477e-05, + "loss": 1.2676, + "step": 9986 + }, + { + "epoch": 0.12, + "learning_rate": 2.7117467370174952e-05, + "loss": 0.1504, + "step": 9988 + }, + { + "epoch": 0.12, + "learning_rate": 2.711283902619643e-05, + "loss": 4.8052, + "step": 9990 + }, + { + "epoch": 0.12, + "learning_rate": 2.7108210682217906e-05, + "loss": 2.5771, + "step": 9992 + }, + { + "epoch": 0.12, + "learning_rate": 2.7103582338239382e-05, + "loss": 0.0068, + "step": 9994 + }, + { + "epoch": 0.12, + "learning_rate": 2.7098953994260857e-05, + "loss": 1.5693, + "step": 9996 + }, + { + "epoch": 0.12, + "learning_rate": 2.7094325650282333e-05, + "loss": 2.9367, + "step": 9998 + }, + { + "epoch": 0.12, + "learning_rate": 2.7089697306303808e-05, + "loss": 5.1741, + "step": 10000 + }, + { + "epoch": 0.12, + "learning_rate": 2.7085068962325283e-05, + "loss": 2.7252, + "step": 10002 + }, + { + "epoch": 0.12, + "learning_rate": 2.708044061834676e-05, + "loss": 3.5361, + "step": 10004 + }, + { + "epoch": 0.12, + "learning_rate": 2.7075812274368234e-05, + "loss": 1.2624, + "step": 10006 + }, + { + "epoch": 0.12, + "learning_rate": 2.707118393038971e-05, + "loss": 3.4286, + "step": 10008 + }, + { + "epoch": 0.12, + "learning_rate": 2.7066555586411185e-05, + "loss": 1.152, + "step": 10010 + }, + { + "epoch": 0.12, + "learning_rate": 2.706192724243266e-05, + "loss": 0.0743, + "step": 10012 + }, + { + "epoch": 0.12, + "learning_rate": 2.7057298898454136e-05, + "loss": 1.5451, + "step": 10014 + }, + { + "epoch": 0.12, + "learning_rate": 2.705267055447561e-05, + "loss": 0.0262, + "step": 10016 + }, + { + "epoch": 0.12, + "learning_rate": 2.7048042210497087e-05, + "loss": 2.2949, + "step": 10018 + }, + { + "epoch": 0.12, + "learning_rate": 2.7043413866518562e-05, + "loss": 5.0104, + "step": 10020 + }, + { + "epoch": 0.12, + "learning_rate": 2.7038785522540038e-05, + "loss": 1.6818, + "step": 10022 + }, + { + "epoch": 0.12, + "learning_rate": 2.7034157178561513e-05, + "loss": 0.0146, + "step": 10024 + }, + { + "epoch": 0.12, + "learning_rate": 2.702952883458299e-05, + "loss": 4.5547, + "step": 10026 + }, + { + "epoch": 0.12, + "learning_rate": 2.7024900490604464e-05, + "loss": 1.7554, + "step": 10028 + }, + { + "epoch": 0.12, + "learning_rate": 2.7020272146625943e-05, + "loss": 1.0651, + "step": 10030 + }, + { + "epoch": 0.12, + "learning_rate": 2.7015643802647418e-05, + "loss": 2.2288, + "step": 10032 + }, + { + "epoch": 0.12, + "learning_rate": 2.7011015458668887e-05, + "loss": 1.3415, + "step": 10034 + }, + { + "epoch": 0.12, + "learning_rate": 2.7006387114690362e-05, + "loss": 3.5655, + "step": 10036 + }, + { + "epoch": 0.12, + "learning_rate": 2.7001758770711838e-05, + "loss": 0.9472, + "step": 10038 + }, + { + "epoch": 0.12, + "learning_rate": 2.6997130426733313e-05, + "loss": 2.5263, + "step": 10040 + }, + { + "epoch": 0.12, + "learning_rate": 2.699250208275479e-05, + "loss": 2.1224, + "step": 10042 + }, + { + "epoch": 0.12, + "learning_rate": 2.6987873738776264e-05, + "loss": 0.3059, + "step": 10044 + }, + { + "epoch": 0.12, + "learning_rate": 2.698324539479774e-05, + "loss": 0.0036, + "step": 10046 + }, + { + "epoch": 0.12, + "learning_rate": 2.6978617050819215e-05, + "loss": 0.1746, + "step": 10048 + }, + { + "epoch": 0.12, + "learning_rate": 2.697398870684069e-05, + "loss": 1.8547, + "step": 10050 + }, + { + "epoch": 0.12, + "learning_rate": 2.6969360362862166e-05, + "loss": 0.7831, + "step": 10052 + }, + { + "epoch": 0.12, + "learning_rate": 2.696473201888364e-05, + "loss": 4.4959, + "step": 10054 + }, + { + "epoch": 0.12, + "learning_rate": 2.696010367490512e-05, + "loss": 0.1684, + "step": 10056 + }, + { + "epoch": 0.12, + "learning_rate": 2.6955475330926595e-05, + "loss": 1.3188, + "step": 10058 + }, + { + "epoch": 0.12, + "learning_rate": 2.695084698694807e-05, + "loss": 2.7311, + "step": 10060 + }, + { + "epoch": 0.12, + "learning_rate": 2.6946218642969546e-05, + "loss": 3.0088, + "step": 10062 + }, + { + "epoch": 0.12, + "learning_rate": 2.694159029899102e-05, + "loss": 2.102, + "step": 10064 + }, + { + "epoch": 0.12, + "learning_rate": 2.6936961955012497e-05, + "loss": 1.3942, + "step": 10066 + }, + { + "epoch": 0.12, + "learning_rate": 2.6932333611033972e-05, + "loss": 1.4054, + "step": 10068 + }, + { + "epoch": 0.12, + "learning_rate": 2.6927705267055448e-05, + "loss": 1.7755, + "step": 10070 + }, + { + "epoch": 0.12, + "learning_rate": 2.6923076923076923e-05, + "loss": 0.9476, + "step": 10072 + }, + { + "epoch": 0.12, + "learning_rate": 2.69184485790984e-05, + "loss": 0.2252, + "step": 10074 + }, + { + "epoch": 0.12, + "learning_rate": 2.6913820235119874e-05, + "loss": 2.3343, + "step": 10076 + }, + { + "epoch": 0.12, + "learning_rate": 2.690919189114135e-05, + "loss": 2.2354, + "step": 10078 + }, + { + "epoch": 0.12, + "learning_rate": 2.6904563547162825e-05, + "loss": 1.7886, + "step": 10080 + }, + { + "epoch": 0.12, + "learning_rate": 2.68999352031843e-05, + "loss": 1.6227, + "step": 10082 + }, + { + "epoch": 0.12, + "learning_rate": 2.6895306859205776e-05, + "loss": 2.278, + "step": 10084 + }, + { + "epoch": 0.12, + "learning_rate": 2.689067851522725e-05, + "loss": 1.4469, + "step": 10086 + }, + { + "epoch": 0.12, + "learning_rate": 2.6886050171248727e-05, + "loss": 2.5364, + "step": 10088 + }, + { + "epoch": 0.12, + "learning_rate": 2.6881421827270202e-05, + "loss": 0.8596, + "step": 10090 + }, + { + "epoch": 0.12, + "learning_rate": 2.6876793483291678e-05, + "loss": 1.5537, + "step": 10092 + }, + { + "epoch": 0.12, + "learning_rate": 2.6872165139313153e-05, + "loss": 1.575, + "step": 10094 + }, + { + "epoch": 0.12, + "learning_rate": 2.6867536795334632e-05, + "loss": 1.0405, + "step": 10096 + }, + { + "epoch": 0.12, + "learning_rate": 2.6862908451356107e-05, + "loss": 2.3742, + "step": 10098 + }, + { + "epoch": 0.12, + "learning_rate": 2.6858280107377583e-05, + "loss": 2.3216, + "step": 10100 + }, + { + "epoch": 0.12, + "learning_rate": 2.6853651763399058e-05, + "loss": 2.5607, + "step": 10102 + }, + { + "epoch": 0.12, + "learning_rate": 2.6849023419420534e-05, + "loss": 1.0264, + "step": 10104 + }, + { + "epoch": 0.12, + "learning_rate": 2.684439507544201e-05, + "loss": 0.7691, + "step": 10106 + }, + { + "epoch": 0.12, + "learning_rate": 2.6839766731463484e-05, + "loss": 0.9071, + "step": 10108 + }, + { + "epoch": 0.12, + "learning_rate": 2.683513838748496e-05, + "loss": 4.424, + "step": 10110 + }, + { + "epoch": 0.12, + "learning_rate": 2.6830510043506435e-05, + "loss": 0.4335, + "step": 10112 + }, + { + "epoch": 0.12, + "learning_rate": 2.682588169952791e-05, + "loss": 0.2197, + "step": 10114 + }, + { + "epoch": 0.12, + "learning_rate": 2.6821253355549386e-05, + "loss": 2.2308, + "step": 10116 + }, + { + "epoch": 0.12, + "learning_rate": 2.681662501157086e-05, + "loss": 2.1068, + "step": 10118 + }, + { + "epoch": 0.12, + "learning_rate": 2.6811996667592337e-05, + "loss": 4.6484, + "step": 10120 + }, + { + "epoch": 0.12, + "learning_rate": 2.6807368323613812e-05, + "loss": 4.2568, + "step": 10122 + }, + { + "epoch": 0.12, + "learning_rate": 2.6802739979635288e-05, + "loss": 4.9319, + "step": 10124 + }, + { + "epoch": 0.12, + "learning_rate": 2.6798111635656763e-05, + "loss": 9.7115, + "step": 10126 + }, + { + "epoch": 0.12, + "learning_rate": 2.679348329167824e-05, + "loss": 0.0059, + "step": 10128 + }, + { + "epoch": 0.12, + "learning_rate": 2.6788854947699714e-05, + "loss": 1.317, + "step": 10130 + }, + { + "epoch": 0.12, + "learning_rate": 2.678422660372119e-05, + "loss": 6.1077, + "step": 10132 + }, + { + "epoch": 0.12, + "learning_rate": 2.6779598259742665e-05, + "loss": 4.0945, + "step": 10134 + }, + { + "epoch": 0.12, + "learning_rate": 2.6774969915764144e-05, + "loss": 0.5854, + "step": 10136 + }, + { + "epoch": 0.12, + "learning_rate": 2.677034157178562e-05, + "loss": 1.2135, + "step": 10138 + }, + { + "epoch": 0.12, + "learning_rate": 2.6765713227807095e-05, + "loss": 0.8805, + "step": 10140 + }, + { + "epoch": 0.12, + "learning_rate": 2.676108488382857e-05, + "loss": 1.7084, + "step": 10142 + }, + { + "epoch": 0.12, + "learning_rate": 2.6756456539850045e-05, + "loss": 0.8596, + "step": 10144 + }, + { + "epoch": 0.12, + "learning_rate": 2.675182819587152e-05, + "loss": 0.0085, + "step": 10146 + }, + { + "epoch": 0.12, + "learning_rate": 2.6747199851892996e-05, + "loss": 1.4268, + "step": 10148 + }, + { + "epoch": 0.12, + "learning_rate": 2.674257150791447e-05, + "loss": 0.2733, + "step": 10150 + }, + { + "epoch": 0.12, + "learning_rate": 2.6737943163935947e-05, + "loss": 7.3822, + "step": 10152 + }, + { + "epoch": 0.12, + "learning_rate": 2.6733314819957423e-05, + "loss": 6.5193, + "step": 10154 + }, + { + "epoch": 0.12, + "learning_rate": 2.6728686475978898e-05, + "loss": 0.9149, + "step": 10156 + }, + { + "epoch": 0.12, + "learning_rate": 2.6724058132000373e-05, + "loss": 5.5151, + "step": 10158 + }, + { + "epoch": 0.12, + "learning_rate": 2.671942978802185e-05, + "loss": 2.9135, + "step": 10160 + }, + { + "epoch": 0.12, + "learning_rate": 2.6714801444043324e-05, + "loss": 1.7214, + "step": 10162 + }, + { + "epoch": 0.12, + "learning_rate": 2.67101731000648e-05, + "loss": 0.4486, + "step": 10164 + }, + { + "epoch": 0.12, + "learning_rate": 2.6705544756086275e-05, + "loss": 0.1564, + "step": 10166 + }, + { + "epoch": 0.12, + "learning_rate": 2.670091641210775e-05, + "loss": 0.6106, + "step": 10168 + }, + { + "epoch": 0.12, + "learning_rate": 2.6696288068129226e-05, + "loss": 1.3377, + "step": 10170 + }, + { + "epoch": 0.12, + "learning_rate": 2.66916597241507e-05, + "loss": 1.9144, + "step": 10172 + }, + { + "epoch": 0.12, + "learning_rate": 2.6687031380172177e-05, + "loss": 0.0046, + "step": 10174 + }, + { + "epoch": 0.12, + "learning_rate": 2.6682403036193656e-05, + "loss": 0.8383, + "step": 10176 + }, + { + "epoch": 0.12, + "learning_rate": 2.6677774692215124e-05, + "loss": 1.1476, + "step": 10178 + }, + { + "epoch": 0.12, + "learning_rate": 2.66731463482366e-05, + "loss": 3.8101, + "step": 10180 + }, + { + "epoch": 0.12, + "learning_rate": 2.6668518004258075e-05, + "loss": 4.6926, + "step": 10182 + }, + { + "epoch": 0.12, + "learning_rate": 2.666388966027955e-05, + "loss": 1.6734, + "step": 10184 + }, + { + "epoch": 0.12, + "learning_rate": 2.6659261316301026e-05, + "loss": 0.1078, + "step": 10186 + }, + { + "epoch": 0.12, + "learning_rate": 2.66546329723225e-05, + "loss": 0.0044, + "step": 10188 + }, + { + "epoch": 0.12, + "learning_rate": 2.6650004628343977e-05, + "loss": 2.0899, + "step": 10190 + }, + { + "epoch": 0.12, + "learning_rate": 2.6645376284365452e-05, + "loss": 0.061, + "step": 10192 + }, + { + "epoch": 0.12, + "learning_rate": 2.6640747940386928e-05, + "loss": 5.3708, + "step": 10194 + }, + { + "epoch": 0.12, + "learning_rate": 2.6636119596408403e-05, + "loss": 0.4772, + "step": 10196 + }, + { + "epoch": 0.12, + "learning_rate": 2.663149125242988e-05, + "loss": 0.3239, + "step": 10198 + }, + { + "epoch": 0.12, + "learning_rate": 2.6626862908451354e-05, + "loss": 1.8412, + "step": 10200 + }, + { + "epoch": 0.12, + "learning_rate": 2.6622234564472833e-05, + "loss": 0.2176, + "step": 10202 + }, + { + "epoch": 0.12, + "learning_rate": 2.6617606220494308e-05, + "loss": 0.0006, + "step": 10204 + }, + { + "epoch": 0.12, + "learning_rate": 2.6612977876515784e-05, + "loss": 0.0023, + "step": 10206 + }, + { + "epoch": 0.12, + "learning_rate": 2.660834953253726e-05, + "loss": 0.3058, + "step": 10208 + }, + { + "epoch": 0.12, + "learning_rate": 2.6603721188558734e-05, + "loss": 2.0413, + "step": 10210 + }, + { + "epoch": 0.12, + "learning_rate": 2.659909284458021e-05, + "loss": 6.2956, + "step": 10212 + }, + { + "epoch": 0.12, + "learning_rate": 2.6594464500601685e-05, + "loss": 1.2816, + "step": 10214 + }, + { + "epoch": 0.12, + "learning_rate": 2.658983615662316e-05, + "loss": 1.4848, + "step": 10216 + }, + { + "epoch": 0.12, + "learning_rate": 2.6585207812644636e-05, + "loss": 0.0694, + "step": 10218 + }, + { + "epoch": 0.12, + "learning_rate": 2.658057946866611e-05, + "loss": 6.6251, + "step": 10220 + }, + { + "epoch": 0.12, + "learning_rate": 2.6575951124687587e-05, + "loss": 0.1252, + "step": 10222 + }, + { + "epoch": 0.12, + "learning_rate": 2.6571322780709062e-05, + "loss": 0.9605, + "step": 10224 + }, + { + "epoch": 0.12, + "learning_rate": 2.6566694436730538e-05, + "loss": 0.0045, + "step": 10226 + }, + { + "epoch": 0.12, + "learning_rate": 2.6562066092752013e-05, + "loss": 8.2282, + "step": 10228 + }, + { + "epoch": 0.12, + "learning_rate": 2.655743774877349e-05, + "loss": 4.4762, + "step": 10230 + }, + { + "epoch": 0.12, + "learning_rate": 2.6552809404794964e-05, + "loss": 0.3105, + "step": 10232 + }, + { + "epoch": 0.12, + "learning_rate": 2.654818106081644e-05, + "loss": 0.0005, + "step": 10234 + }, + { + "epoch": 0.12, + "learning_rate": 2.6543552716837915e-05, + "loss": 0.135, + "step": 10236 + }, + { + "epoch": 0.12, + "learning_rate": 2.653892437285939e-05, + "loss": 0.3372, + "step": 10238 + }, + { + "epoch": 0.12, + "learning_rate": 2.6534296028880866e-05, + "loss": 8.881, + "step": 10240 + }, + { + "epoch": 0.12, + "learning_rate": 2.6529667684902345e-05, + "loss": 3.5332, + "step": 10242 + }, + { + "epoch": 0.12, + "learning_rate": 2.652503934092382e-05, + "loss": 4.2912, + "step": 10244 + }, + { + "epoch": 0.12, + "learning_rate": 2.6520410996945295e-05, + "loss": 0.5557, + "step": 10246 + }, + { + "epoch": 0.12, + "learning_rate": 2.651578265296677e-05, + "loss": 2.1343, + "step": 10248 + }, + { + "epoch": 0.12, + "learning_rate": 2.6511154308988246e-05, + "loss": 4.5437, + "step": 10250 + }, + { + "epoch": 0.12, + "learning_rate": 2.6506525965009722e-05, + "loss": 7.403, + "step": 10252 + }, + { + "epoch": 0.12, + "learning_rate": 2.6501897621031197e-05, + "loss": 0.3731, + "step": 10254 + }, + { + "epoch": 0.12, + "learning_rate": 2.6497269277052673e-05, + "loss": 0.4076, + "step": 10256 + }, + { + "epoch": 0.12, + "learning_rate": 2.6492640933074148e-05, + "loss": 0.5089, + "step": 10258 + }, + { + "epoch": 0.12, + "learning_rate": 2.6488012589095623e-05, + "loss": 1.3773, + "step": 10260 + }, + { + "epoch": 0.12, + "learning_rate": 2.64833842451171e-05, + "loss": 0.8359, + "step": 10262 + }, + { + "epoch": 0.12, + "learning_rate": 2.6478755901138574e-05, + "loss": 0.0019, + "step": 10264 + }, + { + "epoch": 0.12, + "learning_rate": 2.647412755716005e-05, + "loss": 2.3594, + "step": 10266 + }, + { + "epoch": 0.12, + "learning_rate": 2.6469499213181525e-05, + "loss": 2.0376, + "step": 10268 + }, + { + "epoch": 0.12, + "learning_rate": 2.6464870869203e-05, + "loss": 0.1366, + "step": 10270 + }, + { + "epoch": 0.12, + "learning_rate": 2.6460242525224476e-05, + "loss": 4.2354, + "step": 10272 + }, + { + "epoch": 0.12, + "learning_rate": 2.645561418124595e-05, + "loss": 5.1587, + "step": 10274 + }, + { + "epoch": 0.12, + "learning_rate": 2.6450985837267427e-05, + "loss": 3.3224, + "step": 10276 + }, + { + "epoch": 0.12, + "learning_rate": 2.6446357493288902e-05, + "loss": 0.055, + "step": 10278 + }, + { + "epoch": 0.12, + "learning_rate": 2.6441729149310378e-05, + "loss": 0.0662, + "step": 10280 + }, + { + "epoch": 0.12, + "learning_rate": 2.6437100805331853e-05, + "loss": 3.1553, + "step": 10282 + }, + { + "epoch": 0.12, + "learning_rate": 2.6432472461353332e-05, + "loss": 2.8004, + "step": 10284 + }, + { + "epoch": 0.12, + "learning_rate": 2.6427844117374807e-05, + "loss": 1.2614, + "step": 10286 + }, + { + "epoch": 0.12, + "learning_rate": 2.6423215773396283e-05, + "loss": 2.2809, + "step": 10288 + }, + { + "epoch": 0.12, + "learning_rate": 2.6418587429417758e-05, + "loss": 2.2818, + "step": 10290 + }, + { + "epoch": 0.12, + "learning_rate": 2.6413959085439234e-05, + "loss": 1.3169, + "step": 10292 + }, + { + "epoch": 0.12, + "learning_rate": 2.640933074146071e-05, + "loss": 2.6348, + "step": 10294 + }, + { + "epoch": 0.12, + "learning_rate": 2.6404702397482184e-05, + "loss": 1.548, + "step": 10296 + }, + { + "epoch": 0.12, + "learning_rate": 2.640007405350366e-05, + "loss": 6.3316, + "step": 10298 + }, + { + "epoch": 0.12, + "learning_rate": 2.6395445709525135e-05, + "loss": 0.0034, + "step": 10300 + }, + { + "epoch": 0.12, + "learning_rate": 2.639081736554661e-05, + "loss": 0.0042, + "step": 10302 + }, + { + "epoch": 0.12, + "learning_rate": 2.6386189021568086e-05, + "loss": 2.9232, + "step": 10304 + }, + { + "epoch": 0.12, + "learning_rate": 2.638156067758956e-05, + "loss": 1.1083, + "step": 10306 + }, + { + "epoch": 0.12, + "learning_rate": 2.6376932333611037e-05, + "loss": 0.0003, + "step": 10308 + }, + { + "epoch": 0.12, + "learning_rate": 2.6372303989632512e-05, + "loss": 1.7112, + "step": 10310 + }, + { + "epoch": 0.12, + "learning_rate": 2.6367675645653988e-05, + "loss": 1.4493, + "step": 10312 + }, + { + "epoch": 0.12, + "learning_rate": 2.6363047301675463e-05, + "loss": 5.4348, + "step": 10314 + }, + { + "epoch": 0.12, + "learning_rate": 2.635841895769694e-05, + "loss": 5.6853, + "step": 10316 + }, + { + "epoch": 0.12, + "learning_rate": 2.6353790613718414e-05, + "loss": 3.2101, + "step": 10318 + }, + { + "epoch": 0.12, + "learning_rate": 2.634916226973989e-05, + "loss": 0.0022, + "step": 10320 + }, + { + "epoch": 0.12, + "learning_rate": 2.6344533925761365e-05, + "loss": 6.6323, + "step": 10322 + }, + { + "epoch": 0.12, + "learning_rate": 2.6339905581782837e-05, + "loss": 1.1668, + "step": 10324 + }, + { + "epoch": 0.12, + "learning_rate": 2.6335277237804312e-05, + "loss": 2.2497, + "step": 10326 + }, + { + "epoch": 0.12, + "learning_rate": 2.6330648893825788e-05, + "loss": 1.3288, + "step": 10328 + }, + { + "epoch": 0.12, + "learning_rate": 2.6326020549847263e-05, + "loss": 2.636, + "step": 10330 + }, + { + "epoch": 0.12, + "learning_rate": 2.632139220586874e-05, + "loss": 3.8662, + "step": 10332 + }, + { + "epoch": 0.12, + "learning_rate": 2.6316763861890214e-05, + "loss": 0.5036, + "step": 10334 + }, + { + "epoch": 0.12, + "learning_rate": 2.631213551791169e-05, + "loss": 3.8454, + "step": 10336 + }, + { + "epoch": 0.12, + "learning_rate": 2.6307507173933165e-05, + "loss": 0.9342, + "step": 10338 + }, + { + "epoch": 0.12, + "learning_rate": 2.630287882995464e-05, + "loss": 0.238, + "step": 10340 + }, + { + "epoch": 0.12, + "learning_rate": 2.6298250485976116e-05, + "loss": 3.8007, + "step": 10342 + }, + { + "epoch": 0.12, + "learning_rate": 2.629362214199759e-05, + "loss": 0.8698, + "step": 10344 + }, + { + "epoch": 0.12, + "learning_rate": 2.6288993798019067e-05, + "loss": 5.4808, + "step": 10346 + }, + { + "epoch": 0.12, + "learning_rate": 2.6284365454040542e-05, + "loss": 3.9051, + "step": 10348 + }, + { + "epoch": 0.12, + "learning_rate": 2.627973711006202e-05, + "loss": 2.653, + "step": 10350 + }, + { + "epoch": 0.12, + "learning_rate": 2.6275108766083496e-05, + "loss": 2.5757, + "step": 10352 + }, + { + "epoch": 0.12, + "learning_rate": 2.6270480422104972e-05, + "loss": 3.8571, + "step": 10354 + }, + { + "epoch": 0.12, + "learning_rate": 2.6265852078126447e-05, + "loss": 2.7164, + "step": 10356 + }, + { + "epoch": 0.12, + "learning_rate": 2.6261223734147923e-05, + "loss": 1.8354, + "step": 10358 + }, + { + "epoch": 0.12, + "learning_rate": 2.6256595390169398e-05, + "loss": 0.7843, + "step": 10360 + }, + { + "epoch": 0.12, + "learning_rate": 2.6251967046190873e-05, + "loss": 3.1618, + "step": 10362 + }, + { + "epoch": 0.12, + "learning_rate": 2.624733870221235e-05, + "loss": 1.8528, + "step": 10364 + }, + { + "epoch": 0.12, + "learning_rate": 2.6242710358233824e-05, + "loss": 1.1338, + "step": 10366 + }, + { + "epoch": 0.12, + "learning_rate": 2.62380820142553e-05, + "loss": 2.4401, + "step": 10368 + }, + { + "epoch": 0.12, + "learning_rate": 2.6233453670276775e-05, + "loss": 3.8264, + "step": 10370 + }, + { + "epoch": 0.12, + "learning_rate": 2.622882532629825e-05, + "loss": 1.6514, + "step": 10372 + }, + { + "epoch": 0.12, + "learning_rate": 2.6224196982319726e-05, + "loss": 1.358, + "step": 10374 + }, + { + "epoch": 0.12, + "learning_rate": 2.62195686383412e-05, + "loss": 1.4559, + "step": 10376 + }, + { + "epoch": 0.12, + "learning_rate": 2.6214940294362677e-05, + "loss": 1.0523, + "step": 10378 + }, + { + "epoch": 0.12, + "learning_rate": 2.6210311950384152e-05, + "loss": 0.5671, + "step": 10380 + }, + { + "epoch": 0.12, + "learning_rate": 2.6205683606405628e-05, + "loss": 1.2218, + "step": 10382 + }, + { + "epoch": 0.12, + "learning_rate": 2.6201055262427103e-05, + "loss": 0.9899, + "step": 10384 + }, + { + "epoch": 0.12, + "learning_rate": 2.619642691844858e-05, + "loss": 0.5512, + "step": 10386 + }, + { + "epoch": 0.12, + "learning_rate": 2.6191798574470054e-05, + "loss": 0.9641, + "step": 10388 + }, + { + "epoch": 0.12, + "learning_rate": 2.6187170230491533e-05, + "loss": 1.1174, + "step": 10390 + }, + { + "epoch": 0.12, + "learning_rate": 2.6182541886513008e-05, + "loss": 0.0414, + "step": 10392 + }, + { + "epoch": 0.12, + "learning_rate": 2.6177913542534484e-05, + "loss": 1.4995, + "step": 10394 + }, + { + "epoch": 0.12, + "learning_rate": 2.617328519855596e-05, + "loss": 5.1025, + "step": 10396 + }, + { + "epoch": 0.12, + "learning_rate": 2.6168656854577434e-05, + "loss": 0.9862, + "step": 10398 + }, + { + "epoch": 0.12, + "learning_rate": 2.616402851059891e-05, + "loss": 1.1396, + "step": 10400 + }, + { + "epoch": 0.12, + "learning_rate": 2.6159400166620385e-05, + "loss": 2.5759, + "step": 10402 + }, + { + "epoch": 0.12, + "learning_rate": 2.615477182264186e-05, + "loss": 1.1888, + "step": 10404 + }, + { + "epoch": 0.12, + "learning_rate": 2.6150143478663336e-05, + "loss": 0.9193, + "step": 10406 + }, + { + "epoch": 0.12, + "learning_rate": 2.614551513468481e-05, + "loss": 0.0948, + "step": 10408 + }, + { + "epoch": 0.12, + "learning_rate": 2.6140886790706287e-05, + "loss": 0.8643, + "step": 10410 + }, + { + "epoch": 0.12, + "learning_rate": 2.6136258446727762e-05, + "loss": 6.166, + "step": 10412 + }, + { + "epoch": 0.12, + "learning_rate": 2.6131630102749238e-05, + "loss": 2.3391, + "step": 10414 + }, + { + "epoch": 0.12, + "learning_rate": 2.6127001758770713e-05, + "loss": 1.7891, + "step": 10416 + }, + { + "epoch": 0.12, + "learning_rate": 2.612237341479219e-05, + "loss": 1.506, + "step": 10418 + }, + { + "epoch": 0.12, + "learning_rate": 2.6117745070813664e-05, + "loss": 1.6155, + "step": 10420 + }, + { + "epoch": 0.12, + "learning_rate": 2.611311672683514e-05, + "loss": 0.9201, + "step": 10422 + }, + { + "epoch": 0.12, + "learning_rate": 2.6108488382856615e-05, + "loss": 0.017, + "step": 10424 + }, + { + "epoch": 0.12, + "learning_rate": 2.610386003887809e-05, + "loss": 0.1159, + "step": 10426 + }, + { + "epoch": 0.12, + "learning_rate": 2.6099231694899566e-05, + "loss": 2.1721, + "step": 10428 + }, + { + "epoch": 0.12, + "learning_rate": 2.6094603350921045e-05, + "loss": 1.33, + "step": 10430 + }, + { + "epoch": 0.12, + "learning_rate": 2.608997500694252e-05, + "loss": 5.2398, + "step": 10432 + }, + { + "epoch": 0.12, + "learning_rate": 2.6085346662963996e-05, + "loss": 0.7176, + "step": 10434 + }, + { + "epoch": 0.12, + "learning_rate": 2.608071831898547e-05, + "loss": 3.5588, + "step": 10436 + }, + { + "epoch": 0.12, + "learning_rate": 2.6076089975006946e-05, + "loss": 6.9225, + "step": 10438 + }, + { + "epoch": 0.12, + "learning_rate": 2.6071461631028422e-05, + "loss": 2.9403, + "step": 10440 + }, + { + "epoch": 0.12, + "learning_rate": 2.6066833287049897e-05, + "loss": 0.9541, + "step": 10442 + }, + { + "epoch": 0.12, + "learning_rate": 2.6062204943071373e-05, + "loss": 0.554, + "step": 10444 + }, + { + "epoch": 0.12, + "learning_rate": 2.6057576599092848e-05, + "loss": 1.3721, + "step": 10446 + }, + { + "epoch": 0.12, + "learning_rate": 2.6052948255114323e-05, + "loss": 1.1349, + "step": 10448 + }, + { + "epoch": 0.12, + "learning_rate": 2.60483199111358e-05, + "loss": 0.9258, + "step": 10450 + }, + { + "epoch": 0.12, + "learning_rate": 2.6043691567157274e-05, + "loss": 3.2953, + "step": 10452 + }, + { + "epoch": 0.12, + "learning_rate": 2.603906322317875e-05, + "loss": 0.8485, + "step": 10454 + }, + { + "epoch": 0.12, + "learning_rate": 2.6034434879200225e-05, + "loss": 0.2823, + "step": 10456 + }, + { + "epoch": 0.12, + "learning_rate": 2.60298065352217e-05, + "loss": 0.8751, + "step": 10458 + }, + { + "epoch": 0.12, + "learning_rate": 2.6025178191243176e-05, + "loss": 4.899, + "step": 10460 + }, + { + "epoch": 0.12, + "learning_rate": 2.602054984726465e-05, + "loss": 0.2176, + "step": 10462 + }, + { + "epoch": 0.12, + "learning_rate": 2.6015921503286127e-05, + "loss": 1.6826, + "step": 10464 + }, + { + "epoch": 0.12, + "learning_rate": 2.6011293159307602e-05, + "loss": 0.3138, + "step": 10466 + }, + { + "epoch": 0.12, + "learning_rate": 2.6006664815329074e-05, + "loss": 2.7041, + "step": 10468 + }, + { + "epoch": 0.12, + "learning_rate": 2.600203647135055e-05, + "loss": 6.2093, + "step": 10470 + }, + { + "epoch": 0.12, + "learning_rate": 2.5997408127372025e-05, + "loss": 0.0022, + "step": 10472 + }, + { + "epoch": 0.12, + "learning_rate": 2.59927797833935e-05, + "loss": 2.2286, + "step": 10474 + }, + { + "epoch": 0.12, + "learning_rate": 2.5988151439414976e-05, + "loss": 6.6755, + "step": 10476 + }, + { + "epoch": 0.12, + "learning_rate": 2.598352309543645e-05, + "loss": 4.358, + "step": 10478 + }, + { + "epoch": 0.12, + "learning_rate": 2.5978894751457927e-05, + "loss": 4.1574, + "step": 10480 + }, + { + "epoch": 0.12, + "learning_rate": 2.5974266407479402e-05, + "loss": 0.7281, + "step": 10482 + }, + { + "epoch": 0.12, + "learning_rate": 2.5969638063500878e-05, + "loss": 5.0903, + "step": 10484 + }, + { + "epoch": 0.12, + "learning_rate": 2.5965009719522353e-05, + "loss": 2.4332, + "step": 10486 + }, + { + "epoch": 0.12, + "learning_rate": 2.596038137554383e-05, + "loss": 0.0004, + "step": 10488 + }, + { + "epoch": 0.12, + "learning_rate": 2.5955753031565304e-05, + "loss": 0.0006, + "step": 10490 + }, + { + "epoch": 0.12, + "learning_rate": 2.595112468758678e-05, + "loss": 0.6822, + "step": 10492 + }, + { + "epoch": 0.12, + "learning_rate": 2.5946496343608255e-05, + "loss": 1.2764, + "step": 10494 + }, + { + "epoch": 0.12, + "learning_rate": 2.5941867999629734e-05, + "loss": 2.136, + "step": 10496 + }, + { + "epoch": 0.12, + "learning_rate": 2.593723965565121e-05, + "loss": 0.0312, + "step": 10498 + }, + { + "epoch": 0.12, + "learning_rate": 2.5932611311672685e-05, + "loss": 0.4909, + "step": 10500 + }, + { + "epoch": 0.12, + "learning_rate": 2.592798296769416e-05, + "loss": 1.482, + "step": 10502 + }, + { + "epoch": 0.12, + "learning_rate": 2.5923354623715635e-05, + "loss": 1.1747, + "step": 10504 + }, + { + "epoch": 0.12, + "learning_rate": 2.591872627973711e-05, + "loss": 3.5803, + "step": 10506 + }, + { + "epoch": 0.12, + "learning_rate": 2.5914097935758586e-05, + "loss": 3.5772, + "step": 10508 + }, + { + "epoch": 0.12, + "learning_rate": 2.590946959178006e-05, + "loss": 1.9199, + "step": 10510 + }, + { + "epoch": 0.12, + "learning_rate": 2.5904841247801537e-05, + "loss": 0.4333, + "step": 10512 + }, + { + "epoch": 0.12, + "learning_rate": 2.5900212903823012e-05, + "loss": 2.204, + "step": 10514 + }, + { + "epoch": 0.12, + "learning_rate": 2.5895584559844488e-05, + "loss": 1.1161, + "step": 10516 + }, + { + "epoch": 0.12, + "learning_rate": 2.5890956215865963e-05, + "loss": 1.7936, + "step": 10518 + }, + { + "epoch": 0.12, + "learning_rate": 2.588632787188744e-05, + "loss": 0.5479, + "step": 10520 + }, + { + "epoch": 0.12, + "learning_rate": 2.5881699527908914e-05, + "loss": 1.3175, + "step": 10522 + }, + { + "epoch": 0.12, + "learning_rate": 2.587707118393039e-05, + "loss": 0.0008, + "step": 10524 + }, + { + "epoch": 0.12, + "learning_rate": 2.5872442839951865e-05, + "loss": 1.1473, + "step": 10526 + }, + { + "epoch": 0.12, + "learning_rate": 2.586781449597334e-05, + "loss": 1.2104, + "step": 10528 + }, + { + "epoch": 0.12, + "learning_rate": 2.5863186151994816e-05, + "loss": 2.3721, + "step": 10530 + }, + { + "epoch": 0.12, + "learning_rate": 2.585855780801629e-05, + "loss": 2.7306, + "step": 10532 + }, + { + "epoch": 0.12, + "learning_rate": 2.5853929464037767e-05, + "loss": 0.8466, + "step": 10534 + }, + { + "epoch": 0.12, + "learning_rate": 2.5849301120059246e-05, + "loss": 3.5906, + "step": 10536 + }, + { + "epoch": 0.12, + "learning_rate": 2.584467277608072e-05, + "loss": 1.8663, + "step": 10538 + }, + { + "epoch": 0.12, + "learning_rate": 2.5840044432102196e-05, + "loss": 0.5272, + "step": 10540 + }, + { + "epoch": 0.12, + "learning_rate": 2.5835416088123672e-05, + "loss": 2.1581, + "step": 10542 + }, + { + "epoch": 0.12, + "learning_rate": 2.5830787744145147e-05, + "loss": 9.88, + "step": 10544 + }, + { + "epoch": 0.12, + "learning_rate": 2.5826159400166623e-05, + "loss": 2.718, + "step": 10546 + }, + { + "epoch": 0.12, + "learning_rate": 2.5821531056188098e-05, + "loss": 5.0854, + "step": 10548 + }, + { + "epoch": 0.12, + "learning_rate": 2.5816902712209574e-05, + "loss": 3.1532, + "step": 10550 + }, + { + "epoch": 0.12, + "learning_rate": 2.581227436823105e-05, + "loss": 0.8753, + "step": 10552 + }, + { + "epoch": 0.12, + "learning_rate": 2.5807646024252524e-05, + "loss": 0.7775, + "step": 10554 + }, + { + "epoch": 0.12, + "learning_rate": 2.5803017680274e-05, + "loss": 4.2073, + "step": 10556 + }, + { + "epoch": 0.12, + "learning_rate": 2.5798389336295475e-05, + "loss": 0.1827, + "step": 10558 + }, + { + "epoch": 0.12, + "learning_rate": 2.579376099231695e-05, + "loss": 4.5917, + "step": 10560 + }, + { + "epoch": 0.12, + "learning_rate": 2.5789132648338426e-05, + "loss": 0.0011, + "step": 10562 + }, + { + "epoch": 0.12, + "learning_rate": 2.57845043043599e-05, + "loss": 1.2392, + "step": 10564 + }, + { + "epoch": 0.12, + "learning_rate": 2.5779875960381377e-05, + "loss": 2.0865, + "step": 10566 + }, + { + "epoch": 0.12, + "learning_rate": 2.5775247616402852e-05, + "loss": 0.3336, + "step": 10568 + }, + { + "epoch": 0.12, + "learning_rate": 2.5770619272424328e-05, + "loss": 1.6361, + "step": 10570 + }, + { + "epoch": 0.12, + "learning_rate": 2.5765990928445803e-05, + "loss": 3.3452, + "step": 10572 + }, + { + "epoch": 0.12, + "learning_rate": 2.576136258446728e-05, + "loss": 1.1774, + "step": 10574 + }, + { + "epoch": 0.12, + "learning_rate": 2.5756734240488754e-05, + "loss": 0.0012, + "step": 10576 + }, + { + "epoch": 0.12, + "learning_rate": 2.5752105896510233e-05, + "loss": 3.2275, + "step": 10578 + }, + { + "epoch": 0.12, + "learning_rate": 2.5747477552531708e-05, + "loss": 1.7193, + "step": 10580 + }, + { + "epoch": 0.12, + "learning_rate": 2.5742849208553184e-05, + "loss": 2.0842, + "step": 10582 + }, + { + "epoch": 0.12, + "learning_rate": 2.573822086457466e-05, + "loss": 0.2502, + "step": 10584 + }, + { + "epoch": 0.12, + "learning_rate": 2.5733592520596135e-05, + "loss": 1.5428, + "step": 10586 + }, + { + "epoch": 0.12, + "learning_rate": 2.572896417661761e-05, + "loss": 0.4105, + "step": 10588 + }, + { + "epoch": 0.12, + "learning_rate": 2.5724335832639085e-05, + "loss": 2.146, + "step": 10590 + }, + { + "epoch": 0.12, + "learning_rate": 2.571970748866056e-05, + "loss": 0.2364, + "step": 10592 + }, + { + "epoch": 0.12, + "learning_rate": 2.5715079144682036e-05, + "loss": 1.5412, + "step": 10594 + }, + { + "epoch": 0.12, + "learning_rate": 2.571045080070351e-05, + "loss": 0.9249, + "step": 10596 + }, + { + "epoch": 0.12, + "learning_rate": 2.5705822456724987e-05, + "loss": 3.7576, + "step": 10598 + }, + { + "epoch": 0.12, + "learning_rate": 2.5701194112746463e-05, + "loss": 4.3339, + "step": 10600 + }, + { + "epoch": 0.12, + "learning_rate": 2.5696565768767938e-05, + "loss": 0.6386, + "step": 10602 + }, + { + "epoch": 0.12, + "learning_rate": 2.5691937424789413e-05, + "loss": 0.2593, + "step": 10604 + }, + { + "epoch": 0.12, + "learning_rate": 2.568730908081089e-05, + "loss": 0.0038, + "step": 10606 + }, + { + "epoch": 0.12, + "learning_rate": 2.5682680736832364e-05, + "loss": 6.044, + "step": 10608 + }, + { + "epoch": 0.12, + "learning_rate": 2.567805239285384e-05, + "loss": 0.8263, + "step": 10610 + }, + { + "epoch": 0.12, + "learning_rate": 2.5673424048875315e-05, + "loss": 3.2659, + "step": 10612 + }, + { + "epoch": 0.12, + "learning_rate": 2.5668795704896787e-05, + "loss": 2.8776, + "step": 10614 + }, + { + "epoch": 0.12, + "learning_rate": 2.5664167360918263e-05, + "loss": 4.6936, + "step": 10616 + }, + { + "epoch": 0.12, + "learning_rate": 2.5659539016939738e-05, + "loss": 1.6824, + "step": 10618 + }, + { + "epoch": 0.12, + "learning_rate": 2.5654910672961213e-05, + "loss": 2.5735, + "step": 10620 + }, + { + "epoch": 0.12, + "learning_rate": 2.565028232898269e-05, + "loss": 4.6955, + "step": 10622 + }, + { + "epoch": 0.12, + "learning_rate": 2.5645653985004164e-05, + "loss": 1.3968, + "step": 10624 + }, + { + "epoch": 0.12, + "learning_rate": 2.564102564102564e-05, + "loss": 2.956, + "step": 10626 + }, + { + "epoch": 0.12, + "learning_rate": 2.5636397297047115e-05, + "loss": 0.8258, + "step": 10628 + }, + { + "epoch": 0.12, + "learning_rate": 2.563176895306859e-05, + "loss": 2.8366, + "step": 10630 + }, + { + "epoch": 0.12, + "learning_rate": 2.5627140609090066e-05, + "loss": 0.2113, + "step": 10632 + }, + { + "epoch": 0.12, + "learning_rate": 2.562251226511154e-05, + "loss": 1.1126, + "step": 10634 + }, + { + "epoch": 0.12, + "learning_rate": 2.5617883921133017e-05, + "loss": 1.4459, + "step": 10636 + }, + { + "epoch": 0.12, + "learning_rate": 2.5613255577154492e-05, + "loss": 0.0003, + "step": 10638 + }, + { + "epoch": 0.12, + "learning_rate": 2.5608627233175968e-05, + "loss": 1.0867, + "step": 10640 + }, + { + "epoch": 0.12, + "learning_rate": 2.5603998889197443e-05, + "loss": 3.8417, + "step": 10642 + }, + { + "epoch": 0.12, + "learning_rate": 2.5599370545218922e-05, + "loss": 0.663, + "step": 10644 + }, + { + "epoch": 0.12, + "learning_rate": 2.5594742201240397e-05, + "loss": 0.0667, + "step": 10646 + }, + { + "epoch": 0.12, + "learning_rate": 2.5590113857261873e-05, + "loss": 1.3336, + "step": 10648 + }, + { + "epoch": 0.12, + "learning_rate": 2.5585485513283348e-05, + "loss": 1.7983, + "step": 10650 + }, + { + "epoch": 0.12, + "learning_rate": 2.5580857169304824e-05, + "loss": 0.2013, + "step": 10652 + }, + { + "epoch": 0.12, + "learning_rate": 2.55762288253263e-05, + "loss": 2.7805, + "step": 10654 + }, + { + "epoch": 0.12, + "learning_rate": 2.5571600481347774e-05, + "loss": 0.8984, + "step": 10656 + }, + { + "epoch": 0.12, + "learning_rate": 2.556697213736925e-05, + "loss": 0.6232, + "step": 10658 + }, + { + "epoch": 0.12, + "learning_rate": 2.5562343793390725e-05, + "loss": 0.9569, + "step": 10660 + }, + { + "epoch": 0.12, + "learning_rate": 2.55577154494122e-05, + "loss": 0.1934, + "step": 10662 + }, + { + "epoch": 0.12, + "learning_rate": 2.5553087105433676e-05, + "loss": 1.8113, + "step": 10664 + }, + { + "epoch": 0.12, + "learning_rate": 2.554845876145515e-05, + "loss": 1.0031, + "step": 10666 + }, + { + "epoch": 0.12, + "learning_rate": 2.5543830417476627e-05, + "loss": 2.691, + "step": 10668 + }, + { + "epoch": 0.12, + "learning_rate": 2.5539202073498102e-05, + "loss": 2.0045, + "step": 10670 + }, + { + "epoch": 0.12, + "learning_rate": 2.5534573729519578e-05, + "loss": 0.1695, + "step": 10672 + }, + { + "epoch": 0.12, + "learning_rate": 2.5529945385541053e-05, + "loss": 0.9666, + "step": 10674 + }, + { + "epoch": 0.12, + "learning_rate": 2.552531704156253e-05, + "loss": 1.0635, + "step": 10676 + }, + { + "epoch": 0.12, + "learning_rate": 2.5520688697584004e-05, + "loss": 1.9105, + "step": 10678 + }, + { + "epoch": 0.12, + "learning_rate": 2.551606035360548e-05, + "loss": 5.6755, + "step": 10680 + }, + { + "epoch": 0.12, + "learning_rate": 2.5511432009626955e-05, + "loss": 0.7376, + "step": 10682 + }, + { + "epoch": 0.12, + "learning_rate": 2.5506803665648434e-05, + "loss": 8.4785, + "step": 10684 + }, + { + "epoch": 0.12, + "learning_rate": 2.550217532166991e-05, + "loss": 1.5228, + "step": 10686 + }, + { + "epoch": 0.12, + "learning_rate": 2.5497546977691385e-05, + "loss": 2.5081, + "step": 10688 + }, + { + "epoch": 0.12, + "learning_rate": 2.549291863371286e-05, + "loss": 3.8573, + "step": 10690 + }, + { + "epoch": 0.12, + "learning_rate": 2.5488290289734335e-05, + "loss": 0.9465, + "step": 10692 + }, + { + "epoch": 0.12, + "learning_rate": 2.548366194575581e-05, + "loss": 0.3552, + "step": 10694 + }, + { + "epoch": 0.12, + "learning_rate": 2.5479033601777286e-05, + "loss": 2.617, + "step": 10696 + }, + { + "epoch": 0.12, + "learning_rate": 2.5474405257798762e-05, + "loss": 2.9995, + "step": 10698 + }, + { + "epoch": 0.12, + "learning_rate": 2.5469776913820237e-05, + "loss": 1.2429, + "step": 10700 + }, + { + "epoch": 0.12, + "learning_rate": 2.5465148569841713e-05, + "loss": 1.1822, + "step": 10702 + }, + { + "epoch": 0.12, + "learning_rate": 2.5460520225863188e-05, + "loss": 0.129, + "step": 10704 + }, + { + "epoch": 0.12, + "learning_rate": 2.5455891881884663e-05, + "loss": 2.7891, + "step": 10706 + }, + { + "epoch": 0.12, + "learning_rate": 2.545126353790614e-05, + "loss": 1.9173, + "step": 10708 + }, + { + "epoch": 0.12, + "learning_rate": 2.5446635193927614e-05, + "loss": 0.3199, + "step": 10710 + }, + { + "epoch": 0.12, + "learning_rate": 2.544200684994909e-05, + "loss": 0.6595, + "step": 10712 + }, + { + "epoch": 0.12, + "learning_rate": 2.5437378505970565e-05, + "loss": 0.3747, + "step": 10714 + }, + { + "epoch": 0.12, + "learning_rate": 2.543275016199204e-05, + "loss": 0.0018, + "step": 10716 + }, + { + "epoch": 0.12, + "learning_rate": 2.5428121818013516e-05, + "loss": 1.6034, + "step": 10718 + }, + { + "epoch": 0.12, + "learning_rate": 2.542349347403499e-05, + "loss": 0.9929, + "step": 10720 + }, + { + "epoch": 0.12, + "learning_rate": 2.5418865130056467e-05, + "loss": 2.8125, + "step": 10722 + }, + { + "epoch": 0.12, + "learning_rate": 2.5414236786077946e-05, + "loss": 1.8049, + "step": 10724 + }, + { + "epoch": 0.12, + "learning_rate": 2.540960844209942e-05, + "loss": 1.3999, + "step": 10726 + }, + { + "epoch": 0.12, + "learning_rate": 2.5404980098120896e-05, + "loss": 2.3082, + "step": 10728 + }, + { + "epoch": 0.12, + "learning_rate": 2.5400351754142372e-05, + "loss": 4.2842, + "step": 10730 + }, + { + "epoch": 0.12, + "learning_rate": 2.5395723410163847e-05, + "loss": 2.0197, + "step": 10732 + }, + { + "epoch": 0.12, + "learning_rate": 2.5391095066185323e-05, + "loss": 5.4691, + "step": 10734 + }, + { + "epoch": 0.12, + "learning_rate": 2.5386466722206798e-05, + "loss": 0.0421, + "step": 10736 + }, + { + "epoch": 0.12, + "learning_rate": 2.5381838378228274e-05, + "loss": 4.4251, + "step": 10738 + }, + { + "epoch": 0.12, + "learning_rate": 2.537721003424975e-05, + "loss": 0.1543, + "step": 10740 + }, + { + "epoch": 0.12, + "learning_rate": 2.5372581690271224e-05, + "loss": 1.2435, + "step": 10742 + }, + { + "epoch": 0.12, + "learning_rate": 2.53679533462927e-05, + "loss": 0.1654, + "step": 10744 + }, + { + "epoch": 0.12, + "learning_rate": 2.5363325002314175e-05, + "loss": 2.1057, + "step": 10746 + }, + { + "epoch": 0.12, + "learning_rate": 2.535869665833565e-05, + "loss": 2.0205, + "step": 10748 + }, + { + "epoch": 0.12, + "learning_rate": 2.5354068314357126e-05, + "loss": 0.0004, + "step": 10750 + }, + { + "epoch": 0.12, + "learning_rate": 2.53494399703786e-05, + "loss": 0.5455, + "step": 10752 + }, + { + "epoch": 0.12, + "learning_rate": 2.5344811626400077e-05, + "loss": 0.6389, + "step": 10754 + }, + { + "epoch": 0.12, + "learning_rate": 2.5340183282421552e-05, + "loss": 3.7369, + "step": 10756 + }, + { + "epoch": 0.12, + "learning_rate": 2.5335554938443024e-05, + "loss": 0.0049, + "step": 10758 + }, + { + "epoch": 0.12, + "learning_rate": 2.53309265944645e-05, + "loss": 0.7895, + "step": 10760 + }, + { + "epoch": 0.12, + "learning_rate": 2.5326298250485975e-05, + "loss": 2.4411, + "step": 10762 + }, + { + "epoch": 0.12, + "learning_rate": 2.532166990650745e-05, + "loss": 1.3168, + "step": 10764 + }, + { + "epoch": 0.12, + "learning_rate": 2.5317041562528926e-05, + "loss": 0.2204, + "step": 10766 + }, + { + "epoch": 0.12, + "learning_rate": 2.53124132185504e-05, + "loss": 1.9499, + "step": 10768 + }, + { + "epoch": 0.12, + "learning_rate": 2.5307784874571877e-05, + "loss": 7.521, + "step": 10770 + }, + { + "epoch": 0.12, + "learning_rate": 2.5303156530593352e-05, + "loss": 0.0206, + "step": 10772 + }, + { + "epoch": 0.12, + "learning_rate": 2.5298528186614828e-05, + "loss": 6.26, + "step": 10774 + }, + { + "epoch": 0.12, + "learning_rate": 2.5293899842636303e-05, + "loss": 0.2263, + "step": 10776 + }, + { + "epoch": 0.12, + "learning_rate": 2.528927149865778e-05, + "loss": 1.5064, + "step": 10778 + }, + { + "epoch": 0.12, + "learning_rate": 2.5284643154679254e-05, + "loss": 6.4635, + "step": 10780 + }, + { + "epoch": 0.12, + "learning_rate": 2.528001481070073e-05, + "loss": 0.8544, + "step": 10782 + }, + { + "epoch": 0.12, + "learning_rate": 2.5275386466722205e-05, + "loss": 1.4406, + "step": 10784 + }, + { + "epoch": 0.12, + "learning_rate": 2.527075812274368e-05, + "loss": 1.1079, + "step": 10786 + }, + { + "epoch": 0.12, + "learning_rate": 2.5266129778765156e-05, + "loss": 0.0105, + "step": 10788 + }, + { + "epoch": 0.12, + "learning_rate": 2.5261501434786635e-05, + "loss": 0.3791, + "step": 10790 + }, + { + "epoch": 0.12, + "learning_rate": 2.525687309080811e-05, + "loss": 0.1399, + "step": 10792 + }, + { + "epoch": 0.12, + "learning_rate": 2.5252244746829585e-05, + "loss": 5.3318, + "step": 10794 + }, + { + "epoch": 0.12, + "learning_rate": 2.524761640285106e-05, + "loss": 1.1062, + "step": 10796 + }, + { + "epoch": 0.12, + "learning_rate": 2.5242988058872536e-05, + "loss": 3.1185, + "step": 10798 + }, + { + "epoch": 0.12, + "learning_rate": 2.5238359714894012e-05, + "loss": 0.7466, + "step": 10800 + }, + { + "epoch": 0.12, + "learning_rate": 2.5233731370915487e-05, + "loss": 3.2531, + "step": 10802 + }, + { + "epoch": 0.12, + "learning_rate": 2.5229103026936963e-05, + "loss": 5.683, + "step": 10804 + }, + { + "epoch": 0.12, + "learning_rate": 2.5224474682958438e-05, + "loss": 1.8788, + "step": 10806 + }, + { + "epoch": 0.12, + "learning_rate": 2.5219846338979913e-05, + "loss": 4.1938, + "step": 10808 + }, + { + "epoch": 0.12, + "learning_rate": 2.521521799500139e-05, + "loss": 0.6317, + "step": 10810 + }, + { + "epoch": 0.12, + "learning_rate": 2.5210589651022864e-05, + "loss": 0.6382, + "step": 10812 + }, + { + "epoch": 0.12, + "learning_rate": 2.520596130704434e-05, + "loss": 1.8167, + "step": 10814 + }, + { + "epoch": 0.12, + "learning_rate": 2.5201332963065815e-05, + "loss": 3.7617, + "step": 10816 + }, + { + "epoch": 0.12, + "learning_rate": 2.519670461908729e-05, + "loss": 0.4565, + "step": 10818 + }, + { + "epoch": 0.12, + "learning_rate": 2.5192076275108766e-05, + "loss": 8.7492, + "step": 10820 + }, + { + "epoch": 0.12, + "learning_rate": 2.518744793113024e-05, + "loss": 4.4655, + "step": 10822 + }, + { + "epoch": 0.12, + "learning_rate": 2.5182819587151717e-05, + "loss": 2.2671, + "step": 10824 + }, + { + "epoch": 0.12, + "learning_rate": 2.5178191243173192e-05, + "loss": 5.5665, + "step": 10826 + }, + { + "epoch": 0.12, + "learning_rate": 2.5173562899194668e-05, + "loss": 1.1654, + "step": 10828 + }, + { + "epoch": 0.12, + "learning_rate": 2.5168934555216147e-05, + "loss": 1.8174, + "step": 10830 + }, + { + "epoch": 0.12, + "learning_rate": 2.5164306211237622e-05, + "loss": 2.59, + "step": 10832 + }, + { + "epoch": 0.12, + "learning_rate": 2.5159677867259097e-05, + "loss": 1.6071, + "step": 10834 + }, + { + "epoch": 0.12, + "learning_rate": 2.5155049523280573e-05, + "loss": 0.6106, + "step": 10836 + }, + { + "epoch": 0.12, + "learning_rate": 2.5150421179302048e-05, + "loss": 4.189, + "step": 10838 + }, + { + "epoch": 0.12, + "learning_rate": 2.5145792835323524e-05, + "loss": 2.3909, + "step": 10840 + }, + { + "epoch": 0.12, + "learning_rate": 2.5141164491345e-05, + "loss": 0.5402, + "step": 10842 + }, + { + "epoch": 0.12, + "learning_rate": 2.5136536147366474e-05, + "loss": 1.583, + "step": 10844 + }, + { + "epoch": 0.12, + "learning_rate": 2.513190780338795e-05, + "loss": 1.6838, + "step": 10846 + }, + { + "epoch": 0.12, + "learning_rate": 2.5127279459409425e-05, + "loss": 0.6073, + "step": 10848 + }, + { + "epoch": 0.12, + "learning_rate": 2.51226511154309e-05, + "loss": 0.9323, + "step": 10850 + }, + { + "epoch": 0.12, + "learning_rate": 2.5118022771452376e-05, + "loss": 1.0693, + "step": 10852 + }, + { + "epoch": 0.13, + "learning_rate": 2.511339442747385e-05, + "loss": 3.8696, + "step": 10854 + }, + { + "epoch": 0.13, + "learning_rate": 2.5108766083495327e-05, + "loss": 0.0855, + "step": 10856 + }, + { + "epoch": 0.13, + "learning_rate": 2.5104137739516802e-05, + "loss": 1.5782, + "step": 10858 + }, + { + "epoch": 0.13, + "learning_rate": 2.5099509395538278e-05, + "loss": 0.5446, + "step": 10860 + }, + { + "epoch": 0.13, + "learning_rate": 2.5094881051559753e-05, + "loss": 1.9495, + "step": 10862 + }, + { + "epoch": 0.13, + "learning_rate": 2.509025270758123e-05, + "loss": 1.6402, + "step": 10864 + }, + { + "epoch": 0.13, + "learning_rate": 2.5085624363602704e-05, + "loss": 1.0244, + "step": 10866 + }, + { + "epoch": 0.13, + "learning_rate": 2.508099601962418e-05, + "loss": 2.1611, + "step": 10868 + }, + { + "epoch": 0.13, + "learning_rate": 2.507636767564566e-05, + "loss": 0.9849, + "step": 10870 + }, + { + "epoch": 0.13, + "learning_rate": 2.5071739331667134e-05, + "loss": 4.6912, + "step": 10872 + }, + { + "epoch": 0.13, + "learning_rate": 2.506711098768861e-05, + "loss": 0.0966, + "step": 10874 + }, + { + "epoch": 0.13, + "learning_rate": 2.5062482643710085e-05, + "loss": 0.6236, + "step": 10876 + }, + { + "epoch": 0.13, + "learning_rate": 2.505785429973156e-05, + "loss": 0.0915, + "step": 10878 + }, + { + "epoch": 0.13, + "learning_rate": 2.5053225955753036e-05, + "loss": 0.0418, + "step": 10880 + }, + { + "epoch": 0.13, + "learning_rate": 2.504859761177451e-05, + "loss": 4.8764, + "step": 10882 + }, + { + "epoch": 0.13, + "learning_rate": 2.5043969267795986e-05, + "loss": 0.0274, + "step": 10884 + }, + { + "epoch": 0.13, + "learning_rate": 2.5039340923817462e-05, + "loss": 0.9711, + "step": 10886 + }, + { + "epoch": 0.13, + "learning_rate": 2.5034712579838937e-05, + "loss": 3.4981, + "step": 10888 + }, + { + "epoch": 0.13, + "learning_rate": 2.5030084235860413e-05, + "loss": 4.7794, + "step": 10890 + }, + { + "epoch": 0.13, + "learning_rate": 2.5025455891881888e-05, + "loss": 2.7099, + "step": 10892 + }, + { + "epoch": 0.13, + "learning_rate": 2.5020827547903363e-05, + "loss": 8.029, + "step": 10894 + }, + { + "epoch": 0.13, + "learning_rate": 2.501619920392484e-05, + "loss": 0.4904, + "step": 10896 + }, + { + "epoch": 0.13, + "learning_rate": 2.5011570859946314e-05, + "loss": 1.0337, + "step": 10898 + }, + { + "epoch": 0.13, + "learning_rate": 2.500694251596779e-05, + "loss": 0.5419, + "step": 10900 + }, + { + "epoch": 0.13, + "learning_rate": 2.5002314171989265e-05, + "loss": 0.8179, + "step": 10902 + }, + { + "epoch": 0.13, + "learning_rate": 2.499768582801074e-05, + "loss": 4.9519, + "step": 10904 + }, + { + "epoch": 0.13, + "learning_rate": 2.4993057484032216e-05, + "loss": 5.257, + "step": 10906 + }, + { + "epoch": 0.13, + "learning_rate": 2.498842914005369e-05, + "loss": 1.3346, + "step": 10908 + }, + { + "epoch": 0.13, + "learning_rate": 2.4983800796075167e-05, + "loss": 1.2401, + "step": 10910 + }, + { + "epoch": 0.13, + "learning_rate": 2.4979172452096642e-05, + "loss": 0.4535, + "step": 10912 + }, + { + "epoch": 0.13, + "learning_rate": 2.4974544108118118e-05, + "loss": 4.1053, + "step": 10914 + }, + { + "epoch": 0.13, + "learning_rate": 2.4969915764139593e-05, + "loss": 0.9084, + "step": 10916 + }, + { + "epoch": 0.13, + "learning_rate": 2.496528742016107e-05, + "loss": 3.7412, + "step": 10918 + }, + { + "epoch": 0.13, + "learning_rate": 2.4960659076182544e-05, + "loss": 1.8607, + "step": 10920 + }, + { + "epoch": 0.13, + "learning_rate": 2.495603073220402e-05, + "loss": 1.4363, + "step": 10922 + }, + { + "epoch": 0.13, + "learning_rate": 2.4951402388225495e-05, + "loss": 2.905, + "step": 10924 + }, + { + "epoch": 0.13, + "learning_rate": 2.494677404424697e-05, + "loss": 0.9201, + "step": 10926 + }, + { + "epoch": 0.13, + "learning_rate": 2.4942145700268446e-05, + "loss": 0.1906, + "step": 10928 + }, + { + "epoch": 0.13, + "learning_rate": 2.493751735628992e-05, + "loss": 0.7125, + "step": 10930 + }, + { + "epoch": 0.13, + "learning_rate": 2.4932889012311397e-05, + "loss": 3.6161, + "step": 10932 + }, + { + "epoch": 0.13, + "learning_rate": 2.4928260668332872e-05, + "loss": 2.9353, + "step": 10934 + }, + { + "epoch": 0.13, + "learning_rate": 2.4923632324354347e-05, + "loss": 3.4616, + "step": 10936 + }, + { + "epoch": 0.13, + "learning_rate": 2.4919003980375823e-05, + "loss": 0.2123, + "step": 10938 + }, + { + "epoch": 0.13, + "learning_rate": 2.4914375636397298e-05, + "loss": 0.8262, + "step": 10940 + }, + { + "epoch": 0.13, + "learning_rate": 2.4909747292418774e-05, + "loss": 0.2252, + "step": 10942 + }, + { + "epoch": 0.13, + "learning_rate": 2.490511894844025e-05, + "loss": 0.0018, + "step": 10944 + }, + { + "epoch": 0.13, + "learning_rate": 2.4900490604461725e-05, + "loss": 4.9001, + "step": 10946 + }, + { + "epoch": 0.13, + "learning_rate": 2.48958622604832e-05, + "loss": 7.4127, + "step": 10948 + }, + { + "epoch": 0.13, + "learning_rate": 2.4891233916504675e-05, + "loss": 1.472, + "step": 10950 + }, + { + "epoch": 0.13, + "learning_rate": 2.488660557252615e-05, + "loss": 5.6679, + "step": 10952 + }, + { + "epoch": 0.13, + "learning_rate": 2.4881977228547626e-05, + "loss": 3.4975, + "step": 10954 + }, + { + "epoch": 0.13, + "learning_rate": 2.48773488845691e-05, + "loss": 1.5239, + "step": 10956 + }, + { + "epoch": 0.13, + "learning_rate": 2.4872720540590577e-05, + "loss": 1.6431, + "step": 10958 + }, + { + "epoch": 0.13, + "learning_rate": 2.4868092196612052e-05, + "loss": 5.719, + "step": 10960 + }, + { + "epoch": 0.13, + "learning_rate": 2.4863463852633528e-05, + "loss": 0.79, + "step": 10962 + }, + { + "epoch": 0.13, + "learning_rate": 2.4858835508655003e-05, + "loss": 1.7244, + "step": 10964 + }, + { + "epoch": 0.13, + "learning_rate": 2.485420716467648e-05, + "loss": 3.345, + "step": 10966 + }, + { + "epoch": 0.13, + "learning_rate": 2.4849578820697954e-05, + "loss": 3.5074, + "step": 10968 + }, + { + "epoch": 0.13, + "learning_rate": 2.484495047671943e-05, + "loss": 0.6708, + "step": 10970 + }, + { + "epoch": 0.13, + "learning_rate": 2.4840322132740905e-05, + "loss": 0.4075, + "step": 10972 + }, + { + "epoch": 0.13, + "learning_rate": 2.483569378876238e-05, + "loss": 0.5238, + "step": 10974 + }, + { + "epoch": 0.13, + "learning_rate": 2.4831065444783856e-05, + "loss": 0.636, + "step": 10976 + }, + { + "epoch": 0.13, + "learning_rate": 2.4826437100805335e-05, + "loss": 4.4648, + "step": 10978 + }, + { + "epoch": 0.13, + "learning_rate": 2.482180875682681e-05, + "loss": 7.7705, + "step": 10980 + }, + { + "epoch": 0.13, + "learning_rate": 2.4817180412848286e-05, + "loss": 1.9336, + "step": 10982 + }, + { + "epoch": 0.13, + "learning_rate": 2.481255206886976e-05, + "loss": 2.8995, + "step": 10984 + }, + { + "epoch": 0.13, + "learning_rate": 2.4807923724891236e-05, + "loss": 0.4243, + "step": 10986 + }, + { + "epoch": 0.13, + "learning_rate": 2.4803295380912712e-05, + "loss": 0.1339, + "step": 10988 + }, + { + "epoch": 0.13, + "learning_rate": 2.4798667036934187e-05, + "loss": 0.4538, + "step": 10990 + }, + { + "epoch": 0.13, + "learning_rate": 2.4794038692955663e-05, + "loss": 1.1609, + "step": 10992 + }, + { + "epoch": 0.13, + "learning_rate": 2.4789410348977138e-05, + "loss": 1.7119, + "step": 10994 + }, + { + "epoch": 0.13, + "learning_rate": 2.4784782004998614e-05, + "loss": 2.3233, + "step": 10996 + }, + { + "epoch": 0.13, + "learning_rate": 2.478015366102009e-05, + "loss": 1.4601, + "step": 10998 + }, + { + "epoch": 0.13, + "learning_rate": 2.4775525317041564e-05, + "loss": 0.1126, + "step": 11000 + }, + { + "epoch": 0.13, + "learning_rate": 2.477089697306304e-05, + "loss": 1.3814, + "step": 11002 + }, + { + "epoch": 0.13, + "learning_rate": 2.4766268629084515e-05, + "loss": 3.9005, + "step": 11004 + }, + { + "epoch": 0.13, + "learning_rate": 2.476164028510599e-05, + "loss": 1.4116, + "step": 11006 + }, + { + "epoch": 0.13, + "learning_rate": 2.4757011941127466e-05, + "loss": 0.0004, + "step": 11008 + }, + { + "epoch": 0.13, + "learning_rate": 2.475238359714894e-05, + "loss": 1.5694, + "step": 11010 + }, + { + "epoch": 0.13, + "learning_rate": 2.4747755253170417e-05, + "loss": 1.1881, + "step": 11012 + }, + { + "epoch": 0.13, + "learning_rate": 2.4743126909191892e-05, + "loss": 1.2821, + "step": 11014 + }, + { + "epoch": 0.13, + "learning_rate": 2.4738498565213368e-05, + "loss": 3.0448, + "step": 11016 + }, + { + "epoch": 0.13, + "learning_rate": 2.4733870221234843e-05, + "loss": 3.5375, + "step": 11018 + }, + { + "epoch": 0.13, + "learning_rate": 2.472924187725632e-05, + "loss": 1.6291, + "step": 11020 + }, + { + "epoch": 0.13, + "learning_rate": 2.4724613533277794e-05, + "loss": 3.6019, + "step": 11022 + }, + { + "epoch": 0.13, + "learning_rate": 2.471998518929927e-05, + "loss": 4.73, + "step": 11024 + }, + { + "epoch": 0.13, + "learning_rate": 2.4715356845320745e-05, + "loss": 0.4036, + "step": 11026 + }, + { + "epoch": 0.13, + "learning_rate": 2.471072850134222e-05, + "loss": 0.3473, + "step": 11028 + }, + { + "epoch": 0.13, + "learning_rate": 2.4706100157363696e-05, + "loss": 2.2996, + "step": 11030 + }, + { + "epoch": 0.13, + "learning_rate": 2.470147181338517e-05, + "loss": 2.5647, + "step": 11032 + }, + { + "epoch": 0.13, + "learning_rate": 2.4696843469406647e-05, + "loss": 1.5384, + "step": 11034 + }, + { + "epoch": 0.13, + "learning_rate": 2.4692215125428122e-05, + "loss": 0.062, + "step": 11036 + }, + { + "epoch": 0.13, + "learning_rate": 2.4687586781449597e-05, + "loss": 0.0008, + "step": 11038 + }, + { + "epoch": 0.13, + "learning_rate": 2.4682958437471073e-05, + "loss": 1.1076, + "step": 11040 + }, + { + "epoch": 0.13, + "learning_rate": 2.4678330093492548e-05, + "loss": 5.0152, + "step": 11042 + }, + { + "epoch": 0.13, + "learning_rate": 2.4673701749514024e-05, + "loss": 3.4418, + "step": 11044 + }, + { + "epoch": 0.13, + "learning_rate": 2.46690734055355e-05, + "loss": 2.503, + "step": 11046 + }, + { + "epoch": 0.13, + "learning_rate": 2.4664445061556975e-05, + "loss": 1.4055, + "step": 11048 + }, + { + "epoch": 0.13, + "learning_rate": 2.4659816717578453e-05, + "loss": 6.9918, + "step": 11050 + }, + { + "epoch": 0.13, + "learning_rate": 2.465518837359993e-05, + "loss": 1.0645, + "step": 11052 + }, + { + "epoch": 0.13, + "learning_rate": 2.4650560029621404e-05, + "loss": 0.4999, + "step": 11054 + }, + { + "epoch": 0.13, + "learning_rate": 2.464593168564288e-05, + "loss": 0.0068, + "step": 11056 + }, + { + "epoch": 0.13, + "learning_rate": 2.4641303341664355e-05, + "loss": 1.9184, + "step": 11058 + }, + { + "epoch": 0.13, + "learning_rate": 2.463667499768583e-05, + "loss": 5.7846, + "step": 11060 + }, + { + "epoch": 0.13, + "learning_rate": 2.4632046653707306e-05, + "loss": 1.9106, + "step": 11062 + }, + { + "epoch": 0.13, + "learning_rate": 2.462741830972878e-05, + "loss": 0.9288, + "step": 11064 + }, + { + "epoch": 0.13, + "learning_rate": 2.4622789965750257e-05, + "loss": 0.2789, + "step": 11066 + }, + { + "epoch": 0.13, + "learning_rate": 2.4618161621771732e-05, + "loss": 1.1125, + "step": 11068 + }, + { + "epoch": 0.13, + "learning_rate": 2.4613533277793208e-05, + "loss": 3.0751, + "step": 11070 + }, + { + "epoch": 0.13, + "learning_rate": 2.4608904933814683e-05, + "loss": 0.3909, + "step": 11072 + }, + { + "epoch": 0.13, + "learning_rate": 2.460427658983616e-05, + "loss": 2.9488, + "step": 11074 + }, + { + "epoch": 0.13, + "learning_rate": 2.4599648245857634e-05, + "loss": 2.9328, + "step": 11076 + }, + { + "epoch": 0.13, + "learning_rate": 2.459501990187911e-05, + "loss": 4.6936, + "step": 11078 + }, + { + "epoch": 0.13, + "learning_rate": 2.4590391557900585e-05, + "loss": 1.6501, + "step": 11080 + }, + { + "epoch": 0.13, + "learning_rate": 2.458576321392206e-05, + "loss": 1.4187, + "step": 11082 + }, + { + "epoch": 0.13, + "learning_rate": 2.4581134869943536e-05, + "loss": 2.6703, + "step": 11084 + }, + { + "epoch": 0.13, + "learning_rate": 2.457650652596501e-05, + "loss": 0.3244, + "step": 11086 + }, + { + "epoch": 0.13, + "learning_rate": 2.4571878181986486e-05, + "loss": 4.3483, + "step": 11088 + }, + { + "epoch": 0.13, + "learning_rate": 2.4567249838007962e-05, + "loss": 1.9387, + "step": 11090 + }, + { + "epoch": 0.13, + "learning_rate": 2.4562621494029437e-05, + "loss": 2.4125, + "step": 11092 + }, + { + "epoch": 0.13, + "learning_rate": 2.4557993150050913e-05, + "loss": 1.9836, + "step": 11094 + }, + { + "epoch": 0.13, + "learning_rate": 2.4553364806072388e-05, + "loss": 1.4088, + "step": 11096 + }, + { + "epoch": 0.13, + "learning_rate": 2.4548736462093864e-05, + "loss": 0.1005, + "step": 11098 + }, + { + "epoch": 0.13, + "learning_rate": 2.454410811811534e-05, + "loss": 3.8122, + "step": 11100 + }, + { + "epoch": 0.13, + "learning_rate": 2.4539479774136814e-05, + "loss": 1.5739, + "step": 11102 + }, + { + "epoch": 0.13, + "learning_rate": 2.453485143015829e-05, + "loss": 4.0685, + "step": 11104 + }, + { + "epoch": 0.13, + "learning_rate": 2.4530223086179765e-05, + "loss": 0.0224, + "step": 11106 + }, + { + "epoch": 0.13, + "learning_rate": 2.452559474220124e-05, + "loss": 4.1641, + "step": 11108 + }, + { + "epoch": 0.13, + "learning_rate": 2.4520966398222716e-05, + "loss": 1.3253, + "step": 11110 + }, + { + "epoch": 0.13, + "learning_rate": 2.451633805424419e-05, + "loss": 6.5043, + "step": 11112 + }, + { + "epoch": 0.13, + "learning_rate": 2.4511709710265667e-05, + "loss": 0.9125, + "step": 11114 + }, + { + "epoch": 0.13, + "learning_rate": 2.4507081366287142e-05, + "loss": 3.5488, + "step": 11116 + }, + { + "epoch": 0.13, + "learning_rate": 2.4502453022308618e-05, + "loss": 0.7689, + "step": 11118 + }, + { + "epoch": 0.13, + "learning_rate": 2.4497824678330093e-05, + "loss": 1.1446, + "step": 11120 + }, + { + "epoch": 0.13, + "learning_rate": 2.449319633435157e-05, + "loss": 9.1204, + "step": 11122 + }, + { + "epoch": 0.13, + "learning_rate": 2.4488567990373047e-05, + "loss": 1.1954, + "step": 11124 + }, + { + "epoch": 0.13, + "learning_rate": 2.4483939646394523e-05, + "loss": 0.4774, + "step": 11126 + }, + { + "epoch": 0.13, + "learning_rate": 2.4479311302416e-05, + "loss": 1.1023, + "step": 11128 + }, + { + "epoch": 0.13, + "learning_rate": 2.4474682958437474e-05, + "loss": 2.4272, + "step": 11130 + }, + { + "epoch": 0.13, + "learning_rate": 2.447005461445895e-05, + "loss": 1.8325, + "step": 11132 + }, + { + "epoch": 0.13, + "learning_rate": 2.4465426270480425e-05, + "loss": 3.4507, + "step": 11134 + }, + { + "epoch": 0.13, + "learning_rate": 2.44607979265019e-05, + "loss": 0.3498, + "step": 11136 + }, + { + "epoch": 0.13, + "learning_rate": 2.4456169582523375e-05, + "loss": 1.2935, + "step": 11138 + }, + { + "epoch": 0.13, + "learning_rate": 2.445154123854485e-05, + "loss": 0.4234, + "step": 11140 + }, + { + "epoch": 0.13, + "learning_rate": 2.4446912894566326e-05, + "loss": 3.0161, + "step": 11142 + }, + { + "epoch": 0.13, + "learning_rate": 2.4442284550587802e-05, + "loss": 2.6918, + "step": 11144 + }, + { + "epoch": 0.13, + "learning_rate": 2.4437656206609277e-05, + "loss": 0.7827, + "step": 11146 + }, + { + "epoch": 0.13, + "learning_rate": 2.4433027862630753e-05, + "loss": 1.3444, + "step": 11148 + }, + { + "epoch": 0.13, + "learning_rate": 2.4428399518652228e-05, + "loss": 1.5864, + "step": 11150 + }, + { + "epoch": 0.13, + "learning_rate": 2.4423771174673703e-05, + "loss": 1.1552, + "step": 11152 + }, + { + "epoch": 0.13, + "learning_rate": 2.441914283069518e-05, + "loss": 3.0909, + "step": 11154 + }, + { + "epoch": 0.13, + "learning_rate": 2.4414514486716654e-05, + "loss": 1.6787, + "step": 11156 + }, + { + "epoch": 0.13, + "learning_rate": 2.440988614273813e-05, + "loss": 1.4011, + "step": 11158 + }, + { + "epoch": 0.13, + "learning_rate": 2.4405257798759605e-05, + "loss": 0.4605, + "step": 11160 + }, + { + "epoch": 0.13, + "learning_rate": 2.440062945478108e-05, + "loss": 1.6797, + "step": 11162 + }, + { + "epoch": 0.13, + "learning_rate": 2.4396001110802556e-05, + "loss": 3.6558, + "step": 11164 + }, + { + "epoch": 0.13, + "learning_rate": 2.439137276682403e-05, + "loss": 0.7617, + "step": 11166 + }, + { + "epoch": 0.13, + "learning_rate": 2.4386744422845507e-05, + "loss": 2.3279, + "step": 11168 + }, + { + "epoch": 0.13, + "learning_rate": 2.4382116078866982e-05, + "loss": 1.2202, + "step": 11170 + }, + { + "epoch": 0.13, + "learning_rate": 2.4377487734888458e-05, + "loss": 5.5606, + "step": 11172 + }, + { + "epoch": 0.13, + "learning_rate": 2.4372859390909933e-05, + "loss": 1.1671, + "step": 11174 + }, + { + "epoch": 0.13, + "learning_rate": 2.436823104693141e-05, + "loss": 1.4214, + "step": 11176 + }, + { + "epoch": 0.13, + "learning_rate": 2.4363602702952884e-05, + "loss": 0.0108, + "step": 11178 + }, + { + "epoch": 0.13, + "learning_rate": 2.435897435897436e-05, + "loss": 0.0096, + "step": 11180 + }, + { + "epoch": 0.13, + "learning_rate": 2.4354346014995835e-05, + "loss": 9.1698, + "step": 11182 + }, + { + "epoch": 0.13, + "learning_rate": 2.434971767101731e-05, + "loss": 3.0189, + "step": 11184 + }, + { + "epoch": 0.13, + "learning_rate": 2.4345089327038786e-05, + "loss": 2.0515, + "step": 11186 + }, + { + "epoch": 0.13, + "learning_rate": 2.434046098306026e-05, + "loss": 0.8165, + "step": 11188 + }, + { + "epoch": 0.13, + "learning_rate": 2.4335832639081736e-05, + "loss": 2.0105, + "step": 11190 + }, + { + "epoch": 0.13, + "learning_rate": 2.4331204295103212e-05, + "loss": 2.287, + "step": 11192 + }, + { + "epoch": 0.13, + "learning_rate": 2.4326575951124687e-05, + "loss": 4.134, + "step": 11194 + }, + { + "epoch": 0.13, + "learning_rate": 2.4321947607146163e-05, + "loss": 1.9106, + "step": 11196 + }, + { + "epoch": 0.13, + "learning_rate": 2.431731926316764e-05, + "loss": 3.4879, + "step": 11198 + }, + { + "epoch": 0.13, + "learning_rate": 2.4312690919189117e-05, + "loss": 4.2655, + "step": 11200 + }, + { + "epoch": 0.13, + "learning_rate": 2.4308062575210592e-05, + "loss": 1.7051, + "step": 11202 + }, + { + "epoch": 0.13, + "learning_rate": 2.4303434231232068e-05, + "loss": 0.6493, + "step": 11204 + }, + { + "epoch": 0.13, + "learning_rate": 2.4298805887253543e-05, + "loss": 0.2021, + "step": 11206 + }, + { + "epoch": 0.13, + "learning_rate": 2.429417754327502e-05, + "loss": 1.7346, + "step": 11208 + }, + { + "epoch": 0.13, + "learning_rate": 2.4289549199296494e-05, + "loss": 0.7857, + "step": 11210 + }, + { + "epoch": 0.13, + "learning_rate": 2.428492085531797e-05, + "loss": 2.1978, + "step": 11212 + }, + { + "epoch": 0.13, + "learning_rate": 2.4280292511339445e-05, + "loss": 1.2276, + "step": 11214 + }, + { + "epoch": 0.13, + "learning_rate": 2.427566416736092e-05, + "loss": 2.8579, + "step": 11216 + }, + { + "epoch": 0.13, + "learning_rate": 2.4271035823382396e-05, + "loss": 4.1516, + "step": 11218 + }, + { + "epoch": 0.13, + "learning_rate": 2.426640747940387e-05, + "loss": 2.4513, + "step": 11220 + }, + { + "epoch": 0.13, + "learning_rate": 2.4261779135425347e-05, + "loss": 0.0045, + "step": 11222 + }, + { + "epoch": 0.13, + "learning_rate": 2.4257150791446822e-05, + "loss": 4.3487, + "step": 11224 + }, + { + "epoch": 0.13, + "learning_rate": 2.4252522447468298e-05, + "loss": 1.3267, + "step": 11226 + }, + { + "epoch": 0.13, + "learning_rate": 2.4247894103489773e-05, + "loss": 0.9638, + "step": 11228 + }, + { + "epoch": 0.13, + "learning_rate": 2.424326575951125e-05, + "loss": 1.8809, + "step": 11230 + }, + { + "epoch": 0.13, + "learning_rate": 2.4238637415532724e-05, + "loss": 1.6923, + "step": 11232 + }, + { + "epoch": 0.13, + "learning_rate": 2.42340090715542e-05, + "loss": 0.0938, + "step": 11234 + }, + { + "epoch": 0.13, + "learning_rate": 2.4229380727575675e-05, + "loss": 0.5582, + "step": 11236 + }, + { + "epoch": 0.13, + "learning_rate": 2.422475238359715e-05, + "loss": 0.6677, + "step": 11238 + }, + { + "epoch": 0.13, + "learning_rate": 2.4220124039618625e-05, + "loss": 1.8938, + "step": 11240 + }, + { + "epoch": 0.13, + "learning_rate": 2.42154956956401e-05, + "loss": 0.2298, + "step": 11242 + }, + { + "epoch": 0.13, + "learning_rate": 2.4210867351661576e-05, + "loss": 4.7688, + "step": 11244 + }, + { + "epoch": 0.13, + "learning_rate": 2.4206239007683052e-05, + "loss": 2.1021, + "step": 11246 + }, + { + "epoch": 0.13, + "learning_rate": 2.4201610663704527e-05, + "loss": 4.8994, + "step": 11248 + }, + { + "epoch": 0.13, + "learning_rate": 2.4196982319726003e-05, + "loss": 0.1836, + "step": 11250 + }, + { + "epoch": 0.13, + "learning_rate": 2.4192353975747478e-05, + "loss": 2.2905, + "step": 11252 + }, + { + "epoch": 0.13, + "learning_rate": 2.4187725631768953e-05, + "loss": 2.5842, + "step": 11254 + }, + { + "epoch": 0.13, + "learning_rate": 2.418309728779043e-05, + "loss": 3.5241, + "step": 11256 + }, + { + "epoch": 0.13, + "learning_rate": 2.4178468943811904e-05, + "loss": 0.538, + "step": 11258 + }, + { + "epoch": 0.13, + "learning_rate": 2.417384059983338e-05, + "loss": 0.7227, + "step": 11260 + }, + { + "epoch": 0.13, + "learning_rate": 2.4169212255854855e-05, + "loss": 2.627, + "step": 11262 + }, + { + "epoch": 0.13, + "learning_rate": 2.416458391187633e-05, + "loss": 0.2266, + "step": 11264 + }, + { + "epoch": 0.13, + "learning_rate": 2.4159955567897806e-05, + "loss": 2.3895, + "step": 11266 + }, + { + "epoch": 0.13, + "learning_rate": 2.415532722391928e-05, + "loss": 0.0019, + "step": 11268 + }, + { + "epoch": 0.13, + "learning_rate": 2.4150698879940757e-05, + "loss": 1.3735, + "step": 11270 + }, + { + "epoch": 0.13, + "learning_rate": 2.4146070535962236e-05, + "loss": 2.5134, + "step": 11272 + }, + { + "epoch": 0.13, + "learning_rate": 2.414144219198371e-05, + "loss": 3.7204, + "step": 11274 + }, + { + "epoch": 0.13, + "learning_rate": 2.4136813848005187e-05, + "loss": 1.5583, + "step": 11276 + }, + { + "epoch": 0.13, + "learning_rate": 2.4132185504026662e-05, + "loss": 4.004, + "step": 11278 + }, + { + "epoch": 0.13, + "learning_rate": 2.4127557160048137e-05, + "loss": 0.0321, + "step": 11280 + }, + { + "epoch": 0.13, + "learning_rate": 2.4122928816069613e-05, + "loss": 1.0213, + "step": 11282 + }, + { + "epoch": 0.13, + "learning_rate": 2.4118300472091088e-05, + "loss": 0.0752, + "step": 11284 + }, + { + "epoch": 0.13, + "learning_rate": 2.4113672128112564e-05, + "loss": 0.2098, + "step": 11286 + }, + { + "epoch": 0.13, + "learning_rate": 2.410904378413404e-05, + "loss": 0.029, + "step": 11288 + }, + { + "epoch": 0.13, + "learning_rate": 2.4104415440155514e-05, + "loss": 2.3837, + "step": 11290 + }, + { + "epoch": 0.13, + "learning_rate": 2.409978709617699e-05, + "loss": 5.4758, + "step": 11292 + }, + { + "epoch": 0.13, + "learning_rate": 2.4095158752198465e-05, + "loss": 0.0235, + "step": 11294 + }, + { + "epoch": 0.13, + "learning_rate": 2.409053040821994e-05, + "loss": 2.3618, + "step": 11296 + }, + { + "epoch": 0.13, + "learning_rate": 2.4085902064241416e-05, + "loss": 2.8237, + "step": 11298 + }, + { + "epoch": 0.13, + "learning_rate": 2.408127372026289e-05, + "loss": 5.1632, + "step": 11300 + }, + { + "epoch": 0.13, + "learning_rate": 2.4076645376284364e-05, + "loss": 3.2161, + "step": 11302 + }, + { + "epoch": 0.13, + "learning_rate": 2.4072017032305842e-05, + "loss": 5.8994, + "step": 11304 + }, + { + "epoch": 0.13, + "learning_rate": 2.4067388688327318e-05, + "loss": 1.0395, + "step": 11306 + }, + { + "epoch": 0.13, + "learning_rate": 2.4062760344348793e-05, + "loss": 0.8664, + "step": 11308 + }, + { + "epoch": 0.13, + "learning_rate": 2.405813200037027e-05, + "loss": 3.0925, + "step": 11310 + }, + { + "epoch": 0.13, + "learning_rate": 2.4053503656391744e-05, + "loss": 0.0007, + "step": 11312 + }, + { + "epoch": 0.13, + "learning_rate": 2.404887531241322e-05, + "loss": 2.9255, + "step": 11314 + }, + { + "epoch": 0.13, + "learning_rate": 2.4044246968434695e-05, + "loss": 0.8451, + "step": 11316 + }, + { + "epoch": 0.13, + "learning_rate": 2.403961862445617e-05, + "loss": 0.9974, + "step": 11318 + }, + { + "epoch": 0.13, + "learning_rate": 2.4034990280477646e-05, + "loss": 0.4491, + "step": 11320 + }, + { + "epoch": 0.13, + "learning_rate": 2.403036193649912e-05, + "loss": 4.5837, + "step": 11322 + }, + { + "epoch": 0.13, + "learning_rate": 2.4025733592520597e-05, + "loss": 0.0209, + "step": 11324 + }, + { + "epoch": 0.13, + "learning_rate": 2.4021105248542072e-05, + "loss": 0.8152, + "step": 11326 + }, + { + "epoch": 0.13, + "learning_rate": 2.4016476904563548e-05, + "loss": 2.4669, + "step": 11328 + }, + { + "epoch": 0.13, + "learning_rate": 2.4011848560585023e-05, + "loss": 1.0523, + "step": 11330 + }, + { + "epoch": 0.13, + "learning_rate": 2.40072202166065e-05, + "loss": 2.2443, + "step": 11332 + }, + { + "epoch": 0.13, + "learning_rate": 2.4002591872627974e-05, + "loss": 3.2072, + "step": 11334 + }, + { + "epoch": 0.13, + "learning_rate": 2.399796352864945e-05, + "loss": 0.8773, + "step": 11336 + }, + { + "epoch": 0.13, + "learning_rate": 2.3993335184670925e-05, + "loss": 6.5746, + "step": 11338 + }, + { + "epoch": 0.13, + "learning_rate": 2.39887068406924e-05, + "loss": 2.7629, + "step": 11340 + }, + { + "epoch": 0.13, + "learning_rate": 2.3984078496713876e-05, + "loss": 4.9417, + "step": 11342 + }, + { + "epoch": 0.13, + "learning_rate": 2.3979450152735354e-05, + "loss": 1.7322, + "step": 11344 + }, + { + "epoch": 0.13, + "learning_rate": 2.397482180875683e-05, + "loss": 1.1291, + "step": 11346 + }, + { + "epoch": 0.13, + "learning_rate": 2.3970193464778305e-05, + "loss": 0.7873, + "step": 11348 + }, + { + "epoch": 0.13, + "learning_rate": 2.396556512079978e-05, + "loss": 1.149, + "step": 11350 + }, + { + "epoch": 0.13, + "learning_rate": 2.3960936776821256e-05, + "loss": 3.3263, + "step": 11352 + }, + { + "epoch": 0.13, + "learning_rate": 2.395630843284273e-05, + "loss": 3.1071, + "step": 11354 + }, + { + "epoch": 0.13, + "learning_rate": 2.3951680088864207e-05, + "loss": 1.3147, + "step": 11356 + }, + { + "epoch": 0.13, + "learning_rate": 2.3947051744885682e-05, + "loss": 2.9403, + "step": 11358 + }, + { + "epoch": 0.13, + "learning_rate": 2.3942423400907158e-05, + "loss": 5.2262, + "step": 11360 + }, + { + "epoch": 0.13, + "learning_rate": 2.3937795056928633e-05, + "loss": 2.6371, + "step": 11362 + }, + { + "epoch": 0.13, + "learning_rate": 2.393316671295011e-05, + "loss": 1.627, + "step": 11364 + }, + { + "epoch": 0.13, + "learning_rate": 2.3928538368971584e-05, + "loss": 1.5642, + "step": 11366 + }, + { + "epoch": 0.13, + "learning_rate": 2.392391002499306e-05, + "loss": 2.2996, + "step": 11368 + }, + { + "epoch": 0.13, + "learning_rate": 2.3919281681014535e-05, + "loss": 2.8258, + "step": 11370 + }, + { + "epoch": 0.13, + "learning_rate": 2.391465333703601e-05, + "loss": 1.2265, + "step": 11372 + }, + { + "epoch": 0.13, + "learning_rate": 2.3910024993057486e-05, + "loss": 3.4283, + "step": 11374 + }, + { + "epoch": 0.13, + "learning_rate": 2.3905396649078958e-05, + "loss": 2.0581, + "step": 11376 + }, + { + "epoch": 0.13, + "learning_rate": 2.3900768305100437e-05, + "loss": 0.5135, + "step": 11378 + }, + { + "epoch": 0.13, + "learning_rate": 2.3896139961121912e-05, + "loss": 0.0293, + "step": 11380 + }, + { + "epoch": 0.13, + "learning_rate": 2.3891511617143387e-05, + "loss": 0.8802, + "step": 11382 + }, + { + "epoch": 0.13, + "learning_rate": 2.3886883273164863e-05, + "loss": 0.0109, + "step": 11384 + }, + { + "epoch": 0.13, + "learning_rate": 2.3882254929186338e-05, + "loss": 2.0542, + "step": 11386 + }, + { + "epoch": 0.13, + "learning_rate": 2.3877626585207814e-05, + "loss": 3.0506, + "step": 11388 + }, + { + "epoch": 0.13, + "learning_rate": 2.387299824122929e-05, + "loss": 3.9052, + "step": 11390 + }, + { + "epoch": 0.13, + "learning_rate": 2.3868369897250765e-05, + "loss": 2.5502, + "step": 11392 + }, + { + "epoch": 0.13, + "learning_rate": 2.386374155327224e-05, + "loss": 1.7489, + "step": 11394 + }, + { + "epoch": 0.13, + "learning_rate": 2.3859113209293715e-05, + "loss": 4.8033, + "step": 11396 + }, + { + "epoch": 0.13, + "learning_rate": 2.385448486531519e-05, + "loss": 1.0647, + "step": 11398 + }, + { + "epoch": 0.13, + "learning_rate": 2.3849856521336666e-05, + "loss": 1.4736, + "step": 11400 + }, + { + "epoch": 0.13, + "learning_rate": 2.384522817735814e-05, + "loss": 3.6563, + "step": 11402 + }, + { + "epoch": 0.13, + "learning_rate": 2.3840599833379617e-05, + "loss": 0.4296, + "step": 11404 + }, + { + "epoch": 0.13, + "learning_rate": 2.3835971489401093e-05, + "loss": 4.3785, + "step": 11406 + }, + { + "epoch": 0.13, + "learning_rate": 2.3831343145422568e-05, + "loss": 0.1951, + "step": 11408 + }, + { + "epoch": 0.13, + "learning_rate": 2.3826714801444043e-05, + "loss": 2.6543, + "step": 11410 + }, + { + "epoch": 0.13, + "learning_rate": 2.382208645746552e-05, + "loss": 1.4794, + "step": 11412 + }, + { + "epoch": 0.13, + "learning_rate": 2.3817458113486994e-05, + "loss": 4.8881, + "step": 11414 + }, + { + "epoch": 0.13, + "learning_rate": 2.381282976950847e-05, + "loss": 3.9938, + "step": 11416 + }, + { + "epoch": 0.13, + "learning_rate": 2.380820142552995e-05, + "loss": 1.8005, + "step": 11418 + }, + { + "epoch": 0.13, + "learning_rate": 2.3803573081551424e-05, + "loss": 2.5097, + "step": 11420 + }, + { + "epoch": 0.13, + "learning_rate": 2.37989447375729e-05, + "loss": 1.7501, + "step": 11422 + }, + { + "epoch": 0.13, + "learning_rate": 2.3794316393594375e-05, + "loss": 1.7017, + "step": 11424 + }, + { + "epoch": 0.13, + "learning_rate": 2.378968804961585e-05, + "loss": 2.2042, + "step": 11426 + }, + { + "epoch": 0.13, + "learning_rate": 2.3785059705637326e-05, + "loss": 2.4984, + "step": 11428 + }, + { + "epoch": 0.13, + "learning_rate": 2.37804313616588e-05, + "loss": 2.1332, + "step": 11430 + }, + { + "epoch": 0.13, + "learning_rate": 2.3775803017680276e-05, + "loss": 2.3465, + "step": 11432 + }, + { + "epoch": 0.13, + "learning_rate": 2.3771174673701752e-05, + "loss": 0.4042, + "step": 11434 + }, + { + "epoch": 0.13, + "learning_rate": 2.3766546329723227e-05, + "loss": 0.0968, + "step": 11436 + }, + { + "epoch": 0.13, + "learning_rate": 2.3761917985744703e-05, + "loss": 1.6538, + "step": 11438 + }, + { + "epoch": 0.13, + "learning_rate": 2.3757289641766178e-05, + "loss": 1.3352, + "step": 11440 + }, + { + "epoch": 0.13, + "learning_rate": 2.3752661297787654e-05, + "loss": 1.3705, + "step": 11442 + }, + { + "epoch": 0.13, + "learning_rate": 2.374803295380913e-05, + "loss": 3.4098, + "step": 11444 + }, + { + "epoch": 0.13, + "learning_rate": 2.3743404609830604e-05, + "loss": 1.3245, + "step": 11446 + }, + { + "epoch": 0.13, + "learning_rate": 2.3738776265852076e-05, + "loss": 0.0375, + "step": 11448 + }, + { + "epoch": 0.13, + "learning_rate": 2.3734147921873552e-05, + "loss": 0.3716, + "step": 11450 + }, + { + "epoch": 0.13, + "learning_rate": 2.372951957789503e-05, + "loss": 1.5376, + "step": 11452 + }, + { + "epoch": 0.13, + "learning_rate": 2.3724891233916506e-05, + "loss": 0.5546, + "step": 11454 + }, + { + "epoch": 0.13, + "learning_rate": 2.372026288993798e-05, + "loss": 1.0139, + "step": 11456 + }, + { + "epoch": 0.13, + "learning_rate": 2.3715634545959457e-05, + "loss": 0.0023, + "step": 11458 + }, + { + "epoch": 0.13, + "learning_rate": 2.3711006201980932e-05, + "loss": 3.0647, + "step": 11460 + }, + { + "epoch": 0.13, + "learning_rate": 2.3706377858002408e-05, + "loss": 3.8631, + "step": 11462 + }, + { + "epoch": 0.13, + "learning_rate": 2.3701749514023883e-05, + "loss": 0.6844, + "step": 11464 + }, + { + "epoch": 0.13, + "learning_rate": 2.369712117004536e-05, + "loss": 0.3766, + "step": 11466 + }, + { + "epoch": 0.13, + "learning_rate": 2.3692492826066834e-05, + "loss": 2.7217, + "step": 11468 + }, + { + "epoch": 0.13, + "learning_rate": 2.368786448208831e-05, + "loss": 0.6464, + "step": 11470 + }, + { + "epoch": 0.13, + "learning_rate": 2.3683236138109785e-05, + "loss": 1.7295, + "step": 11472 + }, + { + "epoch": 0.13, + "learning_rate": 2.367860779413126e-05, + "loss": 1.1876, + "step": 11474 + }, + { + "epoch": 0.13, + "learning_rate": 2.3673979450152736e-05, + "loss": 1.2564, + "step": 11476 + }, + { + "epoch": 0.13, + "learning_rate": 2.366935110617421e-05, + "loss": 1.8553, + "step": 11478 + }, + { + "epoch": 0.13, + "learning_rate": 2.3664722762195687e-05, + "loss": 0.5782, + "step": 11480 + }, + { + "epoch": 0.13, + "learning_rate": 2.3660094418217162e-05, + "loss": 2.9281, + "step": 11482 + }, + { + "epoch": 0.13, + "learning_rate": 2.3655466074238637e-05, + "loss": 3.0873, + "step": 11484 + }, + { + "epoch": 0.13, + "learning_rate": 2.3650837730260113e-05, + "loss": 2.3988, + "step": 11486 + }, + { + "epoch": 0.13, + "learning_rate": 2.3646209386281588e-05, + "loss": 0.8782, + "step": 11488 + }, + { + "epoch": 0.13, + "learning_rate": 2.3641581042303064e-05, + "loss": 0.538, + "step": 11490 + }, + { + "epoch": 0.13, + "learning_rate": 2.3636952698324543e-05, + "loss": 3.4558, + "step": 11492 + }, + { + "epoch": 0.13, + "learning_rate": 2.3632324354346018e-05, + "loss": 0.8473, + "step": 11494 + }, + { + "epoch": 0.13, + "learning_rate": 2.3627696010367493e-05, + "loss": 4.7022, + "step": 11496 + }, + { + "epoch": 0.13, + "learning_rate": 2.362306766638897e-05, + "loss": 2.7692, + "step": 11498 + }, + { + "epoch": 0.13, + "learning_rate": 2.3618439322410444e-05, + "loss": 0.2777, + "step": 11500 + }, + { + "epoch": 0.13, + "learning_rate": 2.361381097843192e-05, + "loss": 2.711, + "step": 11502 + }, + { + "epoch": 0.13, + "learning_rate": 2.3609182634453395e-05, + "loss": 0.3319, + "step": 11504 + }, + { + "epoch": 0.13, + "learning_rate": 2.360455429047487e-05, + "loss": 2.4296, + "step": 11506 + }, + { + "epoch": 0.13, + "learning_rate": 2.3599925946496346e-05, + "loss": 0.0285, + "step": 11508 + }, + { + "epoch": 0.13, + "learning_rate": 2.359529760251782e-05, + "loss": 0.8064, + "step": 11510 + }, + { + "epoch": 0.13, + "learning_rate": 2.3590669258539297e-05, + "loss": 0.7026, + "step": 11512 + }, + { + "epoch": 0.13, + "learning_rate": 2.3586040914560772e-05, + "loss": 2.4599, + "step": 11514 + }, + { + "epoch": 0.13, + "learning_rate": 2.3581412570582248e-05, + "loss": 3.1368, + "step": 11516 + }, + { + "epoch": 0.13, + "learning_rate": 2.3576784226603723e-05, + "loss": 0.6655, + "step": 11518 + }, + { + "epoch": 0.13, + "learning_rate": 2.3572155882625195e-05, + "loss": 1.0324, + "step": 11520 + }, + { + "epoch": 0.13, + "learning_rate": 2.356752753864667e-05, + "loss": 2.6682, + "step": 11522 + }, + { + "epoch": 0.13, + "learning_rate": 2.356289919466815e-05, + "loss": 4.3664, + "step": 11524 + }, + { + "epoch": 0.13, + "learning_rate": 2.3558270850689625e-05, + "loss": 3.0065, + "step": 11526 + }, + { + "epoch": 0.13, + "learning_rate": 2.35536425067111e-05, + "loss": 2.6501, + "step": 11528 + }, + { + "epoch": 0.13, + "learning_rate": 2.3549014162732576e-05, + "loss": 3.4268, + "step": 11530 + }, + { + "epoch": 0.13, + "learning_rate": 2.354438581875405e-05, + "loss": 1.523, + "step": 11532 + }, + { + "epoch": 0.13, + "learning_rate": 2.3539757474775526e-05, + "loss": 0.0009, + "step": 11534 + }, + { + "epoch": 0.13, + "learning_rate": 2.3535129130797002e-05, + "loss": 0.3034, + "step": 11536 + }, + { + "epoch": 0.13, + "learning_rate": 2.3530500786818477e-05, + "loss": 3.6636, + "step": 11538 + }, + { + "epoch": 0.13, + "learning_rate": 2.3525872442839953e-05, + "loss": 0.1919, + "step": 11540 + }, + { + "epoch": 0.13, + "learning_rate": 2.3521244098861428e-05, + "loss": 4.7357, + "step": 11542 + }, + { + "epoch": 0.13, + "learning_rate": 2.3516615754882904e-05, + "loss": 4.7137, + "step": 11544 + }, + { + "epoch": 0.13, + "learning_rate": 2.351198741090438e-05, + "loss": 1.358, + "step": 11546 + }, + { + "epoch": 0.13, + "learning_rate": 2.3507359066925854e-05, + "loss": 0.7011, + "step": 11548 + }, + { + "epoch": 0.13, + "learning_rate": 2.350273072294733e-05, + "loss": 1.4574, + "step": 11550 + }, + { + "epoch": 0.13, + "learning_rate": 2.3498102378968805e-05, + "loss": 0.2736, + "step": 11552 + }, + { + "epoch": 0.13, + "learning_rate": 2.349347403499028e-05, + "loss": 0.1344, + "step": 11554 + }, + { + "epoch": 0.13, + "learning_rate": 2.3488845691011756e-05, + "loss": 1.2024, + "step": 11556 + }, + { + "epoch": 0.13, + "learning_rate": 2.348421734703323e-05, + "loss": 0.0404, + "step": 11558 + }, + { + "epoch": 0.13, + "learning_rate": 2.3479589003054707e-05, + "loss": 0.33, + "step": 11560 + }, + { + "epoch": 0.13, + "learning_rate": 2.3474960659076182e-05, + "loss": 0.7041, + "step": 11562 + }, + { + "epoch": 0.13, + "learning_rate": 2.3470332315097658e-05, + "loss": 10.2638, + "step": 11564 + }, + { + "epoch": 0.13, + "learning_rate": 2.3465703971119137e-05, + "loss": 7.7568, + "step": 11566 + }, + { + "epoch": 0.13, + "learning_rate": 2.3461075627140612e-05, + "loss": 0.0003, + "step": 11568 + }, + { + "epoch": 0.13, + "learning_rate": 2.3456447283162087e-05, + "loss": 2.2152, + "step": 11570 + }, + { + "epoch": 0.13, + "learning_rate": 2.3451818939183563e-05, + "loss": 4.1176, + "step": 11572 + }, + { + "epoch": 0.13, + "learning_rate": 2.344719059520504e-05, + "loss": 2.023, + "step": 11574 + }, + { + "epoch": 0.13, + "learning_rate": 2.3442562251226514e-05, + "loss": 5.7705, + "step": 11576 + }, + { + "epoch": 0.13, + "learning_rate": 2.343793390724799e-05, + "loss": 3.1709, + "step": 11578 + }, + { + "epoch": 0.13, + "learning_rate": 2.3433305563269465e-05, + "loss": 0.0009, + "step": 11580 + }, + { + "epoch": 0.13, + "learning_rate": 2.342867721929094e-05, + "loss": 2.233, + "step": 11582 + }, + { + "epoch": 0.13, + "learning_rate": 2.3424048875312415e-05, + "loss": 4.4059, + "step": 11584 + }, + { + "epoch": 0.13, + "learning_rate": 2.341942053133389e-05, + "loss": 1.0962, + "step": 11586 + }, + { + "epoch": 0.13, + "learning_rate": 2.3414792187355366e-05, + "loss": 1.2199, + "step": 11588 + }, + { + "epoch": 0.13, + "learning_rate": 2.3410163843376842e-05, + "loss": 0.0006, + "step": 11590 + }, + { + "epoch": 0.13, + "learning_rate": 2.3405535499398314e-05, + "loss": 0.0079, + "step": 11592 + }, + { + "epoch": 0.13, + "learning_rate": 2.340090715541979e-05, + "loss": 0.9957, + "step": 11594 + }, + { + "epoch": 0.13, + "learning_rate": 2.3396278811441265e-05, + "loss": 0.0022, + "step": 11596 + }, + { + "epoch": 0.13, + "learning_rate": 2.3391650467462743e-05, + "loss": 0.7252, + "step": 11598 + }, + { + "epoch": 0.13, + "learning_rate": 2.338702212348422e-05, + "loss": 1.7499, + "step": 11600 + }, + { + "epoch": 0.13, + "learning_rate": 2.3382393779505694e-05, + "loss": 1.8626, + "step": 11602 + }, + { + "epoch": 0.13, + "learning_rate": 2.337776543552717e-05, + "loss": 3.66, + "step": 11604 + }, + { + "epoch": 0.13, + "learning_rate": 2.3373137091548645e-05, + "loss": 4.4469, + "step": 11606 + }, + { + "epoch": 0.13, + "learning_rate": 2.336850874757012e-05, + "loss": 0.0276, + "step": 11608 + }, + { + "epoch": 0.13, + "learning_rate": 2.3363880403591596e-05, + "loss": 1.7478, + "step": 11610 + }, + { + "epoch": 0.13, + "learning_rate": 2.335925205961307e-05, + "loss": 2.1135, + "step": 11612 + }, + { + "epoch": 0.13, + "learning_rate": 2.3354623715634547e-05, + "loss": 0.6441, + "step": 11614 + }, + { + "epoch": 0.13, + "learning_rate": 2.3349995371656022e-05, + "loss": 2.9487, + "step": 11616 + }, + { + "epoch": 0.13, + "learning_rate": 2.3345367027677498e-05, + "loss": 0.7412, + "step": 11618 + }, + { + "epoch": 0.13, + "learning_rate": 2.3340738683698973e-05, + "loss": 5.2675, + "step": 11620 + }, + { + "epoch": 0.13, + "learning_rate": 2.333611033972045e-05, + "loss": 1.0765, + "step": 11622 + }, + { + "epoch": 0.13, + "learning_rate": 2.3331481995741924e-05, + "loss": 1.6913, + "step": 11624 + }, + { + "epoch": 0.13, + "learning_rate": 2.33268536517634e-05, + "loss": 6.9639, + "step": 11626 + }, + { + "epoch": 0.13, + "learning_rate": 2.3322225307784875e-05, + "loss": 1.9966, + "step": 11628 + }, + { + "epoch": 0.13, + "learning_rate": 2.331759696380635e-05, + "loss": 5.2761, + "step": 11630 + }, + { + "epoch": 0.13, + "learning_rate": 2.3312968619827826e-05, + "loss": 0.9856, + "step": 11632 + }, + { + "epoch": 0.13, + "learning_rate": 2.33083402758493e-05, + "loss": 0.5208, + "step": 11634 + }, + { + "epoch": 0.13, + "learning_rate": 2.3303711931870776e-05, + "loss": 0.0003, + "step": 11636 + }, + { + "epoch": 0.13, + "learning_rate": 2.3299083587892255e-05, + "loss": 3.4098, + "step": 11638 + }, + { + "epoch": 0.13, + "learning_rate": 2.329445524391373e-05, + "loss": 2.8005, + "step": 11640 + }, + { + "epoch": 0.13, + "learning_rate": 2.3289826899935206e-05, + "loss": 2.3016, + "step": 11642 + }, + { + "epoch": 0.13, + "learning_rate": 2.328519855595668e-05, + "loss": 1.4691, + "step": 11644 + }, + { + "epoch": 0.13, + "learning_rate": 2.3280570211978157e-05, + "loss": 2.1602, + "step": 11646 + }, + { + "epoch": 0.13, + "learning_rate": 2.3275941867999632e-05, + "loss": 3.6909, + "step": 11648 + }, + { + "epoch": 0.13, + "learning_rate": 2.3271313524021108e-05, + "loss": 4.2358, + "step": 11650 + }, + { + "epoch": 0.13, + "learning_rate": 2.3266685180042583e-05, + "loss": 1.8802, + "step": 11652 + }, + { + "epoch": 0.13, + "learning_rate": 2.326205683606406e-05, + "loss": 0.6052, + "step": 11654 + }, + { + "epoch": 0.13, + "learning_rate": 2.3257428492085534e-05, + "loss": 2.2932, + "step": 11656 + }, + { + "epoch": 0.13, + "learning_rate": 2.325280014810701e-05, + "loss": 1.8191, + "step": 11658 + }, + { + "epoch": 0.13, + "learning_rate": 2.3248171804128485e-05, + "loss": 0.9598, + "step": 11660 + }, + { + "epoch": 0.13, + "learning_rate": 2.324354346014996e-05, + "loss": 0.0234, + "step": 11662 + }, + { + "epoch": 0.13, + "learning_rate": 2.3238915116171436e-05, + "loss": 0.7169, + "step": 11664 + }, + { + "epoch": 0.13, + "learning_rate": 2.3234286772192908e-05, + "loss": 1.2792, + "step": 11666 + }, + { + "epoch": 0.13, + "learning_rate": 2.3229658428214383e-05, + "loss": 0.7113, + "step": 11668 + }, + { + "epoch": 0.13, + "learning_rate": 2.322503008423586e-05, + "loss": 0.0011, + "step": 11670 + }, + { + "epoch": 0.13, + "learning_rate": 2.3220401740257338e-05, + "loss": 3.6669, + "step": 11672 + }, + { + "epoch": 0.13, + "learning_rate": 2.3215773396278813e-05, + "loss": 2.6983, + "step": 11674 + }, + { + "epoch": 0.13, + "learning_rate": 2.321114505230029e-05, + "loss": 2.6359, + "step": 11676 + }, + { + "epoch": 0.13, + "learning_rate": 2.3206516708321764e-05, + "loss": 3.3102, + "step": 11678 + }, + { + "epoch": 0.13, + "learning_rate": 2.320188836434324e-05, + "loss": 0.0005, + "step": 11680 + }, + { + "epoch": 0.13, + "learning_rate": 2.3197260020364715e-05, + "loss": 3.6713, + "step": 11682 + }, + { + "epoch": 0.13, + "learning_rate": 2.319263167638619e-05, + "loss": 10.8348, + "step": 11684 + }, + { + "epoch": 0.13, + "learning_rate": 2.3188003332407665e-05, + "loss": 0.0024, + "step": 11686 + }, + { + "epoch": 0.13, + "learning_rate": 2.318337498842914e-05, + "loss": 5.8768, + "step": 11688 + }, + { + "epoch": 0.13, + "learning_rate": 2.3178746644450616e-05, + "loss": 1.3058, + "step": 11690 + }, + { + "epoch": 0.13, + "learning_rate": 2.3174118300472092e-05, + "loss": 3.7066, + "step": 11692 + }, + { + "epoch": 0.13, + "learning_rate": 2.3169489956493567e-05, + "loss": 2.9691, + "step": 11694 + }, + { + "epoch": 0.13, + "learning_rate": 2.3164861612515043e-05, + "loss": 1.0647, + "step": 11696 + }, + { + "epoch": 0.13, + "learning_rate": 2.3160233268536518e-05, + "loss": 3.6425, + "step": 11698 + }, + { + "epoch": 0.13, + "learning_rate": 2.3155604924557993e-05, + "loss": 1.4321, + "step": 11700 + }, + { + "epoch": 0.13, + "learning_rate": 2.315097658057947e-05, + "loss": 2.86, + "step": 11702 + }, + { + "epoch": 0.13, + "learning_rate": 2.3146348236600944e-05, + "loss": 0.9618, + "step": 11704 + }, + { + "epoch": 0.13, + "learning_rate": 2.314171989262242e-05, + "loss": 2.982, + "step": 11706 + }, + { + "epoch": 0.13, + "learning_rate": 2.3137091548643895e-05, + "loss": 2.3021, + "step": 11708 + }, + { + "epoch": 0.13, + "learning_rate": 2.313246320466537e-05, + "loss": 2.0069, + "step": 11710 + }, + { + "epoch": 0.13, + "learning_rate": 2.312783486068685e-05, + "loss": 0.0012, + "step": 11712 + }, + { + "epoch": 0.13, + "learning_rate": 2.3123206516708325e-05, + "loss": 0.4581, + "step": 11714 + }, + { + "epoch": 0.13, + "learning_rate": 2.31185781727298e-05, + "loss": 1.3937, + "step": 11716 + }, + { + "epoch": 0.13, + "learning_rate": 2.3113949828751276e-05, + "loss": 7.0825, + "step": 11718 + }, + { + "epoch": 0.13, + "learning_rate": 2.310932148477275e-05, + "loss": 2.146, + "step": 11720 + }, + { + "epoch": 0.14, + "learning_rate": 2.3104693140794227e-05, + "loss": 1.3761, + "step": 11722 + }, + { + "epoch": 0.14, + "learning_rate": 2.3100064796815702e-05, + "loss": 4.7641, + "step": 11724 + }, + { + "epoch": 0.14, + "learning_rate": 2.3095436452837177e-05, + "loss": 1.0774, + "step": 11726 + }, + { + "epoch": 0.14, + "learning_rate": 2.3090808108858653e-05, + "loss": 0.2275, + "step": 11728 + }, + { + "epoch": 0.14, + "learning_rate": 2.3086179764880128e-05, + "loss": 0.3565, + "step": 11730 + }, + { + "epoch": 0.14, + "learning_rate": 2.3081551420901604e-05, + "loss": 0.9936, + "step": 11732 + }, + { + "epoch": 0.14, + "learning_rate": 2.307692307692308e-05, + "loss": 0.6058, + "step": 11734 + }, + { + "epoch": 0.14, + "learning_rate": 2.3072294732944554e-05, + "loss": 2.5883, + "step": 11736 + }, + { + "epoch": 0.14, + "learning_rate": 2.3067666388966027e-05, + "loss": 3.8657, + "step": 11738 + }, + { + "epoch": 0.14, + "learning_rate": 2.3063038044987502e-05, + "loss": 2.1753, + "step": 11740 + }, + { + "epoch": 0.14, + "learning_rate": 2.3058409701008977e-05, + "loss": 0.0013, + "step": 11742 + }, + { + "epoch": 0.14, + "learning_rate": 2.3053781357030453e-05, + "loss": 4.6145, + "step": 11744 + }, + { + "epoch": 0.14, + "learning_rate": 2.304915301305193e-05, + "loss": 1.5403, + "step": 11746 + }, + { + "epoch": 0.14, + "learning_rate": 2.3044524669073407e-05, + "loss": 1.677, + "step": 11748 + }, + { + "epoch": 0.14, + "learning_rate": 2.3039896325094882e-05, + "loss": 2.9177, + "step": 11750 + }, + { + "epoch": 0.14, + "learning_rate": 2.3035267981116358e-05, + "loss": 2.2892, + "step": 11752 + }, + { + "epoch": 0.14, + "learning_rate": 2.3030639637137833e-05, + "loss": 5.2272, + "step": 11754 + }, + { + "epoch": 0.14, + "learning_rate": 2.302601129315931e-05, + "loss": 1.6585, + "step": 11756 + }, + { + "epoch": 0.14, + "learning_rate": 2.3021382949180784e-05, + "loss": 1.8311, + "step": 11758 + }, + { + "epoch": 0.14, + "learning_rate": 2.301675460520226e-05, + "loss": 2.511, + "step": 11760 + }, + { + "epoch": 0.14, + "learning_rate": 2.3012126261223735e-05, + "loss": 0.6595, + "step": 11762 + }, + { + "epoch": 0.14, + "learning_rate": 2.300749791724521e-05, + "loss": 0.0007, + "step": 11764 + }, + { + "epoch": 0.14, + "learning_rate": 2.3002869573266686e-05, + "loss": 2.5219, + "step": 11766 + }, + { + "epoch": 0.14, + "learning_rate": 2.299824122928816e-05, + "loss": 1.1799, + "step": 11768 + }, + { + "epoch": 0.14, + "learning_rate": 2.2993612885309637e-05, + "loss": 3.7213, + "step": 11770 + }, + { + "epoch": 0.14, + "learning_rate": 2.2988984541331112e-05, + "loss": 2.2108, + "step": 11772 + }, + { + "epoch": 0.14, + "learning_rate": 2.2984356197352588e-05, + "loss": 3.0596, + "step": 11774 + }, + { + "epoch": 0.14, + "learning_rate": 2.2979727853374063e-05, + "loss": 5.0035, + "step": 11776 + }, + { + "epoch": 0.14, + "learning_rate": 2.297509950939554e-05, + "loss": 0.8988, + "step": 11778 + }, + { + "epoch": 0.14, + "learning_rate": 2.2970471165417014e-05, + "loss": 3.5068, + "step": 11780 + }, + { + "epoch": 0.14, + "learning_rate": 2.296584282143849e-05, + "loss": 5.0437, + "step": 11782 + }, + { + "epoch": 0.14, + "learning_rate": 2.2961214477459965e-05, + "loss": 1.7216, + "step": 11784 + }, + { + "epoch": 0.14, + "learning_rate": 2.2956586133481443e-05, + "loss": 2.3211, + "step": 11786 + }, + { + "epoch": 0.14, + "learning_rate": 2.295195778950292e-05, + "loss": 2.3235, + "step": 11788 + }, + { + "epoch": 0.14, + "learning_rate": 2.2947329445524394e-05, + "loss": 0.1274, + "step": 11790 + }, + { + "epoch": 0.14, + "learning_rate": 2.294270110154587e-05, + "loss": 0.001, + "step": 11792 + }, + { + "epoch": 0.14, + "learning_rate": 2.2938072757567345e-05, + "loss": 2.5048, + "step": 11794 + }, + { + "epoch": 0.14, + "learning_rate": 2.293344441358882e-05, + "loss": 0.006, + "step": 11796 + }, + { + "epoch": 0.14, + "learning_rate": 2.2928816069610296e-05, + "loss": 1.2315, + "step": 11798 + }, + { + "epoch": 0.14, + "learning_rate": 2.292418772563177e-05, + "loss": 0.4612, + "step": 11800 + }, + { + "epoch": 0.14, + "learning_rate": 2.2919559381653247e-05, + "loss": 0.0338, + "step": 11802 + }, + { + "epoch": 0.14, + "learning_rate": 2.2914931037674722e-05, + "loss": 0.4349, + "step": 11804 + }, + { + "epoch": 0.14, + "learning_rate": 2.2910302693696198e-05, + "loss": 0.5837, + "step": 11806 + }, + { + "epoch": 0.14, + "learning_rate": 2.2905674349717673e-05, + "loss": 1.2775, + "step": 11808 + }, + { + "epoch": 0.14, + "learning_rate": 2.2901046005739145e-05, + "loss": 1.5945, + "step": 11810 + }, + { + "epoch": 0.14, + "learning_rate": 2.289641766176062e-05, + "loss": 1.0095, + "step": 11812 + }, + { + "epoch": 0.14, + "learning_rate": 2.2891789317782096e-05, + "loss": 0.8258, + "step": 11814 + }, + { + "epoch": 0.14, + "learning_rate": 2.288716097380357e-05, + "loss": 1.6127, + "step": 11816 + }, + { + "epoch": 0.14, + "learning_rate": 2.288253262982505e-05, + "loss": 3.479, + "step": 11818 + }, + { + "epoch": 0.14, + "learning_rate": 2.2877904285846526e-05, + "loss": 9.351, + "step": 11820 + }, + { + "epoch": 0.14, + "learning_rate": 2.2873275941868e-05, + "loss": 0.0002, + "step": 11822 + }, + { + "epoch": 0.14, + "learning_rate": 2.2868647597889477e-05, + "loss": 4.8827, + "step": 11824 + }, + { + "epoch": 0.14, + "learning_rate": 2.2864019253910952e-05, + "loss": 2.687, + "step": 11826 + }, + { + "epoch": 0.14, + "learning_rate": 2.2859390909932427e-05, + "loss": 2.8048, + "step": 11828 + }, + { + "epoch": 0.14, + "learning_rate": 2.2854762565953903e-05, + "loss": 1.8187, + "step": 11830 + }, + { + "epoch": 0.14, + "learning_rate": 2.2850134221975378e-05, + "loss": 1.8712, + "step": 11832 + }, + { + "epoch": 0.14, + "learning_rate": 2.2845505877996854e-05, + "loss": 5.4094, + "step": 11834 + }, + { + "epoch": 0.14, + "learning_rate": 2.284087753401833e-05, + "loss": 3.6381, + "step": 11836 + }, + { + "epoch": 0.14, + "learning_rate": 2.2836249190039805e-05, + "loss": 3.7207, + "step": 11838 + }, + { + "epoch": 0.14, + "learning_rate": 2.283162084606128e-05, + "loss": 3.2465, + "step": 11840 + }, + { + "epoch": 0.14, + "learning_rate": 2.2826992502082755e-05, + "loss": 5.7429, + "step": 11842 + }, + { + "epoch": 0.14, + "learning_rate": 2.282236415810423e-05, + "loss": 0.4259, + "step": 11844 + }, + { + "epoch": 0.14, + "learning_rate": 2.2817735814125706e-05, + "loss": 3.4085, + "step": 11846 + }, + { + "epoch": 0.14, + "learning_rate": 2.281310747014718e-05, + "loss": 2.4615, + "step": 11848 + }, + { + "epoch": 0.14, + "learning_rate": 2.2808479126168657e-05, + "loss": 0.7117, + "step": 11850 + }, + { + "epoch": 0.14, + "learning_rate": 2.2803850782190133e-05, + "loss": 0.9904, + "step": 11852 + }, + { + "epoch": 0.14, + "learning_rate": 2.2799222438211608e-05, + "loss": 0.401, + "step": 11854 + }, + { + "epoch": 0.14, + "learning_rate": 2.2794594094233083e-05, + "loss": 1.3235, + "step": 11856 + }, + { + "epoch": 0.14, + "learning_rate": 2.2789965750254562e-05, + "loss": 0.2817, + "step": 11858 + }, + { + "epoch": 0.14, + "learning_rate": 2.2785337406276038e-05, + "loss": 1.772, + "step": 11860 + }, + { + "epoch": 0.14, + "learning_rate": 2.2780709062297513e-05, + "loss": 1.1804, + "step": 11862 + }, + { + "epoch": 0.14, + "learning_rate": 2.277608071831899e-05, + "loss": 4.126, + "step": 11864 + }, + { + "epoch": 0.14, + "learning_rate": 2.2771452374340464e-05, + "loss": 2.1236, + "step": 11866 + }, + { + "epoch": 0.14, + "learning_rate": 2.276682403036194e-05, + "loss": 2.2264, + "step": 11868 + }, + { + "epoch": 0.14, + "learning_rate": 2.2762195686383415e-05, + "loss": 2.2909, + "step": 11870 + }, + { + "epoch": 0.14, + "learning_rate": 2.275756734240489e-05, + "loss": 1.2049, + "step": 11872 + }, + { + "epoch": 0.14, + "learning_rate": 2.2752938998426366e-05, + "loss": 3.8166, + "step": 11874 + }, + { + "epoch": 0.14, + "learning_rate": 2.274831065444784e-05, + "loss": 0.7682, + "step": 11876 + }, + { + "epoch": 0.14, + "learning_rate": 2.2743682310469316e-05, + "loss": 2.1898, + "step": 11878 + }, + { + "epoch": 0.14, + "learning_rate": 2.2739053966490792e-05, + "loss": 0.0312, + "step": 11880 + }, + { + "epoch": 0.14, + "learning_rate": 2.2734425622512264e-05, + "loss": 2.8355, + "step": 11882 + }, + { + "epoch": 0.14, + "learning_rate": 2.272979727853374e-05, + "loss": 2.6993, + "step": 11884 + }, + { + "epoch": 0.14, + "learning_rate": 2.2725168934555215e-05, + "loss": 2.3613, + "step": 11886 + }, + { + "epoch": 0.14, + "learning_rate": 2.272054059057669e-05, + "loss": 2.2117, + "step": 11888 + }, + { + "epoch": 0.14, + "learning_rate": 2.2715912246598166e-05, + "loss": 2.7101, + "step": 11890 + }, + { + "epoch": 0.14, + "learning_rate": 2.2711283902619644e-05, + "loss": 1.9092, + "step": 11892 + }, + { + "epoch": 0.14, + "learning_rate": 2.270665555864112e-05, + "loss": 1.6353, + "step": 11894 + }, + { + "epoch": 0.14, + "learning_rate": 2.2702027214662595e-05, + "loss": 1.932, + "step": 11896 + }, + { + "epoch": 0.14, + "learning_rate": 2.269739887068407e-05, + "loss": 2.0946, + "step": 11898 + }, + { + "epoch": 0.14, + "learning_rate": 2.2692770526705546e-05, + "loss": 1.0947, + "step": 11900 + }, + { + "epoch": 0.14, + "learning_rate": 2.268814218272702e-05, + "loss": 2.2807, + "step": 11902 + }, + { + "epoch": 0.14, + "learning_rate": 2.2683513838748497e-05, + "loss": 0.9438, + "step": 11904 + }, + { + "epoch": 0.14, + "learning_rate": 2.2678885494769972e-05, + "loss": 0.0048, + "step": 11906 + }, + { + "epoch": 0.14, + "learning_rate": 2.2674257150791448e-05, + "loss": 4.6263, + "step": 11908 + }, + { + "epoch": 0.14, + "learning_rate": 2.2669628806812923e-05, + "loss": 0.6084, + "step": 11910 + }, + { + "epoch": 0.14, + "learning_rate": 2.26650004628344e-05, + "loss": 1.6278, + "step": 11912 + }, + { + "epoch": 0.14, + "learning_rate": 2.2660372118855874e-05, + "loss": 1.5517, + "step": 11914 + }, + { + "epoch": 0.14, + "learning_rate": 2.265574377487735e-05, + "loss": 0.6334, + "step": 11916 + }, + { + "epoch": 0.14, + "learning_rate": 2.2651115430898825e-05, + "loss": 2.9594, + "step": 11918 + }, + { + "epoch": 0.14, + "learning_rate": 2.26464870869203e-05, + "loss": 2.9019, + "step": 11920 + }, + { + "epoch": 0.14, + "learning_rate": 2.2641858742941776e-05, + "loss": 3.7601, + "step": 11922 + }, + { + "epoch": 0.14, + "learning_rate": 2.263723039896325e-05, + "loss": 0.1491, + "step": 11924 + }, + { + "epoch": 0.14, + "learning_rate": 2.2632602054984727e-05, + "loss": 0.8837, + "step": 11926 + }, + { + "epoch": 0.14, + "learning_rate": 2.2627973711006202e-05, + "loss": 2.8138, + "step": 11928 + }, + { + "epoch": 0.14, + "learning_rate": 2.2623345367027677e-05, + "loss": 3.7212, + "step": 11930 + }, + { + "epoch": 0.14, + "learning_rate": 2.2618717023049156e-05, + "loss": 1.4218, + "step": 11932 + }, + { + "epoch": 0.14, + "learning_rate": 2.261408867907063e-05, + "loss": 1.7363, + "step": 11934 + }, + { + "epoch": 0.14, + "learning_rate": 2.2609460335092107e-05, + "loss": 0.9116, + "step": 11936 + }, + { + "epoch": 0.14, + "learning_rate": 2.2604831991113583e-05, + "loss": 0.0264, + "step": 11938 + }, + { + "epoch": 0.14, + "learning_rate": 2.2600203647135058e-05, + "loss": 6.5916, + "step": 11940 + }, + { + "epoch": 0.14, + "learning_rate": 2.2595575303156533e-05, + "loss": 4.521, + "step": 11942 + }, + { + "epoch": 0.14, + "learning_rate": 2.259094695917801e-05, + "loss": 0.9365, + "step": 11944 + }, + { + "epoch": 0.14, + "learning_rate": 2.2586318615199484e-05, + "loss": 3.0691, + "step": 11946 + }, + { + "epoch": 0.14, + "learning_rate": 2.258169027122096e-05, + "loss": 0.3633, + "step": 11948 + }, + { + "epoch": 0.14, + "learning_rate": 2.2577061927242435e-05, + "loss": 1.173, + "step": 11950 + }, + { + "epoch": 0.14, + "learning_rate": 2.257243358326391e-05, + "loss": 0.7139, + "step": 11952 + }, + { + "epoch": 0.14, + "learning_rate": 2.2567805239285386e-05, + "loss": 0.0773, + "step": 11954 + }, + { + "epoch": 0.14, + "learning_rate": 2.2563176895306858e-05, + "loss": 6.4459, + "step": 11956 + }, + { + "epoch": 0.14, + "learning_rate": 2.2558548551328333e-05, + "loss": 0.0249, + "step": 11958 + }, + { + "epoch": 0.14, + "learning_rate": 2.255392020734981e-05, + "loss": 0.2342, + "step": 11960 + }, + { + "epoch": 0.14, + "learning_rate": 2.2549291863371284e-05, + "loss": 0.5842, + "step": 11962 + }, + { + "epoch": 0.14, + "learning_rate": 2.254466351939276e-05, + "loss": 1.543, + "step": 11964 + }, + { + "epoch": 0.14, + "learning_rate": 2.254003517541424e-05, + "loss": 0.115, + "step": 11966 + }, + { + "epoch": 0.14, + "learning_rate": 2.2535406831435714e-05, + "loss": 2.0399, + "step": 11968 + }, + { + "epoch": 0.14, + "learning_rate": 2.253077848745719e-05, + "loss": 0.1222, + "step": 11970 + }, + { + "epoch": 0.14, + "learning_rate": 2.2526150143478665e-05, + "loss": 0.0018, + "step": 11972 + }, + { + "epoch": 0.14, + "learning_rate": 2.252152179950014e-05, + "loss": 0.1606, + "step": 11974 + }, + { + "epoch": 0.14, + "learning_rate": 2.2516893455521616e-05, + "loss": 0.0932, + "step": 11976 + }, + { + "epoch": 0.14, + "learning_rate": 2.251226511154309e-05, + "loss": 2.6749, + "step": 11978 + }, + { + "epoch": 0.14, + "learning_rate": 2.2507636767564566e-05, + "loss": 0.8746, + "step": 11980 + }, + { + "epoch": 0.14, + "learning_rate": 2.2503008423586042e-05, + "loss": 1.6348, + "step": 11982 + }, + { + "epoch": 0.14, + "learning_rate": 2.2498380079607517e-05, + "loss": 4.1786, + "step": 11984 + }, + { + "epoch": 0.14, + "learning_rate": 2.2493751735628993e-05, + "loss": 5.1557, + "step": 11986 + }, + { + "epoch": 0.14, + "learning_rate": 2.2489123391650468e-05, + "loss": 0.6608, + "step": 11988 + }, + { + "epoch": 0.14, + "learning_rate": 2.2484495047671944e-05, + "loss": 4.0326, + "step": 11990 + }, + { + "epoch": 0.14, + "learning_rate": 2.247986670369342e-05, + "loss": 5.0737, + "step": 11992 + }, + { + "epoch": 0.14, + "learning_rate": 2.2475238359714894e-05, + "loss": 0.0045, + "step": 11994 + }, + { + "epoch": 0.14, + "learning_rate": 2.247061001573637e-05, + "loss": 0.1323, + "step": 11996 + }, + { + "epoch": 0.14, + "learning_rate": 2.2465981671757845e-05, + "loss": 5.4044, + "step": 11998 + }, + { + "epoch": 0.14, + "learning_rate": 2.246135332777932e-05, + "loss": 0.8646, + "step": 12000 + }, + { + "epoch": 0.14, + "learning_rate": 2.2456724983800796e-05, + "loss": 3.5455, + "step": 12002 + }, + { + "epoch": 0.14, + "learning_rate": 2.245209663982227e-05, + "loss": 2.9853, + "step": 12004 + }, + { + "epoch": 0.14, + "learning_rate": 2.244746829584375e-05, + "loss": 3.3614, + "step": 12006 + }, + { + "epoch": 0.14, + "learning_rate": 2.2442839951865226e-05, + "loss": 4.2385, + "step": 12008 + }, + { + "epoch": 0.14, + "learning_rate": 2.24382116078867e-05, + "loss": 0.2548, + "step": 12010 + }, + { + "epoch": 0.14, + "learning_rate": 2.2433583263908177e-05, + "loss": 3.9689, + "step": 12012 + }, + { + "epoch": 0.14, + "learning_rate": 2.2428954919929652e-05, + "loss": 1.7411, + "step": 12014 + }, + { + "epoch": 0.14, + "learning_rate": 2.2424326575951127e-05, + "loss": 4.1931, + "step": 12016 + }, + { + "epoch": 0.14, + "learning_rate": 2.2419698231972603e-05, + "loss": 2.3021, + "step": 12018 + }, + { + "epoch": 0.14, + "learning_rate": 2.241506988799408e-05, + "loss": 2.5676, + "step": 12020 + }, + { + "epoch": 0.14, + "learning_rate": 2.2410441544015554e-05, + "loss": 2.5231, + "step": 12022 + }, + { + "epoch": 0.14, + "learning_rate": 2.240581320003703e-05, + "loss": 0.963, + "step": 12024 + }, + { + "epoch": 0.14, + "learning_rate": 2.2401184856058505e-05, + "loss": 2.9893, + "step": 12026 + }, + { + "epoch": 0.14, + "learning_rate": 2.2396556512079977e-05, + "loss": 3.1346, + "step": 12028 + }, + { + "epoch": 0.14, + "learning_rate": 2.2391928168101452e-05, + "loss": 0.734, + "step": 12030 + }, + { + "epoch": 0.14, + "learning_rate": 2.2387299824122927e-05, + "loss": 0.3818, + "step": 12032 + }, + { + "epoch": 0.14, + "learning_rate": 2.2382671480144403e-05, + "loss": 0.7746, + "step": 12034 + }, + { + "epoch": 0.14, + "learning_rate": 2.237804313616588e-05, + "loss": 1.9022, + "step": 12036 + }, + { + "epoch": 0.14, + "learning_rate": 2.2373414792187357e-05, + "loss": 0.9363, + "step": 12038 + }, + { + "epoch": 0.14, + "learning_rate": 2.2368786448208833e-05, + "loss": 6.3397, + "step": 12040 + }, + { + "epoch": 0.14, + "learning_rate": 2.2364158104230308e-05, + "loss": 0.6541, + "step": 12042 + }, + { + "epoch": 0.14, + "learning_rate": 2.2359529760251783e-05, + "loss": 0.0231, + "step": 12044 + }, + { + "epoch": 0.14, + "learning_rate": 2.235490141627326e-05, + "loss": 1.9023, + "step": 12046 + }, + { + "epoch": 0.14, + "learning_rate": 2.2350273072294734e-05, + "loss": 0.6167, + "step": 12048 + }, + { + "epoch": 0.14, + "learning_rate": 2.234564472831621e-05, + "loss": 3.7969, + "step": 12050 + }, + { + "epoch": 0.14, + "learning_rate": 2.2341016384337685e-05, + "loss": 1.2143, + "step": 12052 + }, + { + "epoch": 0.14, + "learning_rate": 2.233638804035916e-05, + "loss": 1.7819, + "step": 12054 + }, + { + "epoch": 0.14, + "learning_rate": 2.2331759696380636e-05, + "loss": 1.2891, + "step": 12056 + }, + { + "epoch": 0.14, + "learning_rate": 2.232713135240211e-05, + "loss": 0.0008, + "step": 12058 + }, + { + "epoch": 0.14, + "learning_rate": 2.2322503008423587e-05, + "loss": 0.9438, + "step": 12060 + }, + { + "epoch": 0.14, + "learning_rate": 2.2317874664445062e-05, + "loss": 0.3625, + "step": 12062 + }, + { + "epoch": 0.14, + "learning_rate": 2.2313246320466538e-05, + "loss": 1.334, + "step": 12064 + }, + { + "epoch": 0.14, + "learning_rate": 2.2308617976488013e-05, + "loss": 1.6971, + "step": 12066 + }, + { + "epoch": 0.14, + "learning_rate": 2.230398963250949e-05, + "loss": 6.2369, + "step": 12068 + }, + { + "epoch": 0.14, + "learning_rate": 2.2299361288530964e-05, + "loss": 4.2267, + "step": 12070 + }, + { + "epoch": 0.14, + "learning_rate": 2.229473294455244e-05, + "loss": 4.8855, + "step": 12072 + }, + { + "epoch": 0.14, + "learning_rate": 2.2290104600573915e-05, + "loss": 1.0188, + "step": 12074 + }, + { + "epoch": 0.14, + "learning_rate": 2.228547625659539e-05, + "loss": 0.0299, + "step": 12076 + }, + { + "epoch": 0.14, + "learning_rate": 2.2280847912616866e-05, + "loss": 0.0009, + "step": 12078 + }, + { + "epoch": 0.14, + "learning_rate": 2.2276219568638344e-05, + "loss": 2.502, + "step": 12080 + }, + { + "epoch": 0.14, + "learning_rate": 2.227159122465982e-05, + "loss": 1.8609, + "step": 12082 + }, + { + "epoch": 0.14, + "learning_rate": 2.2266962880681295e-05, + "loss": 0.9015, + "step": 12084 + }, + { + "epoch": 0.14, + "learning_rate": 2.226233453670277e-05, + "loss": 2.3432, + "step": 12086 + }, + { + "epoch": 0.14, + "learning_rate": 2.2257706192724246e-05, + "loss": 2.0292, + "step": 12088 + }, + { + "epoch": 0.14, + "learning_rate": 2.225307784874572e-05, + "loss": 0.0017, + "step": 12090 + }, + { + "epoch": 0.14, + "learning_rate": 2.2248449504767197e-05, + "loss": 0.6692, + "step": 12092 + }, + { + "epoch": 0.14, + "learning_rate": 2.2243821160788672e-05, + "loss": 0.0007, + "step": 12094 + }, + { + "epoch": 0.14, + "learning_rate": 2.2239192816810148e-05, + "loss": 4.3866, + "step": 12096 + }, + { + "epoch": 0.14, + "learning_rate": 2.2234564472831623e-05, + "loss": 0.4791, + "step": 12098 + }, + { + "epoch": 0.14, + "learning_rate": 2.2229936128853095e-05, + "loss": 3.2548, + "step": 12100 + }, + { + "epoch": 0.14, + "learning_rate": 2.222530778487457e-05, + "loss": 3.4736, + "step": 12102 + }, + { + "epoch": 0.14, + "learning_rate": 2.2220679440896046e-05, + "loss": 0.0005, + "step": 12104 + }, + { + "epoch": 0.14, + "learning_rate": 2.221605109691752e-05, + "loss": 1.9351, + "step": 12106 + }, + { + "epoch": 0.14, + "learning_rate": 2.2211422752938997e-05, + "loss": 1.5958, + "step": 12108 + }, + { + "epoch": 0.14, + "learning_rate": 2.2206794408960472e-05, + "loss": 2.1569, + "step": 12110 + }, + { + "epoch": 0.14, + "learning_rate": 2.220216606498195e-05, + "loss": 2.6058, + "step": 12112 + }, + { + "epoch": 0.14, + "learning_rate": 2.2197537721003427e-05, + "loss": 3.3552, + "step": 12114 + }, + { + "epoch": 0.14, + "learning_rate": 2.2192909377024902e-05, + "loss": 2.2583, + "step": 12116 + }, + { + "epoch": 0.14, + "learning_rate": 2.2188281033046378e-05, + "loss": 2.1984, + "step": 12118 + }, + { + "epoch": 0.14, + "learning_rate": 2.2183652689067853e-05, + "loss": 0.004, + "step": 12120 + }, + { + "epoch": 0.14, + "learning_rate": 2.217902434508933e-05, + "loss": 0.9704, + "step": 12122 + }, + { + "epoch": 0.14, + "learning_rate": 2.2174396001110804e-05, + "loss": 1.2457, + "step": 12124 + }, + { + "epoch": 0.14, + "learning_rate": 2.216976765713228e-05, + "loss": 0.5382, + "step": 12126 + }, + { + "epoch": 0.14, + "learning_rate": 2.2165139313153755e-05, + "loss": 5.6551, + "step": 12128 + }, + { + "epoch": 0.14, + "learning_rate": 2.216051096917523e-05, + "loss": 0.4806, + "step": 12130 + }, + { + "epoch": 0.14, + "learning_rate": 2.2155882625196706e-05, + "loss": 0.0005, + "step": 12132 + }, + { + "epoch": 0.14, + "learning_rate": 2.215125428121818e-05, + "loss": 2.9863, + "step": 12134 + }, + { + "epoch": 0.14, + "learning_rate": 2.2146625937239656e-05, + "loss": 1.9234, + "step": 12136 + }, + { + "epoch": 0.14, + "learning_rate": 2.2141997593261132e-05, + "loss": 5.055, + "step": 12138 + }, + { + "epoch": 0.14, + "learning_rate": 2.2137369249282607e-05, + "loss": 1.0453, + "step": 12140 + }, + { + "epoch": 0.14, + "learning_rate": 2.2132740905304083e-05, + "loss": 1.0144, + "step": 12142 + }, + { + "epoch": 0.14, + "learning_rate": 2.2128112561325558e-05, + "loss": 0.1294, + "step": 12144 + }, + { + "epoch": 0.14, + "learning_rate": 2.2123484217347033e-05, + "loss": 2.2321, + "step": 12146 + }, + { + "epoch": 0.14, + "learning_rate": 2.211885587336851e-05, + "loss": 1.4939, + "step": 12148 + }, + { + "epoch": 0.14, + "learning_rate": 2.2114227529389984e-05, + "loss": 8.8338, + "step": 12150 + }, + { + "epoch": 0.14, + "learning_rate": 2.2109599185411463e-05, + "loss": 2.9383, + "step": 12152 + }, + { + "epoch": 0.14, + "learning_rate": 2.210497084143294e-05, + "loss": 1.5335, + "step": 12154 + }, + { + "epoch": 0.14, + "learning_rate": 2.2100342497454414e-05, + "loss": 0.0331, + "step": 12156 + }, + { + "epoch": 0.14, + "learning_rate": 2.209571415347589e-05, + "loss": 0.9013, + "step": 12158 + }, + { + "epoch": 0.14, + "learning_rate": 2.2091085809497365e-05, + "loss": 3.6362, + "step": 12160 + }, + { + "epoch": 0.14, + "learning_rate": 2.208645746551884e-05, + "loss": 1.8841, + "step": 12162 + }, + { + "epoch": 0.14, + "learning_rate": 2.2081829121540316e-05, + "loss": 3.9153, + "step": 12164 + }, + { + "epoch": 0.14, + "learning_rate": 2.207720077756179e-05, + "loss": 3.5298, + "step": 12166 + }, + { + "epoch": 0.14, + "learning_rate": 2.2072572433583267e-05, + "loss": 0.2455, + "step": 12168 + }, + { + "epoch": 0.14, + "learning_rate": 2.2067944089604742e-05, + "loss": 5.2964, + "step": 12170 + }, + { + "epoch": 0.14, + "learning_rate": 2.2063315745626214e-05, + "loss": 3.1358, + "step": 12172 + }, + { + "epoch": 0.14, + "learning_rate": 2.205868740164769e-05, + "loss": 1.8038, + "step": 12174 + }, + { + "epoch": 0.14, + "learning_rate": 2.2054059057669165e-05, + "loss": 2.3105, + "step": 12176 + }, + { + "epoch": 0.14, + "learning_rate": 2.204943071369064e-05, + "loss": 1.283, + "step": 12178 + }, + { + "epoch": 0.14, + "learning_rate": 2.2044802369712116e-05, + "loss": 0.8064, + "step": 12180 + }, + { + "epoch": 0.14, + "learning_rate": 2.204017402573359e-05, + "loss": 2.1813, + "step": 12182 + }, + { + "epoch": 0.14, + "learning_rate": 2.2035545681755067e-05, + "loss": 4.4802, + "step": 12184 + }, + { + "epoch": 0.14, + "learning_rate": 2.2030917337776545e-05, + "loss": 0.1084, + "step": 12186 + }, + { + "epoch": 0.14, + "learning_rate": 2.202628899379802e-05, + "loss": 0.9921, + "step": 12188 + }, + { + "epoch": 0.14, + "learning_rate": 2.2021660649819496e-05, + "loss": 1.6508, + "step": 12190 + }, + { + "epoch": 0.14, + "learning_rate": 2.201703230584097e-05, + "loss": 1.4724, + "step": 12192 + }, + { + "epoch": 0.14, + "learning_rate": 2.2012403961862447e-05, + "loss": 2.9722, + "step": 12194 + }, + { + "epoch": 0.14, + "learning_rate": 2.2007775617883922e-05, + "loss": 1.4826, + "step": 12196 + }, + { + "epoch": 0.14, + "learning_rate": 2.2003147273905398e-05, + "loss": 0.2834, + "step": 12198 + }, + { + "epoch": 0.14, + "learning_rate": 2.1998518929926873e-05, + "loss": 0.1618, + "step": 12200 + }, + { + "epoch": 0.14, + "learning_rate": 2.199389058594835e-05, + "loss": 5.1766, + "step": 12202 + }, + { + "epoch": 0.14, + "learning_rate": 2.1989262241969824e-05, + "loss": 2.6722, + "step": 12204 + }, + { + "epoch": 0.14, + "learning_rate": 2.19846338979913e-05, + "loss": 0.0054, + "step": 12206 + }, + { + "epoch": 0.14, + "learning_rate": 2.1980005554012775e-05, + "loss": 2.4434, + "step": 12208 + }, + { + "epoch": 0.14, + "learning_rate": 2.197537721003425e-05, + "loss": 2.4976, + "step": 12210 + }, + { + "epoch": 0.14, + "learning_rate": 2.1970748866055726e-05, + "loss": 0.9715, + "step": 12212 + }, + { + "epoch": 0.14, + "learning_rate": 2.19661205220772e-05, + "loss": 3.3726, + "step": 12214 + }, + { + "epoch": 0.14, + "learning_rate": 2.1961492178098677e-05, + "loss": 2.3836, + "step": 12216 + }, + { + "epoch": 0.14, + "learning_rate": 2.1956863834120152e-05, + "loss": 3.7789, + "step": 12218 + }, + { + "epoch": 0.14, + "learning_rate": 2.1952235490141628e-05, + "loss": 4.7102, + "step": 12220 + }, + { + "epoch": 0.14, + "learning_rate": 2.1947607146163103e-05, + "loss": 0.8015, + "step": 12222 + }, + { + "epoch": 0.14, + "learning_rate": 2.194297880218458e-05, + "loss": 0.3311, + "step": 12224 + }, + { + "epoch": 0.14, + "learning_rate": 2.1938350458206057e-05, + "loss": 2.3435, + "step": 12226 + }, + { + "epoch": 0.14, + "learning_rate": 2.1933722114227533e-05, + "loss": 1.0832, + "step": 12228 + }, + { + "epoch": 0.14, + "learning_rate": 2.1929093770249008e-05, + "loss": 3.1573, + "step": 12230 + }, + { + "epoch": 0.14, + "learning_rate": 2.1924465426270484e-05, + "loss": 1.0696, + "step": 12232 + }, + { + "epoch": 0.14, + "learning_rate": 2.191983708229196e-05, + "loss": 0.7533, + "step": 12234 + }, + { + "epoch": 0.14, + "learning_rate": 2.1915208738313434e-05, + "loss": 1.7905, + "step": 12236 + }, + { + "epoch": 0.14, + "learning_rate": 2.191058039433491e-05, + "loss": 1.5091, + "step": 12238 + }, + { + "epoch": 0.14, + "learning_rate": 2.1905952050356385e-05, + "loss": 0.914, + "step": 12240 + }, + { + "epoch": 0.14, + "learning_rate": 2.190132370637786e-05, + "loss": 2.3226, + "step": 12242 + }, + { + "epoch": 0.14, + "learning_rate": 2.1896695362399336e-05, + "loss": 0.0121, + "step": 12244 + }, + { + "epoch": 0.14, + "learning_rate": 2.1892067018420808e-05, + "loss": 0.7989, + "step": 12246 + }, + { + "epoch": 0.14, + "learning_rate": 2.1887438674442284e-05, + "loss": 1.2917, + "step": 12248 + }, + { + "epoch": 0.14, + "learning_rate": 2.188281033046376e-05, + "loss": 2.9295, + "step": 12250 + }, + { + "epoch": 0.14, + "learning_rate": 2.1878181986485234e-05, + "loss": 3.944, + "step": 12252 + }, + { + "epoch": 0.14, + "learning_rate": 2.187355364250671e-05, + "loss": 3.2112, + "step": 12254 + }, + { + "epoch": 0.14, + "learning_rate": 2.1868925298528185e-05, + "loss": 0.0018, + "step": 12256 + }, + { + "epoch": 0.14, + "learning_rate": 2.186429695454966e-05, + "loss": 5.8588, + "step": 12258 + }, + { + "epoch": 0.14, + "learning_rate": 2.185966861057114e-05, + "loss": 3.2808, + "step": 12260 + }, + { + "epoch": 0.14, + "learning_rate": 2.1855040266592615e-05, + "loss": 1.3014, + "step": 12262 + }, + { + "epoch": 0.14, + "learning_rate": 2.185041192261409e-05, + "loss": 3.1334, + "step": 12264 + }, + { + "epoch": 0.14, + "learning_rate": 2.1845783578635566e-05, + "loss": 1.7578, + "step": 12266 + }, + { + "epoch": 0.14, + "learning_rate": 2.184115523465704e-05, + "loss": 0.0008, + "step": 12268 + }, + { + "epoch": 0.14, + "learning_rate": 2.1836526890678517e-05, + "loss": 1.9474, + "step": 12270 + }, + { + "epoch": 0.14, + "learning_rate": 2.1831898546699992e-05, + "loss": 0.0259, + "step": 12272 + }, + { + "epoch": 0.14, + "learning_rate": 2.1827270202721467e-05, + "loss": 0.1273, + "step": 12274 + }, + { + "epoch": 0.14, + "learning_rate": 2.1822641858742943e-05, + "loss": 3.8764, + "step": 12276 + }, + { + "epoch": 0.14, + "learning_rate": 2.1818013514764418e-05, + "loss": 1.2258, + "step": 12278 + }, + { + "epoch": 0.14, + "learning_rate": 2.1813385170785894e-05, + "loss": 0.4149, + "step": 12280 + }, + { + "epoch": 0.14, + "learning_rate": 2.180875682680737e-05, + "loss": 1.7527, + "step": 12282 + }, + { + "epoch": 0.14, + "learning_rate": 2.1804128482828845e-05, + "loss": 0.3528, + "step": 12284 + }, + { + "epoch": 0.14, + "learning_rate": 2.179950013885032e-05, + "loss": 1.575, + "step": 12286 + }, + { + "epoch": 0.14, + "learning_rate": 2.1794871794871795e-05, + "loss": 0.2444, + "step": 12288 + }, + { + "epoch": 0.14, + "learning_rate": 2.179024345089327e-05, + "loss": 3.2597, + "step": 12290 + }, + { + "epoch": 0.14, + "learning_rate": 2.1785615106914746e-05, + "loss": 0.5994, + "step": 12292 + }, + { + "epoch": 0.14, + "learning_rate": 2.178098676293622e-05, + "loss": 4.9125, + "step": 12294 + }, + { + "epoch": 0.14, + "learning_rate": 2.1776358418957697e-05, + "loss": 0.0007, + "step": 12296 + }, + { + "epoch": 0.14, + "learning_rate": 2.1771730074979173e-05, + "loss": 0.0036, + "step": 12298 + }, + { + "epoch": 0.14, + "learning_rate": 2.176710173100065e-05, + "loss": 0.0005, + "step": 12300 + }, + { + "epoch": 0.14, + "learning_rate": 2.1762473387022127e-05, + "loss": 5.7649, + "step": 12302 + }, + { + "epoch": 0.14, + "learning_rate": 2.1757845043043602e-05, + "loss": 3.4341, + "step": 12304 + }, + { + "epoch": 0.14, + "learning_rate": 2.1753216699065078e-05, + "loss": 2.5572, + "step": 12306 + }, + { + "epoch": 0.14, + "learning_rate": 2.1748588355086553e-05, + "loss": 2.3305, + "step": 12308 + }, + { + "epoch": 0.14, + "learning_rate": 2.174396001110803e-05, + "loss": 0.9504, + "step": 12310 + }, + { + "epoch": 0.14, + "learning_rate": 2.1739331667129504e-05, + "loss": 6.5185, + "step": 12312 + }, + { + "epoch": 0.14, + "learning_rate": 2.173470332315098e-05, + "loss": 1.2865, + "step": 12314 + }, + { + "epoch": 0.14, + "learning_rate": 2.1730074979172455e-05, + "loss": 1.8144, + "step": 12316 + }, + { + "epoch": 0.14, + "learning_rate": 2.1725446635193927e-05, + "loss": 1.6653, + "step": 12318 + }, + { + "epoch": 0.14, + "learning_rate": 2.1720818291215402e-05, + "loss": 1.0492, + "step": 12320 + }, + { + "epoch": 0.14, + "learning_rate": 2.1716189947236878e-05, + "loss": 0.0015, + "step": 12322 + }, + { + "epoch": 0.14, + "learning_rate": 2.1711561603258353e-05, + "loss": 1.3316, + "step": 12324 + }, + { + "epoch": 0.14, + "learning_rate": 2.170693325927983e-05, + "loss": 0.2044, + "step": 12326 + }, + { + "epoch": 0.14, + "learning_rate": 2.1702304915301304e-05, + "loss": 0.0038, + "step": 12328 + }, + { + "epoch": 0.14, + "learning_rate": 2.169767657132278e-05, + "loss": 0.5461, + "step": 12330 + }, + { + "epoch": 0.14, + "learning_rate": 2.1693048227344258e-05, + "loss": 0.7605, + "step": 12332 + }, + { + "epoch": 0.14, + "learning_rate": 2.1688419883365734e-05, + "loss": 0.192, + "step": 12334 + }, + { + "epoch": 0.14, + "learning_rate": 2.168379153938721e-05, + "loss": 0.6901, + "step": 12336 + }, + { + "epoch": 0.14, + "learning_rate": 2.1679163195408684e-05, + "loss": 0.0063, + "step": 12338 + }, + { + "epoch": 0.14, + "learning_rate": 2.167453485143016e-05, + "loss": 1.4803, + "step": 12340 + }, + { + "epoch": 0.14, + "learning_rate": 2.1669906507451635e-05, + "loss": 0.6461, + "step": 12342 + }, + { + "epoch": 0.14, + "learning_rate": 2.166527816347311e-05, + "loss": 0.0063, + "step": 12344 + }, + { + "epoch": 0.14, + "learning_rate": 2.1660649819494586e-05, + "loss": 10.8623, + "step": 12346 + }, + { + "epoch": 0.14, + "learning_rate": 2.165602147551606e-05, + "loss": 1.2709, + "step": 12348 + }, + { + "epoch": 0.14, + "learning_rate": 2.1651393131537537e-05, + "loss": 0.1504, + "step": 12350 + }, + { + "epoch": 0.14, + "learning_rate": 2.1646764787559012e-05, + "loss": 0.0179, + "step": 12352 + }, + { + "epoch": 0.14, + "learning_rate": 2.1642136443580488e-05, + "loss": 1.2711, + "step": 12354 + }, + { + "epoch": 0.14, + "learning_rate": 2.1637508099601963e-05, + "loss": 0.7502, + "step": 12356 + }, + { + "epoch": 0.14, + "learning_rate": 2.163287975562344e-05, + "loss": 2.5909, + "step": 12358 + }, + { + "epoch": 0.14, + "learning_rate": 2.1628251411644914e-05, + "loss": 0.8392, + "step": 12360 + }, + { + "epoch": 0.14, + "learning_rate": 2.162362306766639e-05, + "loss": 0.9376, + "step": 12362 + }, + { + "epoch": 0.14, + "learning_rate": 2.1618994723687865e-05, + "loss": 2.5356, + "step": 12364 + }, + { + "epoch": 0.14, + "learning_rate": 2.161436637970934e-05, + "loss": 0.044, + "step": 12366 + }, + { + "epoch": 0.14, + "learning_rate": 2.1609738035730816e-05, + "loss": 0.7521, + "step": 12368 + }, + { + "epoch": 0.14, + "learning_rate": 2.160510969175229e-05, + "loss": 0.3094, + "step": 12370 + }, + { + "epoch": 0.14, + "learning_rate": 2.1600481347773767e-05, + "loss": 8.6228, + "step": 12372 + }, + { + "epoch": 0.14, + "learning_rate": 2.1595853003795245e-05, + "loss": 3.5834, + "step": 12374 + }, + { + "epoch": 0.14, + "learning_rate": 2.159122465981672e-05, + "loss": 2.2051, + "step": 12376 + }, + { + "epoch": 0.14, + "learning_rate": 2.1586596315838196e-05, + "loss": 3.6073, + "step": 12378 + }, + { + "epoch": 0.14, + "learning_rate": 2.1581967971859672e-05, + "loss": 0.0027, + "step": 12380 + }, + { + "epoch": 0.14, + "learning_rate": 2.1577339627881147e-05, + "loss": 1.0955, + "step": 12382 + }, + { + "epoch": 0.14, + "learning_rate": 2.1572711283902623e-05, + "loss": 2.8374, + "step": 12384 + }, + { + "epoch": 0.14, + "learning_rate": 2.1568082939924098e-05, + "loss": 2.2274, + "step": 12386 + }, + { + "epoch": 0.14, + "learning_rate": 2.1563454595945573e-05, + "loss": 0.0005, + "step": 12388 + }, + { + "epoch": 0.14, + "learning_rate": 2.1558826251967045e-05, + "loss": 1.9176, + "step": 12390 + }, + { + "epoch": 0.14, + "learning_rate": 2.155419790798852e-05, + "loss": 2.9668, + "step": 12392 + }, + { + "epoch": 0.14, + "learning_rate": 2.1549569564009996e-05, + "loss": 4.8629, + "step": 12394 + }, + { + "epoch": 0.14, + "learning_rate": 2.1544941220031472e-05, + "loss": 5.0929, + "step": 12396 + }, + { + "epoch": 0.14, + "learning_rate": 2.1540312876052947e-05, + "loss": 1.4477, + "step": 12398 + }, + { + "epoch": 0.14, + "learning_rate": 2.1535684532074423e-05, + "loss": 1.8518, + "step": 12400 + }, + { + "epoch": 0.14, + "learning_rate": 2.1531056188095898e-05, + "loss": 0.9887, + "step": 12402 + }, + { + "epoch": 0.14, + "learning_rate": 2.1526427844117373e-05, + "loss": 0.3072, + "step": 12404 + }, + { + "epoch": 0.14, + "learning_rate": 2.1521799500138852e-05, + "loss": 3.3984, + "step": 12406 + }, + { + "epoch": 0.14, + "learning_rate": 2.1517171156160328e-05, + "loss": 1.8133, + "step": 12408 + }, + { + "epoch": 0.14, + "learning_rate": 2.1512542812181803e-05, + "loss": 1.4735, + "step": 12410 + }, + { + "epoch": 0.14, + "learning_rate": 2.150791446820328e-05, + "loss": 1.2587, + "step": 12412 + }, + { + "epoch": 0.14, + "learning_rate": 2.1503286124224754e-05, + "loss": 2.3244, + "step": 12414 + }, + { + "epoch": 0.14, + "learning_rate": 2.149865778024623e-05, + "loss": 0.1937, + "step": 12416 + }, + { + "epoch": 0.14, + "learning_rate": 2.1494029436267705e-05, + "loss": 3.2552, + "step": 12418 + }, + { + "epoch": 0.14, + "learning_rate": 2.148940109228918e-05, + "loss": 3.808, + "step": 12420 + }, + { + "epoch": 0.14, + "learning_rate": 2.1484772748310656e-05, + "loss": 0.9016, + "step": 12422 + }, + { + "epoch": 0.14, + "learning_rate": 2.148014440433213e-05, + "loss": 4.1214, + "step": 12424 + }, + { + "epoch": 0.14, + "learning_rate": 2.1475516060353606e-05, + "loss": 0.0313, + "step": 12426 + }, + { + "epoch": 0.14, + "learning_rate": 2.1470887716375082e-05, + "loss": 2.4133, + "step": 12428 + }, + { + "epoch": 0.14, + "learning_rate": 2.1466259372396557e-05, + "loss": 1.5587, + "step": 12430 + }, + { + "epoch": 0.14, + "learning_rate": 2.1461631028418033e-05, + "loss": 0.0006, + "step": 12432 + }, + { + "epoch": 0.14, + "learning_rate": 2.1457002684439508e-05, + "loss": 0.0472, + "step": 12434 + }, + { + "epoch": 0.14, + "learning_rate": 2.1452374340460984e-05, + "loss": 2.6021, + "step": 12436 + }, + { + "epoch": 0.14, + "learning_rate": 2.144774599648246e-05, + "loss": 1.9521, + "step": 12438 + }, + { + "epoch": 0.14, + "learning_rate": 2.1443117652503934e-05, + "loss": 3.805, + "step": 12440 + }, + { + "epoch": 0.14, + "learning_rate": 2.143848930852541e-05, + "loss": 0.8187, + "step": 12442 + }, + { + "epoch": 0.14, + "learning_rate": 2.1433860964546885e-05, + "loss": 0.7968, + "step": 12444 + }, + { + "epoch": 0.14, + "learning_rate": 2.1429232620568364e-05, + "loss": 2.4827, + "step": 12446 + }, + { + "epoch": 0.14, + "learning_rate": 2.142460427658984e-05, + "loss": 1.9531, + "step": 12448 + }, + { + "epoch": 0.14, + "learning_rate": 2.1419975932611315e-05, + "loss": 2.2906, + "step": 12450 + }, + { + "epoch": 0.14, + "learning_rate": 2.141534758863279e-05, + "loss": 0.0272, + "step": 12452 + }, + { + "epoch": 0.14, + "learning_rate": 2.1410719244654266e-05, + "loss": 0.7308, + "step": 12454 + }, + { + "epoch": 0.14, + "learning_rate": 2.140609090067574e-05, + "loss": 6.2819, + "step": 12456 + }, + { + "epoch": 0.14, + "learning_rate": 2.1401462556697217e-05, + "loss": 4.7064, + "step": 12458 + }, + { + "epoch": 0.14, + "learning_rate": 2.1396834212718692e-05, + "loss": 3.9976, + "step": 12460 + }, + { + "epoch": 0.14, + "learning_rate": 2.1392205868740164e-05, + "loss": 0.4805, + "step": 12462 + }, + { + "epoch": 0.14, + "learning_rate": 2.138757752476164e-05, + "loss": 1.328, + "step": 12464 + }, + { + "epoch": 0.14, + "learning_rate": 2.1382949180783115e-05, + "loss": 0.0034, + "step": 12466 + }, + { + "epoch": 0.14, + "learning_rate": 2.137832083680459e-05, + "loss": 0.0015, + "step": 12468 + }, + { + "epoch": 0.14, + "learning_rate": 2.1373692492826066e-05, + "loss": 5.5032, + "step": 12470 + }, + { + "epoch": 0.14, + "learning_rate": 2.136906414884754e-05, + "loss": 0.2279, + "step": 12472 + }, + { + "epoch": 0.14, + "learning_rate": 2.1364435804869017e-05, + "loss": 3.3994, + "step": 12474 + }, + { + "epoch": 0.14, + "learning_rate": 2.1359807460890492e-05, + "loss": 4.1042, + "step": 12476 + }, + { + "epoch": 0.14, + "learning_rate": 2.1355179116911968e-05, + "loss": 0.4884, + "step": 12478 + }, + { + "epoch": 0.14, + "learning_rate": 2.1350550772933446e-05, + "loss": 2.0963, + "step": 12480 + }, + { + "epoch": 0.14, + "learning_rate": 2.1345922428954922e-05, + "loss": 3.9096, + "step": 12482 + }, + { + "epoch": 0.14, + "learning_rate": 2.1341294084976397e-05, + "loss": 0.9575, + "step": 12484 + }, + { + "epoch": 0.14, + "learning_rate": 2.1336665740997873e-05, + "loss": 1.9021, + "step": 12486 + }, + { + "epoch": 0.14, + "learning_rate": 2.1332037397019348e-05, + "loss": 0.0673, + "step": 12488 + }, + { + "epoch": 0.14, + "learning_rate": 2.1327409053040823e-05, + "loss": 0.0168, + "step": 12490 + }, + { + "epoch": 0.14, + "learning_rate": 2.13227807090623e-05, + "loss": 0.5293, + "step": 12492 + }, + { + "epoch": 0.14, + "learning_rate": 2.1318152365083774e-05, + "loss": 0.5195, + "step": 12494 + }, + { + "epoch": 0.14, + "learning_rate": 2.131352402110525e-05, + "loss": 5.101, + "step": 12496 + }, + { + "epoch": 0.14, + "learning_rate": 2.1308895677126725e-05, + "loss": 0.7056, + "step": 12498 + }, + { + "epoch": 0.14, + "learning_rate": 2.13042673331482e-05, + "loss": 1.975, + "step": 12500 + }, + { + "epoch": 0.14, + "learning_rate": 2.1299638989169676e-05, + "loss": 1.7308, + "step": 12502 + }, + { + "epoch": 0.14, + "learning_rate": 2.129501064519115e-05, + "loss": 0.001, + "step": 12504 + }, + { + "epoch": 0.14, + "learning_rate": 2.1290382301212627e-05, + "loss": 0.5795, + "step": 12506 + }, + { + "epoch": 0.14, + "learning_rate": 2.1285753957234102e-05, + "loss": 6.7808, + "step": 12508 + }, + { + "epoch": 0.14, + "learning_rate": 2.1281125613255578e-05, + "loss": 0.737, + "step": 12510 + }, + { + "epoch": 0.14, + "learning_rate": 2.1276497269277053e-05, + "loss": 2.0307, + "step": 12512 + }, + { + "epoch": 0.14, + "learning_rate": 2.127186892529853e-05, + "loss": 1.8205, + "step": 12514 + }, + { + "epoch": 0.14, + "learning_rate": 2.1267240581320004e-05, + "loss": 7.3931, + "step": 12516 + }, + { + "epoch": 0.14, + "learning_rate": 2.126261223734148e-05, + "loss": 5.2621, + "step": 12518 + }, + { + "epoch": 0.14, + "learning_rate": 2.1257983893362958e-05, + "loss": 4.605, + "step": 12520 + }, + { + "epoch": 0.14, + "learning_rate": 2.1253355549384434e-05, + "loss": 0.0411, + "step": 12522 + }, + { + "epoch": 0.14, + "learning_rate": 2.124872720540591e-05, + "loss": 3.6925, + "step": 12524 + }, + { + "epoch": 0.14, + "learning_rate": 2.1244098861427384e-05, + "loss": 0.0049, + "step": 12526 + }, + { + "epoch": 0.14, + "learning_rate": 2.123947051744886e-05, + "loss": 1.5443, + "step": 12528 + }, + { + "epoch": 0.14, + "learning_rate": 2.1234842173470335e-05, + "loss": 0.4193, + "step": 12530 + }, + { + "epoch": 0.14, + "learning_rate": 2.123021382949181e-05, + "loss": 0.619, + "step": 12532 + }, + { + "epoch": 0.14, + "learning_rate": 2.1225585485513286e-05, + "loss": 0.002, + "step": 12534 + }, + { + "epoch": 0.14, + "learning_rate": 2.1220957141534758e-05, + "loss": 0.0042, + "step": 12536 + }, + { + "epoch": 0.14, + "learning_rate": 2.1216328797556234e-05, + "loss": 2.2486, + "step": 12538 + }, + { + "epoch": 0.14, + "learning_rate": 2.121170045357771e-05, + "loss": 0.2441, + "step": 12540 + }, + { + "epoch": 0.14, + "learning_rate": 2.1207072109599184e-05, + "loss": 0.0034, + "step": 12542 + }, + { + "epoch": 0.14, + "learning_rate": 2.120244376562066e-05, + "loss": 0.5065, + "step": 12544 + }, + { + "epoch": 0.14, + "learning_rate": 2.1197815421642135e-05, + "loss": 1.5904, + "step": 12546 + }, + { + "epoch": 0.14, + "learning_rate": 2.119318707766361e-05, + "loss": 0.0006, + "step": 12548 + }, + { + "epoch": 0.14, + "learning_rate": 2.1188558733685086e-05, + "loss": 0.0007, + "step": 12550 + }, + { + "epoch": 0.14, + "learning_rate": 2.118393038970656e-05, + "loss": 3.5764, + "step": 12552 + }, + { + "epoch": 0.14, + "learning_rate": 2.117930204572804e-05, + "loss": 0.0056, + "step": 12554 + }, + { + "epoch": 0.14, + "learning_rate": 2.1174673701749516e-05, + "loss": 0.0141, + "step": 12556 + }, + { + "epoch": 0.14, + "learning_rate": 2.117004535777099e-05, + "loss": 0.7065, + "step": 12558 + }, + { + "epoch": 0.14, + "learning_rate": 2.1165417013792467e-05, + "loss": 0.4386, + "step": 12560 + }, + { + "epoch": 0.14, + "learning_rate": 2.1160788669813942e-05, + "loss": 0.0003, + "step": 12562 + }, + { + "epoch": 0.14, + "learning_rate": 2.1156160325835418e-05, + "loss": 0.0003, + "step": 12564 + }, + { + "epoch": 0.14, + "learning_rate": 2.1151531981856893e-05, + "loss": 3.5506, + "step": 12566 + }, + { + "epoch": 0.14, + "learning_rate": 2.114690363787837e-05, + "loss": 0.0942, + "step": 12568 + }, + { + "epoch": 0.14, + "learning_rate": 2.1142275293899844e-05, + "loss": 2.3554, + "step": 12570 + }, + { + "epoch": 0.14, + "learning_rate": 2.113764694992132e-05, + "loss": 0.009, + "step": 12572 + }, + { + "epoch": 0.14, + "learning_rate": 2.1133018605942795e-05, + "loss": 0.9075, + "step": 12574 + }, + { + "epoch": 0.14, + "learning_rate": 2.112839026196427e-05, + "loss": 0.0004, + "step": 12576 + }, + { + "epoch": 0.14, + "learning_rate": 2.1123761917985746e-05, + "loss": 3.1724, + "step": 12578 + }, + { + "epoch": 0.14, + "learning_rate": 2.111913357400722e-05, + "loss": 2.4001, + "step": 12580 + }, + { + "epoch": 0.14, + "learning_rate": 2.1114505230028696e-05, + "loss": 3.1454, + "step": 12582 + }, + { + "epoch": 0.14, + "learning_rate": 2.1109876886050172e-05, + "loss": 5.189, + "step": 12584 + }, + { + "epoch": 0.14, + "learning_rate": 2.1105248542071647e-05, + "loss": 1.067, + "step": 12586 + }, + { + "epoch": 0.14, + "learning_rate": 2.1100620198093123e-05, + "loss": 2.3112, + "step": 12588 + }, + { + "epoch": 0.15, + "learning_rate": 2.1095991854114598e-05, + "loss": 4.5257, + "step": 12590 + }, + { + "epoch": 0.15, + "learning_rate": 2.1091363510136073e-05, + "loss": 2.5536, + "step": 12592 + }, + { + "epoch": 0.15, + "learning_rate": 2.1086735166157552e-05, + "loss": 3.8379, + "step": 12594 + }, + { + "epoch": 0.15, + "learning_rate": 2.1082106822179028e-05, + "loss": 0.3602, + "step": 12596 + }, + { + "epoch": 0.15, + "learning_rate": 2.1077478478200503e-05, + "loss": 0.0007, + "step": 12598 + }, + { + "epoch": 0.15, + "learning_rate": 2.107285013422198e-05, + "loss": 0.8321, + "step": 12600 + }, + { + "epoch": 0.15, + "learning_rate": 2.1068221790243454e-05, + "loss": 0.0004, + "step": 12602 + }, + { + "epoch": 0.15, + "learning_rate": 2.106359344626493e-05, + "loss": 3.036, + "step": 12604 + }, + { + "epoch": 0.15, + "learning_rate": 2.1058965102286405e-05, + "loss": 0.3945, + "step": 12606 + }, + { + "epoch": 0.15, + "learning_rate": 2.1054336758307877e-05, + "loss": 0.7987, + "step": 12608 + }, + { + "epoch": 0.15, + "learning_rate": 2.1049708414329352e-05, + "loss": 3.4054, + "step": 12610 + }, + { + "epoch": 0.15, + "learning_rate": 2.1045080070350828e-05, + "loss": 2.5751, + "step": 12612 + }, + { + "epoch": 0.15, + "learning_rate": 2.1040451726372303e-05, + "loss": 3.7509, + "step": 12614 + }, + { + "epoch": 0.15, + "learning_rate": 2.103582338239378e-05, + "loss": 1.5793, + "step": 12616 + }, + { + "epoch": 0.15, + "learning_rate": 2.1031195038415254e-05, + "loss": 2.4525, + "step": 12618 + }, + { + "epoch": 0.15, + "learning_rate": 2.102656669443673e-05, + "loss": 4.0913, + "step": 12620 + }, + { + "epoch": 0.15, + "learning_rate": 2.1021938350458205e-05, + "loss": 2.4787, + "step": 12622 + }, + { + "epoch": 0.15, + "learning_rate": 2.101731000647968e-05, + "loss": 0.036, + "step": 12624 + }, + { + "epoch": 0.15, + "learning_rate": 2.101268166250116e-05, + "loss": 0.0006, + "step": 12626 + }, + { + "epoch": 0.15, + "learning_rate": 2.1008053318522635e-05, + "loss": 1.1443, + "step": 12628 + }, + { + "epoch": 0.15, + "learning_rate": 2.100342497454411e-05, + "loss": 1.4905, + "step": 12630 + }, + { + "epoch": 0.15, + "learning_rate": 2.0998796630565585e-05, + "loss": 0.1112, + "step": 12632 + }, + { + "epoch": 0.15, + "learning_rate": 2.099416828658706e-05, + "loss": 5.1063, + "step": 12634 + }, + { + "epoch": 0.15, + "learning_rate": 2.0989539942608536e-05, + "loss": 0.6458, + "step": 12636 + }, + { + "epoch": 0.15, + "learning_rate": 2.098491159863001e-05, + "loss": 2.1376, + "step": 12638 + }, + { + "epoch": 0.15, + "learning_rate": 2.0980283254651487e-05, + "loss": 2.3098, + "step": 12640 + }, + { + "epoch": 0.15, + "learning_rate": 2.0975654910672962e-05, + "loss": 2.2796, + "step": 12642 + }, + { + "epoch": 0.15, + "learning_rate": 2.0971026566694438e-05, + "loss": 1.2828, + "step": 12644 + }, + { + "epoch": 0.15, + "learning_rate": 2.0966398222715913e-05, + "loss": 1.5862, + "step": 12646 + }, + { + "epoch": 0.15, + "learning_rate": 2.096176987873739e-05, + "loss": 0.5897, + "step": 12648 + }, + { + "epoch": 0.15, + "learning_rate": 2.0957141534758864e-05, + "loss": 9.0643, + "step": 12650 + }, + { + "epoch": 0.15, + "learning_rate": 2.095251319078034e-05, + "loss": 2.4861, + "step": 12652 + }, + { + "epoch": 0.15, + "learning_rate": 2.0947884846801815e-05, + "loss": 0.0003, + "step": 12654 + }, + { + "epoch": 0.15, + "learning_rate": 2.094325650282329e-05, + "loss": 7.8567, + "step": 12656 + }, + { + "epoch": 0.15, + "learning_rate": 2.0938628158844766e-05, + "loss": 0.9355, + "step": 12658 + }, + { + "epoch": 0.15, + "learning_rate": 2.093399981486624e-05, + "loss": 0.7322, + "step": 12660 + }, + { + "epoch": 0.15, + "learning_rate": 2.0929371470887717e-05, + "loss": 1.5692, + "step": 12662 + }, + { + "epoch": 0.15, + "learning_rate": 2.0924743126909192e-05, + "loss": 0.4464, + "step": 12664 + }, + { + "epoch": 0.15, + "learning_rate": 2.092011478293067e-05, + "loss": 1.1605, + "step": 12666 + }, + { + "epoch": 0.15, + "learning_rate": 2.0915486438952146e-05, + "loss": 0.7542, + "step": 12668 + }, + { + "epoch": 0.15, + "learning_rate": 2.0910858094973622e-05, + "loss": 4.1189, + "step": 12670 + }, + { + "epoch": 0.15, + "learning_rate": 2.0906229750995097e-05, + "loss": 1.3662, + "step": 12672 + }, + { + "epoch": 0.15, + "learning_rate": 2.0901601407016573e-05, + "loss": 5.5894, + "step": 12674 + }, + { + "epoch": 0.15, + "learning_rate": 2.0896973063038048e-05, + "loss": 0.2732, + "step": 12676 + }, + { + "epoch": 0.15, + "learning_rate": 2.0892344719059524e-05, + "loss": 3.548, + "step": 12678 + }, + { + "epoch": 0.15, + "learning_rate": 2.0887716375080996e-05, + "loss": 1.5096, + "step": 12680 + }, + { + "epoch": 0.15, + "learning_rate": 2.088308803110247e-05, + "loss": 0.2838, + "step": 12682 + }, + { + "epoch": 0.15, + "learning_rate": 2.0878459687123946e-05, + "loss": 1.7244, + "step": 12684 + }, + { + "epoch": 0.15, + "learning_rate": 2.0873831343145422e-05, + "loss": 2.6493, + "step": 12686 + }, + { + "epoch": 0.15, + "learning_rate": 2.0869202999166897e-05, + "loss": 2.255, + "step": 12688 + }, + { + "epoch": 0.15, + "learning_rate": 2.0864574655188373e-05, + "loss": 1.7124, + "step": 12690 + }, + { + "epoch": 0.15, + "learning_rate": 2.0859946311209848e-05, + "loss": 0.8541, + "step": 12692 + }, + { + "epoch": 0.15, + "learning_rate": 2.0855317967231324e-05, + "loss": 4.0551, + "step": 12694 + }, + { + "epoch": 0.15, + "learning_rate": 2.08506896232528e-05, + "loss": 2.8005, + "step": 12696 + }, + { + "epoch": 0.15, + "learning_rate": 2.0846061279274274e-05, + "loss": 2.2532, + "step": 12698 + }, + { + "epoch": 0.15, + "learning_rate": 2.0841432935295753e-05, + "loss": 0.0196, + "step": 12700 + }, + { + "epoch": 0.15, + "learning_rate": 2.083680459131723e-05, + "loss": 0.1673, + "step": 12702 + }, + { + "epoch": 0.15, + "learning_rate": 2.0832176247338704e-05, + "loss": 3.9807, + "step": 12704 + }, + { + "epoch": 0.15, + "learning_rate": 2.082754790336018e-05, + "loss": 0.0847, + "step": 12706 + }, + { + "epoch": 0.15, + "learning_rate": 2.0822919559381655e-05, + "loss": 0.4479, + "step": 12708 + }, + { + "epoch": 0.15, + "learning_rate": 2.081829121540313e-05, + "loss": 2.2053, + "step": 12710 + }, + { + "epoch": 0.15, + "learning_rate": 2.0813662871424606e-05, + "loss": 2.7647, + "step": 12712 + }, + { + "epoch": 0.15, + "learning_rate": 2.080903452744608e-05, + "loss": 0.8371, + "step": 12714 + }, + { + "epoch": 0.15, + "learning_rate": 2.0804406183467557e-05, + "loss": 2.5045, + "step": 12716 + }, + { + "epoch": 0.15, + "learning_rate": 2.0799777839489032e-05, + "loss": 1.2528, + "step": 12718 + }, + { + "epoch": 0.15, + "learning_rate": 2.0795149495510507e-05, + "loss": 2.1941, + "step": 12720 + }, + { + "epoch": 0.15, + "learning_rate": 2.0790521151531983e-05, + "loss": 0.214, + "step": 12722 + }, + { + "epoch": 0.15, + "learning_rate": 2.0785892807553458e-05, + "loss": 1.1561, + "step": 12724 + }, + { + "epoch": 0.15, + "learning_rate": 2.0781264463574934e-05, + "loss": 0.8329, + "step": 12726 + }, + { + "epoch": 0.15, + "learning_rate": 2.077663611959641e-05, + "loss": 0.337, + "step": 12728 + }, + { + "epoch": 0.15, + "learning_rate": 2.0772007775617885e-05, + "loss": 1.6052, + "step": 12730 + }, + { + "epoch": 0.15, + "learning_rate": 2.076737943163936e-05, + "loss": 1.4785, + "step": 12732 + }, + { + "epoch": 0.15, + "learning_rate": 2.0762751087660835e-05, + "loss": 0.1332, + "step": 12734 + }, + { + "epoch": 0.15, + "learning_rate": 2.075812274368231e-05, + "loss": 3.4472, + "step": 12736 + }, + { + "epoch": 0.15, + "learning_rate": 2.0753494399703786e-05, + "loss": 1.1413, + "step": 12738 + }, + { + "epoch": 0.15, + "learning_rate": 2.0748866055725265e-05, + "loss": 1.5087, + "step": 12740 + }, + { + "epoch": 0.15, + "learning_rate": 2.074423771174674e-05, + "loss": 1.7845, + "step": 12742 + }, + { + "epoch": 0.15, + "learning_rate": 2.0739609367768216e-05, + "loss": 5.8833, + "step": 12744 + }, + { + "epoch": 0.15, + "learning_rate": 2.073498102378969e-05, + "loss": 0.2313, + "step": 12746 + }, + { + "epoch": 0.15, + "learning_rate": 2.0730352679811167e-05, + "loss": 0.0004, + "step": 12748 + }, + { + "epoch": 0.15, + "learning_rate": 2.0725724335832642e-05, + "loss": 1.6512, + "step": 12750 + }, + { + "epoch": 0.15, + "learning_rate": 2.0721095991854114e-05, + "loss": 2.8994, + "step": 12752 + }, + { + "epoch": 0.15, + "learning_rate": 2.071646764787559e-05, + "loss": 1.3225, + "step": 12754 + }, + { + "epoch": 0.15, + "learning_rate": 2.0711839303897065e-05, + "loss": 3.3233, + "step": 12756 + }, + { + "epoch": 0.15, + "learning_rate": 2.070721095991854e-05, + "loss": 0.049, + "step": 12758 + }, + { + "epoch": 0.15, + "learning_rate": 2.0702582615940016e-05, + "loss": 4.8666, + "step": 12760 + }, + { + "epoch": 0.15, + "learning_rate": 2.069795427196149e-05, + "loss": 0.0006, + "step": 12762 + }, + { + "epoch": 0.15, + "learning_rate": 2.0693325927982967e-05, + "loss": 0.0006, + "step": 12764 + }, + { + "epoch": 0.15, + "learning_rate": 2.0688697584004442e-05, + "loss": 1.4813, + "step": 12766 + }, + { + "epoch": 0.15, + "learning_rate": 2.0684069240025918e-05, + "loss": 6.5309, + "step": 12768 + }, + { + "epoch": 0.15, + "learning_rate": 2.0679440896047393e-05, + "loss": 4.0542, + "step": 12770 + }, + { + "epoch": 0.15, + "learning_rate": 2.067481255206887e-05, + "loss": 1.0355, + "step": 12772 + }, + { + "epoch": 0.15, + "learning_rate": 2.0670184208090347e-05, + "loss": 4.674, + "step": 12774 + }, + { + "epoch": 0.15, + "learning_rate": 2.0665555864111823e-05, + "loss": 4.5592, + "step": 12776 + }, + { + "epoch": 0.15, + "learning_rate": 2.0660927520133298e-05, + "loss": 0.3153, + "step": 12778 + }, + { + "epoch": 0.15, + "learning_rate": 2.0656299176154774e-05, + "loss": 0.1512, + "step": 12780 + }, + { + "epoch": 0.15, + "learning_rate": 2.065167083217625e-05, + "loss": 4.1385, + "step": 12782 + }, + { + "epoch": 0.15, + "learning_rate": 2.0647042488197724e-05, + "loss": 3.406, + "step": 12784 + }, + { + "epoch": 0.15, + "learning_rate": 2.06424141442192e-05, + "loss": 1.9741, + "step": 12786 + }, + { + "epoch": 0.15, + "learning_rate": 2.0637785800240675e-05, + "loss": 7.9059, + "step": 12788 + }, + { + "epoch": 0.15, + "learning_rate": 2.063315745626215e-05, + "loss": 0.0782, + "step": 12790 + }, + { + "epoch": 0.15, + "learning_rate": 2.0628529112283626e-05, + "loss": 0.9353, + "step": 12792 + }, + { + "epoch": 0.15, + "learning_rate": 2.06239007683051e-05, + "loss": 0.0064, + "step": 12794 + }, + { + "epoch": 0.15, + "learning_rate": 2.0619272424326577e-05, + "loss": 1.4788, + "step": 12796 + }, + { + "epoch": 0.15, + "learning_rate": 2.0614644080348052e-05, + "loss": 0.0015, + "step": 12798 + }, + { + "epoch": 0.15, + "learning_rate": 2.0610015736369528e-05, + "loss": 2.7884, + "step": 12800 + }, + { + "epoch": 0.15, + "learning_rate": 2.0605387392391003e-05, + "loss": 0.8954, + "step": 12802 + }, + { + "epoch": 0.15, + "learning_rate": 2.060075904841248e-05, + "loss": 0.9421, + "step": 12804 + }, + { + "epoch": 0.15, + "learning_rate": 2.0596130704433954e-05, + "loss": 0.0039, + "step": 12806 + }, + { + "epoch": 0.15, + "learning_rate": 2.059150236045543e-05, + "loss": 2.3414, + "step": 12808 + }, + { + "epoch": 0.15, + "learning_rate": 2.0586874016476905e-05, + "loss": 0.6735, + "step": 12810 + }, + { + "epoch": 0.15, + "learning_rate": 2.058224567249838e-05, + "loss": 3.2192, + "step": 12812 + }, + { + "epoch": 0.15, + "learning_rate": 2.057761732851986e-05, + "loss": 1.1327, + "step": 12814 + }, + { + "epoch": 0.15, + "learning_rate": 2.0572988984541335e-05, + "loss": 4.704, + "step": 12816 + }, + { + "epoch": 0.15, + "learning_rate": 2.056836064056281e-05, + "loss": 1.83, + "step": 12818 + }, + { + "epoch": 0.15, + "learning_rate": 2.0563732296584285e-05, + "loss": 4.1857, + "step": 12820 + }, + { + "epoch": 0.15, + "learning_rate": 2.055910395260576e-05, + "loss": 1.3008, + "step": 12822 + }, + { + "epoch": 0.15, + "learning_rate": 2.0554475608627236e-05, + "loss": 1.7161, + "step": 12824 + }, + { + "epoch": 0.15, + "learning_rate": 2.054984726464871e-05, + "loss": 2.7708, + "step": 12826 + }, + { + "epoch": 0.15, + "learning_rate": 2.0545218920670184e-05, + "loss": 2.8979, + "step": 12828 + }, + { + "epoch": 0.15, + "learning_rate": 2.054059057669166e-05, + "loss": 1.6352, + "step": 12830 + }, + { + "epoch": 0.15, + "learning_rate": 2.0535962232713135e-05, + "loss": 4.6781, + "step": 12832 + }, + { + "epoch": 0.15, + "learning_rate": 2.053133388873461e-05, + "loss": 0.8877, + "step": 12834 + }, + { + "epoch": 0.15, + "learning_rate": 2.0526705544756085e-05, + "loss": 0.4359, + "step": 12836 + }, + { + "epoch": 0.15, + "learning_rate": 2.052207720077756e-05, + "loss": 3.532, + "step": 12838 + }, + { + "epoch": 0.15, + "learning_rate": 2.0517448856799036e-05, + "loss": 2.4899, + "step": 12840 + }, + { + "epoch": 0.15, + "learning_rate": 2.0512820512820512e-05, + "loss": 0.0106, + "step": 12842 + }, + { + "epoch": 0.15, + "learning_rate": 2.0508192168841987e-05, + "loss": 0.6557, + "step": 12844 + }, + { + "epoch": 0.15, + "learning_rate": 2.0503563824863466e-05, + "loss": 0.0064, + "step": 12846 + }, + { + "epoch": 0.15, + "learning_rate": 2.049893548088494e-05, + "loss": 0.8528, + "step": 12848 + }, + { + "epoch": 0.15, + "learning_rate": 2.0494307136906417e-05, + "loss": 0.8456, + "step": 12850 + }, + { + "epoch": 0.15, + "learning_rate": 2.0489678792927892e-05, + "loss": 3.2293, + "step": 12852 + }, + { + "epoch": 0.15, + "learning_rate": 2.0485050448949368e-05, + "loss": 2.3659, + "step": 12854 + }, + { + "epoch": 0.15, + "learning_rate": 2.0480422104970843e-05, + "loss": 1.6216, + "step": 12856 + }, + { + "epoch": 0.15, + "learning_rate": 2.047579376099232e-05, + "loss": 0.3492, + "step": 12858 + }, + { + "epoch": 0.15, + "learning_rate": 2.0471165417013794e-05, + "loss": 6.1425, + "step": 12860 + }, + { + "epoch": 0.15, + "learning_rate": 2.046653707303527e-05, + "loss": 4.7499, + "step": 12862 + }, + { + "epoch": 0.15, + "learning_rate": 2.0461908729056745e-05, + "loss": 0.0539, + "step": 12864 + }, + { + "epoch": 0.15, + "learning_rate": 2.045728038507822e-05, + "loss": 0.977, + "step": 12866 + }, + { + "epoch": 0.15, + "learning_rate": 2.0452652041099696e-05, + "loss": 0.0017, + "step": 12868 + }, + { + "epoch": 0.15, + "learning_rate": 2.044802369712117e-05, + "loss": 1.4585, + "step": 12870 + }, + { + "epoch": 0.15, + "learning_rate": 2.0443395353142646e-05, + "loss": 2.122, + "step": 12872 + }, + { + "epoch": 0.15, + "learning_rate": 2.0438767009164122e-05, + "loss": 2.2445, + "step": 12874 + }, + { + "epoch": 0.15, + "learning_rate": 2.0434138665185597e-05, + "loss": 0.2004, + "step": 12876 + }, + { + "epoch": 0.15, + "learning_rate": 2.0429510321207073e-05, + "loss": 0.433, + "step": 12878 + }, + { + "epoch": 0.15, + "learning_rate": 2.0424881977228548e-05, + "loss": 1.3428, + "step": 12880 + }, + { + "epoch": 0.15, + "learning_rate": 2.0420253633250024e-05, + "loss": 1.0899, + "step": 12882 + }, + { + "epoch": 0.15, + "learning_rate": 2.04156252892715e-05, + "loss": 0.0003, + "step": 12884 + }, + { + "epoch": 0.15, + "learning_rate": 2.0410996945292974e-05, + "loss": 8.1004, + "step": 12886 + }, + { + "epoch": 0.15, + "learning_rate": 2.0406368601314453e-05, + "loss": 2.4207, + "step": 12888 + }, + { + "epoch": 0.15, + "learning_rate": 2.040174025733593e-05, + "loss": 1.0712, + "step": 12890 + }, + { + "epoch": 0.15, + "learning_rate": 2.0397111913357404e-05, + "loss": 1.9182, + "step": 12892 + }, + { + "epoch": 0.15, + "learning_rate": 2.039248356937888e-05, + "loss": 1.3091, + "step": 12894 + }, + { + "epoch": 0.15, + "learning_rate": 2.0387855225400355e-05, + "loss": 3.1954, + "step": 12896 + }, + { + "epoch": 0.15, + "learning_rate": 2.0383226881421827e-05, + "loss": 1.1417, + "step": 12898 + }, + { + "epoch": 0.15, + "learning_rate": 2.0378598537443302e-05, + "loss": 8.073, + "step": 12900 + }, + { + "epoch": 0.15, + "learning_rate": 2.0373970193464778e-05, + "loss": 1.351, + "step": 12902 + }, + { + "epoch": 0.15, + "learning_rate": 2.0369341849486253e-05, + "loss": 0.9674, + "step": 12904 + }, + { + "epoch": 0.15, + "learning_rate": 2.036471350550773e-05, + "loss": 3.9606, + "step": 12906 + }, + { + "epoch": 0.15, + "learning_rate": 2.0360085161529204e-05, + "loss": 4.4583, + "step": 12908 + }, + { + "epoch": 0.15, + "learning_rate": 2.035545681755068e-05, + "loss": 0.8235, + "step": 12910 + }, + { + "epoch": 0.15, + "learning_rate": 2.0350828473572155e-05, + "loss": 0.885, + "step": 12912 + }, + { + "epoch": 0.15, + "learning_rate": 2.034620012959363e-05, + "loss": 2.0684, + "step": 12914 + }, + { + "epoch": 0.15, + "learning_rate": 2.0341571785615106e-05, + "loss": 0.8671, + "step": 12916 + }, + { + "epoch": 0.15, + "learning_rate": 2.033694344163658e-05, + "loss": 2.2294, + "step": 12918 + }, + { + "epoch": 0.15, + "learning_rate": 2.033231509765806e-05, + "loss": 2.7857, + "step": 12920 + }, + { + "epoch": 0.15, + "learning_rate": 2.0327686753679535e-05, + "loss": 2.0503, + "step": 12922 + }, + { + "epoch": 0.15, + "learning_rate": 2.032305840970101e-05, + "loss": 1.6263, + "step": 12924 + }, + { + "epoch": 0.15, + "learning_rate": 2.0318430065722486e-05, + "loss": 1.1333, + "step": 12926 + }, + { + "epoch": 0.15, + "learning_rate": 2.0313801721743962e-05, + "loss": 0.4096, + "step": 12928 + }, + { + "epoch": 0.15, + "learning_rate": 2.0309173377765437e-05, + "loss": 2.0053, + "step": 12930 + }, + { + "epoch": 0.15, + "learning_rate": 2.0304545033786913e-05, + "loss": 2.9911, + "step": 12932 + }, + { + "epoch": 0.15, + "learning_rate": 2.0299916689808388e-05, + "loss": 1.2863, + "step": 12934 + }, + { + "epoch": 0.15, + "learning_rate": 2.0295288345829863e-05, + "loss": 2.0375, + "step": 12936 + }, + { + "epoch": 0.15, + "learning_rate": 2.029066000185134e-05, + "loss": 1.662, + "step": 12938 + }, + { + "epoch": 0.15, + "learning_rate": 2.0286031657872814e-05, + "loss": 0.5018, + "step": 12940 + }, + { + "epoch": 0.15, + "learning_rate": 2.028140331389429e-05, + "loss": 2.9333, + "step": 12942 + }, + { + "epoch": 0.15, + "learning_rate": 2.0276774969915765e-05, + "loss": 0.1588, + "step": 12944 + }, + { + "epoch": 0.15, + "learning_rate": 2.027214662593724e-05, + "loss": 0.1355, + "step": 12946 + }, + { + "epoch": 0.15, + "learning_rate": 2.0267518281958716e-05, + "loss": 3.4959, + "step": 12948 + }, + { + "epoch": 0.15, + "learning_rate": 2.026288993798019e-05, + "loss": 0.0731, + "step": 12950 + }, + { + "epoch": 0.15, + "learning_rate": 2.0258261594001667e-05, + "loss": 0.0023, + "step": 12952 + }, + { + "epoch": 0.15, + "learning_rate": 2.0253633250023142e-05, + "loss": 5.8339, + "step": 12954 + }, + { + "epoch": 0.15, + "learning_rate": 2.0249004906044618e-05, + "loss": 3.2694, + "step": 12956 + }, + { + "epoch": 0.15, + "learning_rate": 2.0244376562066093e-05, + "loss": 2.7101, + "step": 12958 + }, + { + "epoch": 0.15, + "learning_rate": 2.0239748218087572e-05, + "loss": 2.5793, + "step": 12960 + }, + { + "epoch": 0.15, + "learning_rate": 2.0235119874109047e-05, + "loss": 0.3884, + "step": 12962 + }, + { + "epoch": 0.15, + "learning_rate": 2.0230491530130523e-05, + "loss": 0.4251, + "step": 12964 + }, + { + "epoch": 0.15, + "learning_rate": 2.0225863186151998e-05, + "loss": 5.3666, + "step": 12966 + }, + { + "epoch": 0.15, + "learning_rate": 2.0221234842173474e-05, + "loss": 0.0722, + "step": 12968 + }, + { + "epoch": 0.15, + "learning_rate": 2.0216606498194946e-05, + "loss": 2.2163, + "step": 12970 + }, + { + "epoch": 0.15, + "learning_rate": 2.021197815421642e-05, + "loss": 1.84, + "step": 12972 + }, + { + "epoch": 0.15, + "learning_rate": 2.0207349810237897e-05, + "loss": 6.5032, + "step": 12974 + }, + { + "epoch": 0.15, + "learning_rate": 2.0202721466259372e-05, + "loss": 0.5512, + "step": 12976 + }, + { + "epoch": 0.15, + "learning_rate": 2.0198093122280847e-05, + "loss": 4.4299, + "step": 12978 + }, + { + "epoch": 0.15, + "learning_rate": 2.0193464778302323e-05, + "loss": 6.1811, + "step": 12980 + }, + { + "epoch": 0.15, + "learning_rate": 2.0188836434323798e-05, + "loss": 0.5635, + "step": 12982 + }, + { + "epoch": 0.15, + "learning_rate": 2.0184208090345274e-05, + "loss": 2.2466, + "step": 12984 + }, + { + "epoch": 0.15, + "learning_rate": 2.017957974636675e-05, + "loss": 2.2934, + "step": 12986 + }, + { + "epoch": 0.15, + "learning_rate": 2.0174951402388224e-05, + "loss": 2.1685, + "step": 12988 + }, + { + "epoch": 0.15, + "learning_rate": 2.01703230584097e-05, + "loss": 0.7945, + "step": 12990 + }, + { + "epoch": 0.15, + "learning_rate": 2.0165694714431175e-05, + "loss": 1.0952, + "step": 12992 + }, + { + "epoch": 0.15, + "learning_rate": 2.0161066370452654e-05, + "loss": 3.53, + "step": 12994 + }, + { + "epoch": 0.15, + "learning_rate": 2.015643802647413e-05, + "loss": 4.7015, + "step": 12996 + }, + { + "epoch": 0.15, + "learning_rate": 2.0151809682495605e-05, + "loss": 3.2434, + "step": 12998 + }, + { + "epoch": 0.15, + "learning_rate": 2.014718133851708e-05, + "loss": 3.6138, + "step": 13000 + }, + { + "epoch": 0.15, + "learning_rate": 2.0142552994538556e-05, + "loss": 4.4853, + "step": 13002 + }, + { + "epoch": 0.15, + "learning_rate": 2.013792465056003e-05, + "loss": 3.1557, + "step": 13004 + }, + { + "epoch": 0.15, + "learning_rate": 2.0133296306581507e-05, + "loss": 2.6206, + "step": 13006 + }, + { + "epoch": 0.15, + "learning_rate": 2.0128667962602982e-05, + "loss": 1.1429, + "step": 13008 + }, + { + "epoch": 0.15, + "learning_rate": 2.0124039618624458e-05, + "loss": 3.8085, + "step": 13010 + }, + { + "epoch": 0.15, + "learning_rate": 2.0119411274645933e-05, + "loss": 0.3173, + "step": 13012 + }, + { + "epoch": 0.15, + "learning_rate": 2.011478293066741e-05, + "loss": 0.9372, + "step": 13014 + }, + { + "epoch": 0.15, + "learning_rate": 2.0110154586688884e-05, + "loss": 0.9879, + "step": 13016 + }, + { + "epoch": 0.15, + "learning_rate": 2.010552624271036e-05, + "loss": 2.7954, + "step": 13018 + }, + { + "epoch": 0.15, + "learning_rate": 2.0100897898731835e-05, + "loss": 1.0263, + "step": 13020 + }, + { + "epoch": 0.15, + "learning_rate": 2.009626955475331e-05, + "loss": 3.327, + "step": 13022 + }, + { + "epoch": 0.15, + "learning_rate": 2.0091641210774786e-05, + "loss": 4.23, + "step": 13024 + }, + { + "epoch": 0.15, + "learning_rate": 2.008701286679626e-05, + "loss": 4.3586, + "step": 13026 + }, + { + "epoch": 0.15, + "learning_rate": 2.0082384522817736e-05, + "loss": 1.4732, + "step": 13028 + }, + { + "epoch": 0.15, + "learning_rate": 2.0077756178839212e-05, + "loss": 0.9702, + "step": 13030 + }, + { + "epoch": 0.15, + "learning_rate": 2.0073127834860687e-05, + "loss": 0.1685, + "step": 13032 + }, + { + "epoch": 0.15, + "learning_rate": 2.0068499490882166e-05, + "loss": 0.2289, + "step": 13034 + }, + { + "epoch": 0.15, + "learning_rate": 2.006387114690364e-05, + "loss": 1.0722, + "step": 13036 + }, + { + "epoch": 0.15, + "learning_rate": 2.0059242802925117e-05, + "loss": 0.558, + "step": 13038 + }, + { + "epoch": 0.15, + "learning_rate": 2.0054614458946592e-05, + "loss": 0.0638, + "step": 13040 + }, + { + "epoch": 0.15, + "learning_rate": 2.0049986114968064e-05, + "loss": 2.7206, + "step": 13042 + }, + { + "epoch": 0.15, + "learning_rate": 2.004535777098954e-05, + "loss": 0.8041, + "step": 13044 + }, + { + "epoch": 0.15, + "learning_rate": 2.0040729427011015e-05, + "loss": 0.0004, + "step": 13046 + }, + { + "epoch": 0.15, + "learning_rate": 2.003610108303249e-05, + "loss": 2.5999, + "step": 13048 + }, + { + "epoch": 0.15, + "learning_rate": 2.0031472739053966e-05, + "loss": 2.5038, + "step": 13050 + }, + { + "epoch": 0.15, + "learning_rate": 2.002684439507544e-05, + "loss": 3.0792, + "step": 13052 + }, + { + "epoch": 0.15, + "learning_rate": 2.0022216051096917e-05, + "loss": 1.8828, + "step": 13054 + }, + { + "epoch": 0.15, + "learning_rate": 2.0017587707118392e-05, + "loss": 4.3957, + "step": 13056 + }, + { + "epoch": 0.15, + "learning_rate": 2.0012959363139868e-05, + "loss": 4.3117, + "step": 13058 + }, + { + "epoch": 0.15, + "learning_rate": 2.0008331019161343e-05, + "loss": 0.0086, + "step": 13060 + }, + { + "epoch": 0.15, + "learning_rate": 2.000370267518282e-05, + "loss": 0.0156, + "step": 13062 + }, + { + "epoch": 0.15, + "learning_rate": 1.9999074331204294e-05, + "loss": 2.0186, + "step": 13064 + }, + { + "epoch": 0.15, + "learning_rate": 1.999444598722577e-05, + "loss": 5.6924, + "step": 13066 + }, + { + "epoch": 0.15, + "learning_rate": 1.9989817643247248e-05, + "loss": 0.3991, + "step": 13068 + }, + { + "epoch": 0.15, + "learning_rate": 1.9985189299268724e-05, + "loss": 5.2702, + "step": 13070 + }, + { + "epoch": 0.15, + "learning_rate": 1.99805609552902e-05, + "loss": 0.446, + "step": 13072 + }, + { + "epoch": 0.15, + "learning_rate": 1.9975932611311675e-05, + "loss": 0.1372, + "step": 13074 + }, + { + "epoch": 0.15, + "learning_rate": 1.997130426733315e-05, + "loss": 0.0803, + "step": 13076 + }, + { + "epoch": 0.15, + "learning_rate": 1.9966675923354625e-05, + "loss": 1.4658, + "step": 13078 + }, + { + "epoch": 0.15, + "learning_rate": 1.99620475793761e-05, + "loss": 2.6716, + "step": 13080 + }, + { + "epoch": 0.15, + "learning_rate": 1.9957419235397576e-05, + "loss": 1.9509, + "step": 13082 + }, + { + "epoch": 0.15, + "learning_rate": 1.995279089141905e-05, + "loss": 2.1457, + "step": 13084 + }, + { + "epoch": 0.15, + "learning_rate": 1.9948162547440527e-05, + "loss": 0.0201, + "step": 13086 + }, + { + "epoch": 0.15, + "learning_rate": 1.9943534203462002e-05, + "loss": 2.0043, + "step": 13088 + }, + { + "epoch": 0.15, + "learning_rate": 1.9938905859483478e-05, + "loss": 0.7776, + "step": 13090 + }, + { + "epoch": 0.15, + "learning_rate": 1.9934277515504953e-05, + "loss": 0.9837, + "step": 13092 + }, + { + "epoch": 0.15, + "learning_rate": 1.992964917152643e-05, + "loss": 3.5779, + "step": 13094 + }, + { + "epoch": 0.15, + "learning_rate": 1.9925020827547904e-05, + "loss": 0.1374, + "step": 13096 + }, + { + "epoch": 0.15, + "learning_rate": 1.992039248356938e-05, + "loss": 2.7978, + "step": 13098 + }, + { + "epoch": 0.15, + "learning_rate": 1.9915764139590855e-05, + "loss": 3.157, + "step": 13100 + }, + { + "epoch": 0.15, + "learning_rate": 1.991113579561233e-05, + "loss": 5.375, + "step": 13102 + }, + { + "epoch": 0.15, + "learning_rate": 1.9906507451633806e-05, + "loss": 1.2498, + "step": 13104 + }, + { + "epoch": 0.15, + "learning_rate": 1.990187910765528e-05, + "loss": 0.6471, + "step": 13106 + }, + { + "epoch": 0.15, + "learning_rate": 1.989725076367676e-05, + "loss": 6.4151, + "step": 13108 + }, + { + "epoch": 0.15, + "learning_rate": 1.9892622419698236e-05, + "loss": 1.0298, + "step": 13110 + }, + { + "epoch": 0.15, + "learning_rate": 1.988799407571971e-05, + "loss": 8.0229, + "step": 13112 + }, + { + "epoch": 0.15, + "learning_rate": 1.9883365731741186e-05, + "loss": 1.5602, + "step": 13114 + }, + { + "epoch": 0.15, + "learning_rate": 1.987873738776266e-05, + "loss": 2.525, + "step": 13116 + }, + { + "epoch": 0.15, + "learning_rate": 1.9874109043784134e-05, + "loss": 0.4019, + "step": 13118 + }, + { + "epoch": 0.15, + "learning_rate": 1.986948069980561e-05, + "loss": 2.3709, + "step": 13120 + }, + { + "epoch": 0.15, + "learning_rate": 1.9864852355827085e-05, + "loss": 2.9419, + "step": 13122 + }, + { + "epoch": 0.15, + "learning_rate": 1.986022401184856e-05, + "loss": 4.3693, + "step": 13124 + }, + { + "epoch": 0.15, + "learning_rate": 1.9855595667870036e-05, + "loss": 2.1228, + "step": 13126 + }, + { + "epoch": 0.15, + "learning_rate": 1.985096732389151e-05, + "loss": 2.6704, + "step": 13128 + }, + { + "epoch": 0.15, + "learning_rate": 1.9846338979912986e-05, + "loss": 1.2145, + "step": 13130 + }, + { + "epoch": 0.15, + "learning_rate": 1.9841710635934462e-05, + "loss": 0.0025, + "step": 13132 + }, + { + "epoch": 0.15, + "learning_rate": 1.9837082291955937e-05, + "loss": 2.9182, + "step": 13134 + }, + { + "epoch": 0.15, + "learning_rate": 1.9832453947977413e-05, + "loss": 1.815, + "step": 13136 + }, + { + "epoch": 0.15, + "learning_rate": 1.9827825603998888e-05, + "loss": 2.299, + "step": 13138 + }, + { + "epoch": 0.15, + "learning_rate": 1.9823197260020367e-05, + "loss": 1.7682, + "step": 13140 + }, + { + "epoch": 0.15, + "learning_rate": 1.9818568916041842e-05, + "loss": 0.5695, + "step": 13142 + }, + { + "epoch": 0.15, + "learning_rate": 1.9813940572063318e-05, + "loss": 0.8733, + "step": 13144 + }, + { + "epoch": 0.15, + "learning_rate": 1.9809312228084793e-05, + "loss": 3.3152, + "step": 13146 + }, + { + "epoch": 0.15, + "learning_rate": 1.980468388410627e-05, + "loss": 0.1739, + "step": 13148 + }, + { + "epoch": 0.15, + "learning_rate": 1.9800055540127744e-05, + "loss": 0.0021, + "step": 13150 + }, + { + "epoch": 0.15, + "learning_rate": 1.979542719614922e-05, + "loss": 1.3449, + "step": 13152 + }, + { + "epoch": 0.15, + "learning_rate": 1.9790798852170695e-05, + "loss": 0.0807, + "step": 13154 + }, + { + "epoch": 0.15, + "learning_rate": 1.978617050819217e-05, + "loss": 3.2318, + "step": 13156 + }, + { + "epoch": 0.15, + "learning_rate": 1.9781542164213646e-05, + "loss": 1.1967, + "step": 13158 + }, + { + "epoch": 0.15, + "learning_rate": 1.977691382023512e-05, + "loss": 1.7287, + "step": 13160 + }, + { + "epoch": 0.15, + "learning_rate": 1.9772285476256597e-05, + "loss": 2.5766, + "step": 13162 + }, + { + "epoch": 0.15, + "learning_rate": 1.9767657132278072e-05, + "loss": 5.8898, + "step": 13164 + }, + { + "epoch": 0.15, + "learning_rate": 1.9763028788299547e-05, + "loss": 2.5702, + "step": 13166 + }, + { + "epoch": 0.15, + "learning_rate": 1.9758400444321023e-05, + "loss": 1.5105, + "step": 13168 + }, + { + "epoch": 0.15, + "learning_rate": 1.9753772100342498e-05, + "loss": 0.5421, + "step": 13170 + }, + { + "epoch": 0.15, + "learning_rate": 1.9749143756363974e-05, + "loss": 0.0004, + "step": 13172 + }, + { + "epoch": 0.15, + "learning_rate": 1.974451541238545e-05, + "loss": 0.9888, + "step": 13174 + }, + { + "epoch": 0.15, + "learning_rate": 1.9739887068406925e-05, + "loss": 1.0898, + "step": 13176 + }, + { + "epoch": 0.15, + "learning_rate": 1.97352587244284e-05, + "loss": 2.1534, + "step": 13178 + }, + { + "epoch": 0.15, + "learning_rate": 1.9730630380449875e-05, + "loss": 2.3472, + "step": 13180 + }, + { + "epoch": 0.15, + "learning_rate": 1.9726002036471354e-05, + "loss": 4.6581, + "step": 13182 + }, + { + "epoch": 0.15, + "learning_rate": 1.972137369249283e-05, + "loss": 0.0004, + "step": 13184 + }, + { + "epoch": 0.15, + "learning_rate": 1.9716745348514305e-05, + "loss": 2.2599, + "step": 13186 + }, + { + "epoch": 0.15, + "learning_rate": 1.9712117004535777e-05, + "loss": 3.3159, + "step": 13188 + }, + { + "epoch": 0.15, + "learning_rate": 1.9707488660557253e-05, + "loss": 1.3744, + "step": 13190 + }, + { + "epoch": 0.15, + "learning_rate": 1.9702860316578728e-05, + "loss": 3.7766, + "step": 13192 + }, + { + "epoch": 0.15, + "learning_rate": 1.9698231972600203e-05, + "loss": 4.8249, + "step": 13194 + }, + { + "epoch": 0.15, + "learning_rate": 1.969360362862168e-05, + "loss": 0.0013, + "step": 13196 + }, + { + "epoch": 0.15, + "learning_rate": 1.9688975284643154e-05, + "loss": 1.8392, + "step": 13198 + }, + { + "epoch": 0.15, + "learning_rate": 1.968434694066463e-05, + "loss": 1.8035, + "step": 13200 + }, + { + "epoch": 0.15, + "learning_rate": 1.9679718596686105e-05, + "loss": 0.0012, + "step": 13202 + }, + { + "epoch": 0.15, + "learning_rate": 1.967509025270758e-05, + "loss": 0.8788, + "step": 13204 + }, + { + "epoch": 0.15, + "learning_rate": 1.9670461908729056e-05, + "loss": 2.1079, + "step": 13206 + }, + { + "epoch": 0.15, + "learning_rate": 1.966583356475053e-05, + "loss": 1.9554, + "step": 13208 + }, + { + "epoch": 0.15, + "learning_rate": 1.9661205220772007e-05, + "loss": 6.6322, + "step": 13210 + }, + { + "epoch": 0.15, + "learning_rate": 1.9656576876793482e-05, + "loss": 4.0344, + "step": 13212 + }, + { + "epoch": 0.15, + "learning_rate": 1.965194853281496e-05, + "loss": 0.8296, + "step": 13214 + }, + { + "epoch": 0.15, + "learning_rate": 1.9647320188836436e-05, + "loss": 2.8819, + "step": 13216 + }, + { + "epoch": 0.15, + "learning_rate": 1.9642691844857912e-05, + "loss": 0.4434, + "step": 13218 + }, + { + "epoch": 0.15, + "learning_rate": 1.9638063500879387e-05, + "loss": 2.9619, + "step": 13220 + }, + { + "epoch": 0.15, + "learning_rate": 1.9633435156900863e-05, + "loss": 1.8469, + "step": 13222 + }, + { + "epoch": 0.15, + "learning_rate": 1.9628806812922338e-05, + "loss": 0.0019, + "step": 13224 + }, + { + "epoch": 0.15, + "learning_rate": 1.9624178468943814e-05, + "loss": 0.7726, + "step": 13226 + }, + { + "epoch": 0.15, + "learning_rate": 1.961955012496529e-05, + "loss": 1.4185, + "step": 13228 + }, + { + "epoch": 0.15, + "learning_rate": 1.9614921780986764e-05, + "loss": 1.9739, + "step": 13230 + }, + { + "epoch": 0.15, + "learning_rate": 1.961029343700824e-05, + "loss": 1.8834, + "step": 13232 + }, + { + "epoch": 0.15, + "learning_rate": 1.9605665093029715e-05, + "loss": 0.2679, + "step": 13234 + }, + { + "epoch": 0.15, + "learning_rate": 1.960103674905119e-05, + "loss": 1.5547, + "step": 13236 + }, + { + "epoch": 0.15, + "learning_rate": 1.9596408405072666e-05, + "loss": 5.7028, + "step": 13238 + }, + { + "epoch": 0.15, + "learning_rate": 1.959178006109414e-05, + "loss": 4.1657, + "step": 13240 + }, + { + "epoch": 0.15, + "learning_rate": 1.9587151717115617e-05, + "loss": 0.2202, + "step": 13242 + }, + { + "epoch": 0.15, + "learning_rate": 1.9582523373137092e-05, + "loss": 2.4301, + "step": 13244 + }, + { + "epoch": 0.15, + "learning_rate": 1.9577895029158568e-05, + "loss": 0.0003, + "step": 13246 + }, + { + "epoch": 0.15, + "learning_rate": 1.9573266685180043e-05, + "loss": 0.6021, + "step": 13248 + }, + { + "epoch": 0.15, + "learning_rate": 1.956863834120152e-05, + "loss": 0.7767, + "step": 13250 + }, + { + "epoch": 0.15, + "learning_rate": 1.9564009997222994e-05, + "loss": 1.0664, + "step": 13252 + }, + { + "epoch": 0.15, + "learning_rate": 1.9559381653244473e-05, + "loss": 2.0602, + "step": 13254 + }, + { + "epoch": 0.15, + "learning_rate": 1.955475330926595e-05, + "loss": 0.3002, + "step": 13256 + }, + { + "epoch": 0.15, + "learning_rate": 1.9550124965287424e-05, + "loss": 1.0303, + "step": 13258 + }, + { + "epoch": 0.15, + "learning_rate": 1.9545496621308896e-05, + "loss": 1.4251, + "step": 13260 + }, + { + "epoch": 0.15, + "learning_rate": 1.954086827733037e-05, + "loss": 0.3661, + "step": 13262 + }, + { + "epoch": 0.15, + "learning_rate": 1.9536239933351847e-05, + "loss": 1.893, + "step": 13264 + }, + { + "epoch": 0.15, + "learning_rate": 1.9531611589373322e-05, + "loss": 0.1163, + "step": 13266 + }, + { + "epoch": 0.15, + "learning_rate": 1.9526983245394797e-05, + "loss": 2.4839, + "step": 13268 + }, + { + "epoch": 0.15, + "learning_rate": 1.9522354901416273e-05, + "loss": 1.1198, + "step": 13270 + }, + { + "epoch": 0.15, + "learning_rate": 1.951772655743775e-05, + "loss": 0.2538, + "step": 13272 + }, + { + "epoch": 0.15, + "learning_rate": 1.9513098213459224e-05, + "loss": 7.7177, + "step": 13274 + }, + { + "epoch": 0.15, + "learning_rate": 1.95084698694807e-05, + "loss": 3.3919, + "step": 13276 + }, + { + "epoch": 0.15, + "learning_rate": 1.9503841525502175e-05, + "loss": 7.5268, + "step": 13278 + }, + { + "epoch": 0.15, + "learning_rate": 1.949921318152365e-05, + "loss": 1.0745, + "step": 13280 + }, + { + "epoch": 0.15, + "learning_rate": 1.9494584837545125e-05, + "loss": 7.178, + "step": 13282 + }, + { + "epoch": 0.15, + "learning_rate": 1.94899564935666e-05, + "loss": 0.7392, + "step": 13284 + }, + { + "epoch": 0.15, + "learning_rate": 1.9485328149588076e-05, + "loss": 1.693, + "step": 13286 + }, + { + "epoch": 0.15, + "learning_rate": 1.9480699805609555e-05, + "loss": 1.3612, + "step": 13288 + }, + { + "epoch": 0.15, + "learning_rate": 1.947607146163103e-05, + "loss": 3.6723, + "step": 13290 + }, + { + "epoch": 0.15, + "learning_rate": 1.9471443117652506e-05, + "loss": 2.4499, + "step": 13292 + }, + { + "epoch": 0.15, + "learning_rate": 1.946681477367398e-05, + "loss": 0.8863, + "step": 13294 + }, + { + "epoch": 0.15, + "learning_rate": 1.9462186429695457e-05, + "loss": 0.0016, + "step": 13296 + }, + { + "epoch": 0.15, + "learning_rate": 1.9457558085716932e-05, + "loss": 0.0405, + "step": 13298 + }, + { + "epoch": 0.15, + "learning_rate": 1.9452929741738408e-05, + "loss": 1.252, + "step": 13300 + }, + { + "epoch": 0.15, + "learning_rate": 1.9448301397759883e-05, + "loss": 0.0062, + "step": 13302 + }, + { + "epoch": 0.15, + "learning_rate": 1.944367305378136e-05, + "loss": 1.1732, + "step": 13304 + }, + { + "epoch": 0.15, + "learning_rate": 1.9439044709802834e-05, + "loss": 2.16, + "step": 13306 + }, + { + "epoch": 0.15, + "learning_rate": 1.943441636582431e-05, + "loss": 0.651, + "step": 13308 + }, + { + "epoch": 0.15, + "learning_rate": 1.9429788021845785e-05, + "loss": 0.7616, + "step": 13310 + }, + { + "epoch": 0.15, + "learning_rate": 1.942515967786726e-05, + "loss": 0.1402, + "step": 13312 + }, + { + "epoch": 0.15, + "learning_rate": 1.9420531333888736e-05, + "loss": 0.2991, + "step": 13314 + }, + { + "epoch": 0.15, + "learning_rate": 1.941590298991021e-05, + "loss": 2.7966, + "step": 13316 + }, + { + "epoch": 0.15, + "learning_rate": 1.9411274645931686e-05, + "loss": 0.0088, + "step": 13318 + }, + { + "epoch": 0.15, + "learning_rate": 1.9406646301953162e-05, + "loss": 2.9461, + "step": 13320 + }, + { + "epoch": 0.15, + "learning_rate": 1.9402017957974637e-05, + "loss": 1.5902, + "step": 13322 + }, + { + "epoch": 0.15, + "learning_rate": 1.9397389613996113e-05, + "loss": 0.0095, + "step": 13324 + }, + { + "epoch": 0.15, + "learning_rate": 1.9392761270017588e-05, + "loss": 7.1449, + "step": 13326 + }, + { + "epoch": 0.15, + "learning_rate": 1.9388132926039067e-05, + "loss": 1.391, + "step": 13328 + }, + { + "epoch": 0.15, + "learning_rate": 1.9383504582060542e-05, + "loss": 0.5612, + "step": 13330 + }, + { + "epoch": 0.15, + "learning_rate": 1.9378876238082014e-05, + "loss": 1.9488, + "step": 13332 + }, + { + "epoch": 0.15, + "learning_rate": 1.937424789410349e-05, + "loss": 1.062, + "step": 13334 + }, + { + "epoch": 0.15, + "learning_rate": 1.9369619550124965e-05, + "loss": 2.2017, + "step": 13336 + }, + { + "epoch": 0.15, + "learning_rate": 1.936499120614644e-05, + "loss": 0.2225, + "step": 13338 + }, + { + "epoch": 0.15, + "learning_rate": 1.9360362862167916e-05, + "loss": 3.2937, + "step": 13340 + }, + { + "epoch": 0.15, + "learning_rate": 1.935573451818939e-05, + "loss": 4.6619, + "step": 13342 + }, + { + "epoch": 0.15, + "learning_rate": 1.9351106174210867e-05, + "loss": 0.1726, + "step": 13344 + }, + { + "epoch": 0.15, + "learning_rate": 1.9346477830232342e-05, + "loss": 5.0939, + "step": 13346 + }, + { + "epoch": 0.15, + "learning_rate": 1.9341849486253818e-05, + "loss": 1.8783, + "step": 13348 + }, + { + "epoch": 0.15, + "learning_rate": 1.9337221142275293e-05, + "loss": 2.4307, + "step": 13350 + }, + { + "epoch": 0.15, + "learning_rate": 1.933259279829677e-05, + "loss": 1.1087, + "step": 13352 + }, + { + "epoch": 0.15, + "learning_rate": 1.9327964454318244e-05, + "loss": 0.0896, + "step": 13354 + }, + { + "epoch": 0.15, + "learning_rate": 1.932333611033972e-05, + "loss": 1.1433, + "step": 13356 + }, + { + "epoch": 0.15, + "learning_rate": 1.9318707766361195e-05, + "loss": 0.8672, + "step": 13358 + }, + { + "epoch": 0.15, + "learning_rate": 1.9314079422382674e-05, + "loss": 1.9787, + "step": 13360 + }, + { + "epoch": 0.15, + "learning_rate": 1.930945107840415e-05, + "loss": 2.3471, + "step": 13362 + }, + { + "epoch": 0.15, + "learning_rate": 1.9304822734425625e-05, + "loss": 4.8072, + "step": 13364 + }, + { + "epoch": 0.15, + "learning_rate": 1.93001943904471e-05, + "loss": 1.6637, + "step": 13366 + }, + { + "epoch": 0.15, + "learning_rate": 1.9295566046468575e-05, + "loss": 6.2773, + "step": 13368 + }, + { + "epoch": 0.15, + "learning_rate": 1.929093770249005e-05, + "loss": 0.066, + "step": 13370 + }, + { + "epoch": 0.15, + "learning_rate": 1.9286309358511526e-05, + "loss": 0.5475, + "step": 13372 + }, + { + "epoch": 0.15, + "learning_rate": 1.9281681014533002e-05, + "loss": 2.0016, + "step": 13374 + }, + { + "epoch": 0.15, + "learning_rate": 1.9277052670554477e-05, + "loss": 0.0011, + "step": 13376 + }, + { + "epoch": 0.15, + "learning_rate": 1.9272424326575953e-05, + "loss": 1.8281, + "step": 13378 + }, + { + "epoch": 0.15, + "learning_rate": 1.9267795982597428e-05, + "loss": 0.7233, + "step": 13380 + }, + { + "epoch": 0.15, + "learning_rate": 1.9263167638618903e-05, + "loss": 0.2477, + "step": 13382 + }, + { + "epoch": 0.15, + "learning_rate": 1.925853929464038e-05, + "loss": 0.732, + "step": 13384 + }, + { + "epoch": 0.15, + "learning_rate": 1.9253910950661854e-05, + "loss": 3.6406, + "step": 13386 + }, + { + "epoch": 0.15, + "learning_rate": 1.924928260668333e-05, + "loss": 1.425, + "step": 13388 + }, + { + "epoch": 0.15, + "learning_rate": 1.9244654262704805e-05, + "loss": 0.8571, + "step": 13390 + }, + { + "epoch": 0.15, + "learning_rate": 1.924002591872628e-05, + "loss": 0.4945, + "step": 13392 + }, + { + "epoch": 0.15, + "learning_rate": 1.9235397574747756e-05, + "loss": 2.2872, + "step": 13394 + }, + { + "epoch": 0.15, + "learning_rate": 1.923076923076923e-05, + "loss": 3.4967, + "step": 13396 + }, + { + "epoch": 0.15, + "learning_rate": 1.9226140886790707e-05, + "loss": 0.0177, + "step": 13398 + }, + { + "epoch": 0.15, + "learning_rate": 1.9221512542812182e-05, + "loss": 8.7932, + "step": 13400 + }, + { + "epoch": 0.15, + "learning_rate": 1.921688419883366e-05, + "loss": 0.064, + "step": 13402 + }, + { + "epoch": 0.15, + "learning_rate": 1.9212255854855137e-05, + "loss": 3.9577, + "step": 13404 + }, + { + "epoch": 0.15, + "learning_rate": 1.920762751087661e-05, + "loss": 1.807, + "step": 13406 + }, + { + "epoch": 0.15, + "learning_rate": 1.9202999166898084e-05, + "loss": 1.4847, + "step": 13408 + }, + { + "epoch": 0.15, + "learning_rate": 1.919837082291956e-05, + "loss": 1.5291, + "step": 13410 + }, + { + "epoch": 0.15, + "learning_rate": 1.9193742478941035e-05, + "loss": 0.3745, + "step": 13412 + }, + { + "epoch": 0.15, + "learning_rate": 1.918911413496251e-05, + "loss": 1.5542, + "step": 13414 + }, + { + "epoch": 0.15, + "learning_rate": 1.9184485790983986e-05, + "loss": 3.0253, + "step": 13416 + }, + { + "epoch": 0.15, + "learning_rate": 1.917985744700546e-05, + "loss": 2.1434, + "step": 13418 + }, + { + "epoch": 0.15, + "learning_rate": 1.9175229103026937e-05, + "loss": 0.0156, + "step": 13420 + }, + { + "epoch": 0.15, + "learning_rate": 1.9170600759048412e-05, + "loss": 1.7343, + "step": 13422 + }, + { + "epoch": 0.15, + "learning_rate": 1.9165972415069887e-05, + "loss": 2.0155, + "step": 13424 + }, + { + "epoch": 0.15, + "learning_rate": 1.9161344071091363e-05, + "loss": 1.801, + "step": 13426 + }, + { + "epoch": 0.15, + "learning_rate": 1.9156715727112838e-05, + "loss": 3.9315, + "step": 13428 + }, + { + "epoch": 0.15, + "learning_rate": 1.9152087383134314e-05, + "loss": 6.218, + "step": 13430 + }, + { + "epoch": 0.15, + "learning_rate": 1.914745903915579e-05, + "loss": 2.6268, + "step": 13432 + }, + { + "epoch": 0.15, + "learning_rate": 1.9142830695177268e-05, + "loss": 1.003, + "step": 13434 + }, + { + "epoch": 0.15, + "learning_rate": 1.9138202351198743e-05, + "loss": 3.4408, + "step": 13436 + }, + { + "epoch": 0.15, + "learning_rate": 1.913357400722022e-05, + "loss": 0.0215, + "step": 13438 + }, + { + "epoch": 0.15, + "learning_rate": 1.9128945663241694e-05, + "loss": 3.2655, + "step": 13440 + }, + { + "epoch": 0.15, + "learning_rate": 1.912431731926317e-05, + "loss": 2.3826, + "step": 13442 + }, + { + "epoch": 0.15, + "learning_rate": 1.9119688975284645e-05, + "loss": 1.793, + "step": 13444 + }, + { + "epoch": 0.15, + "learning_rate": 1.911506063130612e-05, + "loss": 3.7872, + "step": 13446 + }, + { + "epoch": 0.15, + "learning_rate": 1.9110432287327596e-05, + "loss": 0.8993, + "step": 13448 + }, + { + "epoch": 0.15, + "learning_rate": 1.910580394334907e-05, + "loss": 1.6614, + "step": 13450 + }, + { + "epoch": 0.15, + "learning_rate": 1.9101175599370547e-05, + "loss": 0.0035, + "step": 13452 + }, + { + "epoch": 0.15, + "learning_rate": 1.9096547255392022e-05, + "loss": 1.6602, + "step": 13454 + }, + { + "epoch": 0.15, + "learning_rate": 1.9091918911413498e-05, + "loss": 2.0293, + "step": 13456 + }, + { + "epoch": 0.16, + "learning_rate": 1.9087290567434973e-05, + "loss": 2.1621, + "step": 13458 + }, + { + "epoch": 0.16, + "learning_rate": 1.908266222345645e-05, + "loss": 0.5429, + "step": 13460 + }, + { + "epoch": 0.16, + "learning_rate": 1.9078033879477924e-05, + "loss": 2.2707, + "step": 13462 + }, + { + "epoch": 0.16, + "learning_rate": 1.90734055354994e-05, + "loss": 1.7344, + "step": 13464 + }, + { + "epoch": 0.16, + "learning_rate": 1.9068777191520875e-05, + "loss": 2.8464, + "step": 13466 + }, + { + "epoch": 0.16, + "learning_rate": 1.906414884754235e-05, + "loss": 2.0617, + "step": 13468 + }, + { + "epoch": 0.16, + "learning_rate": 1.9059520503563826e-05, + "loss": 1.9911, + "step": 13470 + }, + { + "epoch": 0.16, + "learning_rate": 1.90548921595853e-05, + "loss": 4.3424, + "step": 13472 + }, + { + "epoch": 0.16, + "learning_rate": 1.905026381560678e-05, + "loss": 0.7343, + "step": 13474 + }, + { + "epoch": 0.16, + "learning_rate": 1.9045635471628255e-05, + "loss": 0.3217, + "step": 13476 + }, + { + "epoch": 0.16, + "learning_rate": 1.9041007127649727e-05, + "loss": 3.6979, + "step": 13478 + }, + { + "epoch": 0.16, + "learning_rate": 1.9036378783671203e-05, + "loss": 0.0049, + "step": 13480 + }, + { + "epoch": 0.16, + "learning_rate": 1.9031750439692678e-05, + "loss": 2.2857, + "step": 13482 + }, + { + "epoch": 0.16, + "learning_rate": 1.9027122095714153e-05, + "loss": 1.3107, + "step": 13484 + }, + { + "epoch": 0.16, + "learning_rate": 1.902249375173563e-05, + "loss": 1.7359, + "step": 13486 + }, + { + "epoch": 0.16, + "learning_rate": 1.9017865407757104e-05, + "loss": 3.3044, + "step": 13488 + }, + { + "epoch": 0.16, + "learning_rate": 1.901323706377858e-05, + "loss": 0.3497, + "step": 13490 + }, + { + "epoch": 0.16, + "learning_rate": 1.9008608719800055e-05, + "loss": 5.6237, + "step": 13492 + }, + { + "epoch": 0.16, + "learning_rate": 1.900398037582153e-05, + "loss": 1.2943, + "step": 13494 + }, + { + "epoch": 0.16, + "learning_rate": 1.8999352031843006e-05, + "loss": 0.4622, + "step": 13496 + }, + { + "epoch": 0.16, + "learning_rate": 1.899472368786448e-05, + "loss": 0.1548, + "step": 13498 + }, + { + "epoch": 0.16, + "learning_rate": 1.8990095343885957e-05, + "loss": 0.4961, + "step": 13500 + }, + { + "epoch": 0.16, + "learning_rate": 1.8985466999907432e-05, + "loss": 2.8406, + "step": 13502 + }, + { + "epoch": 0.16, + "learning_rate": 1.8980838655928908e-05, + "loss": 0.3669, + "step": 13504 + }, + { + "epoch": 0.16, + "learning_rate": 1.8976210311950383e-05, + "loss": 7.0224, + "step": 13506 + }, + { + "epoch": 0.16, + "learning_rate": 1.8971581967971862e-05, + "loss": 2.6181, + "step": 13508 + }, + { + "epoch": 0.16, + "learning_rate": 1.8966953623993337e-05, + "loss": 2.2659, + "step": 13510 + }, + { + "epoch": 0.16, + "learning_rate": 1.8962325280014813e-05, + "loss": 0.9308, + "step": 13512 + }, + { + "epoch": 0.16, + "learning_rate": 1.8957696936036288e-05, + "loss": 1.0779, + "step": 13514 + }, + { + "epoch": 0.16, + "learning_rate": 1.8953068592057764e-05, + "loss": 3.317, + "step": 13516 + }, + { + "epoch": 0.16, + "learning_rate": 1.894844024807924e-05, + "loss": 2.8721, + "step": 13518 + }, + { + "epoch": 0.16, + "learning_rate": 1.8943811904100715e-05, + "loss": 3.4025, + "step": 13520 + }, + { + "epoch": 0.16, + "learning_rate": 1.893918356012219e-05, + "loss": 0.0011, + "step": 13522 + }, + { + "epoch": 0.16, + "learning_rate": 1.8934555216143665e-05, + "loss": 1.824, + "step": 13524 + }, + { + "epoch": 0.16, + "learning_rate": 1.892992687216514e-05, + "loss": 2.6945, + "step": 13526 + }, + { + "epoch": 0.16, + "learning_rate": 1.8925298528186616e-05, + "loss": 4.9413, + "step": 13528 + }, + { + "epoch": 0.16, + "learning_rate": 1.892067018420809e-05, + "loss": 3.023, + "step": 13530 + }, + { + "epoch": 0.16, + "learning_rate": 1.8916041840229567e-05, + "loss": 3.4314, + "step": 13532 + }, + { + "epoch": 0.16, + "learning_rate": 1.8911413496251043e-05, + "loss": 1.2994, + "step": 13534 + }, + { + "epoch": 0.16, + "learning_rate": 1.8906785152272518e-05, + "loss": 0.0043, + "step": 13536 + }, + { + "epoch": 0.16, + "learning_rate": 1.8902156808293993e-05, + "loss": 1.3918, + "step": 13538 + }, + { + "epoch": 0.16, + "learning_rate": 1.889752846431547e-05, + "loss": 1.8283, + "step": 13540 + }, + { + "epoch": 0.16, + "learning_rate": 1.8892900120336944e-05, + "loss": 0.1822, + "step": 13542 + }, + { + "epoch": 0.16, + "learning_rate": 1.888827177635842e-05, + "loss": 1.0366, + "step": 13544 + }, + { + "epoch": 0.16, + "learning_rate": 1.8883643432379895e-05, + "loss": 0.4983, + "step": 13546 + }, + { + "epoch": 0.16, + "learning_rate": 1.8879015088401374e-05, + "loss": 0.2731, + "step": 13548 + }, + { + "epoch": 0.16, + "learning_rate": 1.8874386744422846e-05, + "loss": 1.3759, + "step": 13550 + }, + { + "epoch": 0.16, + "learning_rate": 1.886975840044432e-05, + "loss": 2.325, + "step": 13552 + }, + { + "epoch": 0.16, + "learning_rate": 1.8865130056465797e-05, + "loss": 0.8758, + "step": 13554 + }, + { + "epoch": 0.16, + "learning_rate": 1.8860501712487272e-05, + "loss": 3.4189, + "step": 13556 + }, + { + "epoch": 0.16, + "learning_rate": 1.8855873368508748e-05, + "loss": 2.2829, + "step": 13558 + }, + { + "epoch": 0.16, + "learning_rate": 1.8851245024530223e-05, + "loss": 0.2938, + "step": 13560 + }, + { + "epoch": 0.16, + "learning_rate": 1.88466166805517e-05, + "loss": 3.2978, + "step": 13562 + }, + { + "epoch": 0.16, + "learning_rate": 1.8841988336573174e-05, + "loss": 2.121, + "step": 13564 + }, + { + "epoch": 0.16, + "learning_rate": 1.883735999259465e-05, + "loss": 0.0033, + "step": 13566 + }, + { + "epoch": 0.16, + "learning_rate": 1.8832731648616125e-05, + "loss": 1.3646, + "step": 13568 + }, + { + "epoch": 0.16, + "learning_rate": 1.88281033046376e-05, + "loss": 1.1591, + "step": 13570 + }, + { + "epoch": 0.16, + "learning_rate": 1.8823474960659076e-05, + "loss": 1.3265, + "step": 13572 + }, + { + "epoch": 0.16, + "learning_rate": 1.881884661668055e-05, + "loss": 1.2844, + "step": 13574 + }, + { + "epoch": 0.16, + "learning_rate": 1.8814218272702026e-05, + "loss": 0.7665, + "step": 13576 + }, + { + "epoch": 0.16, + "learning_rate": 1.8809589928723502e-05, + "loss": 4.886, + "step": 13578 + }, + { + "epoch": 0.16, + "learning_rate": 1.8804961584744977e-05, + "loss": 2.2057, + "step": 13580 + }, + { + "epoch": 0.16, + "learning_rate": 1.8800333240766456e-05, + "loss": 0.6421, + "step": 13582 + }, + { + "epoch": 0.16, + "learning_rate": 1.879570489678793e-05, + "loss": 5.9533, + "step": 13584 + }, + { + "epoch": 0.16, + "learning_rate": 1.8791076552809407e-05, + "loss": 2.1755, + "step": 13586 + }, + { + "epoch": 0.16, + "learning_rate": 1.8786448208830882e-05, + "loss": 3.3844, + "step": 13588 + }, + { + "epoch": 0.16, + "learning_rate": 1.8781819864852358e-05, + "loss": 0.0006, + "step": 13590 + }, + { + "epoch": 0.16, + "learning_rate": 1.8777191520873833e-05, + "loss": 0.6003, + "step": 13592 + }, + { + "epoch": 0.16, + "learning_rate": 1.877256317689531e-05, + "loss": 1.5959, + "step": 13594 + }, + { + "epoch": 0.16, + "learning_rate": 1.8767934832916784e-05, + "loss": 3.9595, + "step": 13596 + }, + { + "epoch": 0.16, + "learning_rate": 1.876330648893826e-05, + "loss": 0.1259, + "step": 13598 + }, + { + "epoch": 0.16, + "learning_rate": 1.8758678144959735e-05, + "loss": 0.0061, + "step": 13600 + }, + { + "epoch": 0.16, + "learning_rate": 1.875404980098121e-05, + "loss": 0.9808, + "step": 13602 + }, + { + "epoch": 0.16, + "learning_rate": 1.8749421457002686e-05, + "loss": 9.5234, + "step": 13604 + }, + { + "epoch": 0.16, + "learning_rate": 1.874479311302416e-05, + "loss": 0.6039, + "step": 13606 + }, + { + "epoch": 0.16, + "learning_rate": 1.8740164769045637e-05, + "loss": 2.8311, + "step": 13608 + }, + { + "epoch": 0.16, + "learning_rate": 1.8735536425067112e-05, + "loss": 0.4645, + "step": 13610 + }, + { + "epoch": 0.16, + "learning_rate": 1.8730908081088587e-05, + "loss": 2.3792, + "step": 13612 + }, + { + "epoch": 0.16, + "learning_rate": 1.8726279737110063e-05, + "loss": 0.0067, + "step": 13614 + }, + { + "epoch": 0.16, + "learning_rate": 1.8721651393131538e-05, + "loss": 1.2012, + "step": 13616 + }, + { + "epoch": 0.16, + "learning_rate": 1.8717023049153014e-05, + "loss": 2.3347, + "step": 13618 + }, + { + "epoch": 0.16, + "learning_rate": 1.871239470517449e-05, + "loss": 4.5485, + "step": 13620 + }, + { + "epoch": 0.16, + "learning_rate": 1.8707766361195965e-05, + "loss": 1.3667, + "step": 13622 + }, + { + "epoch": 0.16, + "learning_rate": 1.870313801721744e-05, + "loss": 0.704, + "step": 13624 + }, + { + "epoch": 0.16, + "learning_rate": 1.8698509673238915e-05, + "loss": 4.1856, + "step": 13626 + }, + { + "epoch": 0.16, + "learning_rate": 1.869388132926039e-05, + "loss": 4.8153, + "step": 13628 + }, + { + "epoch": 0.16, + "learning_rate": 1.8689252985281866e-05, + "loss": 2.9679, + "step": 13630 + }, + { + "epoch": 0.16, + "learning_rate": 1.8684624641303342e-05, + "loss": 3.6085, + "step": 13632 + }, + { + "epoch": 0.16, + "learning_rate": 1.8679996297324817e-05, + "loss": 0.2644, + "step": 13634 + }, + { + "epoch": 0.16, + "learning_rate": 1.8675367953346293e-05, + "loss": 2.8538, + "step": 13636 + }, + { + "epoch": 0.16, + "learning_rate": 1.8670739609367768e-05, + "loss": 0.0032, + "step": 13638 + }, + { + "epoch": 0.16, + "learning_rate": 1.8666111265389243e-05, + "loss": 0.402, + "step": 13640 + }, + { + "epoch": 0.16, + "learning_rate": 1.866148292141072e-05, + "loss": 0.2989, + "step": 13642 + }, + { + "epoch": 0.16, + "learning_rate": 1.8656854577432194e-05, + "loss": 0.0036, + "step": 13644 + }, + { + "epoch": 0.16, + "learning_rate": 1.865222623345367e-05, + "loss": 2.5014, + "step": 13646 + }, + { + "epoch": 0.16, + "learning_rate": 1.8647597889475145e-05, + "loss": 0.714, + "step": 13648 + }, + { + "epoch": 0.16, + "learning_rate": 1.864296954549662e-05, + "loss": 3.6613, + "step": 13650 + }, + { + "epoch": 0.16, + "learning_rate": 1.8638341201518096e-05, + "loss": 0.1582, + "step": 13652 + }, + { + "epoch": 0.16, + "learning_rate": 1.8633712857539575e-05, + "loss": 0.006, + "step": 13654 + }, + { + "epoch": 0.16, + "learning_rate": 1.862908451356105e-05, + "loss": 0.0009, + "step": 13656 + }, + { + "epoch": 0.16, + "learning_rate": 1.8624456169582526e-05, + "loss": 2.0837, + "step": 13658 + }, + { + "epoch": 0.16, + "learning_rate": 1.8619827825604e-05, + "loss": 0.0553, + "step": 13660 + }, + { + "epoch": 0.16, + "learning_rate": 1.8615199481625476e-05, + "loss": 0.1025, + "step": 13662 + }, + { + "epoch": 0.16, + "learning_rate": 1.8610571137646952e-05, + "loss": 0.8116, + "step": 13664 + }, + { + "epoch": 0.16, + "learning_rate": 1.8605942793668427e-05, + "loss": 0.0281, + "step": 13666 + }, + { + "epoch": 0.16, + "learning_rate": 1.8601314449689903e-05, + "loss": 5.2014, + "step": 13668 + }, + { + "epoch": 0.16, + "learning_rate": 1.8596686105711378e-05, + "loss": 1.9461, + "step": 13670 + }, + { + "epoch": 0.16, + "learning_rate": 1.8592057761732854e-05, + "loss": 0.0005, + "step": 13672 + }, + { + "epoch": 0.16, + "learning_rate": 1.858742941775433e-05, + "loss": 3.2973, + "step": 13674 + }, + { + "epoch": 0.16, + "learning_rate": 1.8582801073775804e-05, + "loss": 4.205, + "step": 13676 + }, + { + "epoch": 0.16, + "learning_rate": 1.857817272979728e-05, + "loss": 3.4792, + "step": 13678 + }, + { + "epoch": 0.16, + "learning_rate": 1.8573544385818755e-05, + "loss": 2.1713, + "step": 13680 + }, + { + "epoch": 0.16, + "learning_rate": 1.856891604184023e-05, + "loss": 0.0022, + "step": 13682 + }, + { + "epoch": 0.16, + "learning_rate": 1.8564287697861706e-05, + "loss": 2.7074, + "step": 13684 + }, + { + "epoch": 0.16, + "learning_rate": 1.855965935388318e-05, + "loss": 4.5783, + "step": 13686 + }, + { + "epoch": 0.16, + "learning_rate": 1.8555031009904657e-05, + "loss": 2.8419, + "step": 13688 + }, + { + "epoch": 0.16, + "learning_rate": 1.8550402665926132e-05, + "loss": 4.0734, + "step": 13690 + }, + { + "epoch": 0.16, + "learning_rate": 1.8545774321947608e-05, + "loss": 2.8184, + "step": 13692 + }, + { + "epoch": 0.16, + "learning_rate": 1.8541145977969083e-05, + "loss": 9.5152, + "step": 13694 + }, + { + "epoch": 0.16, + "learning_rate": 1.853651763399056e-05, + "loss": 0.5262, + "step": 13696 + }, + { + "epoch": 0.16, + "learning_rate": 1.8531889290012034e-05, + "loss": 1.9495, + "step": 13698 + }, + { + "epoch": 0.16, + "learning_rate": 1.852726094603351e-05, + "loss": 5.6337, + "step": 13700 + }, + { + "epoch": 0.16, + "learning_rate": 1.8522632602054985e-05, + "loss": 6.1846, + "step": 13702 + }, + { + "epoch": 0.16, + "learning_rate": 1.851800425807646e-05, + "loss": 4.0793, + "step": 13704 + }, + { + "epoch": 0.16, + "learning_rate": 1.8513375914097936e-05, + "loss": 0.2953, + "step": 13706 + }, + { + "epoch": 0.16, + "learning_rate": 1.850874757011941e-05, + "loss": 1.2353, + "step": 13708 + }, + { + "epoch": 0.16, + "learning_rate": 1.8504119226140887e-05, + "loss": 0.1664, + "step": 13710 + }, + { + "epoch": 0.16, + "learning_rate": 1.8499490882162362e-05, + "loss": 1.1889, + "step": 13712 + }, + { + "epoch": 0.16, + "learning_rate": 1.8494862538183837e-05, + "loss": 0.004, + "step": 13714 + }, + { + "epoch": 0.16, + "learning_rate": 1.8490234194205313e-05, + "loss": 1.6534, + "step": 13716 + }, + { + "epoch": 0.16, + "learning_rate": 1.848560585022679e-05, + "loss": 0.355, + "step": 13718 + }, + { + "epoch": 0.16, + "learning_rate": 1.8480977506248264e-05, + "loss": 1.8386, + "step": 13720 + }, + { + "epoch": 0.16, + "learning_rate": 1.847634916226974e-05, + "loss": 0.0067, + "step": 13722 + }, + { + "epoch": 0.16, + "learning_rate": 1.8471720818291215e-05, + "loss": 0.0006, + "step": 13724 + }, + { + "epoch": 0.16, + "learning_rate": 1.846709247431269e-05, + "loss": 0.096, + "step": 13726 + }, + { + "epoch": 0.16, + "learning_rate": 1.846246413033417e-05, + "loss": 1.469, + "step": 13728 + }, + { + "epoch": 0.16, + "learning_rate": 1.8457835786355644e-05, + "loss": 0.0005, + "step": 13730 + }, + { + "epoch": 0.16, + "learning_rate": 1.845320744237712e-05, + "loss": 3.3645, + "step": 13732 + }, + { + "epoch": 0.16, + "learning_rate": 1.8448579098398595e-05, + "loss": 1.5309, + "step": 13734 + }, + { + "epoch": 0.16, + "learning_rate": 1.844395075442007e-05, + "loss": 3.5192, + "step": 13736 + }, + { + "epoch": 0.16, + "learning_rate": 1.8439322410441546e-05, + "loss": 1.0765, + "step": 13738 + }, + { + "epoch": 0.16, + "learning_rate": 1.843469406646302e-05, + "loss": 2.5575, + "step": 13740 + }, + { + "epoch": 0.16, + "learning_rate": 1.8430065722484497e-05, + "loss": 3.3089, + "step": 13742 + }, + { + "epoch": 0.16, + "learning_rate": 1.8425437378505972e-05, + "loss": 1.5296, + "step": 13744 + }, + { + "epoch": 0.16, + "learning_rate": 1.8420809034527448e-05, + "loss": 0.5082, + "step": 13746 + }, + { + "epoch": 0.16, + "learning_rate": 1.8416180690548923e-05, + "loss": 8.9516, + "step": 13748 + }, + { + "epoch": 0.16, + "learning_rate": 1.84115523465704e-05, + "loss": 1.272, + "step": 13750 + }, + { + "epoch": 0.16, + "learning_rate": 1.8406924002591874e-05, + "loss": 2.1876, + "step": 13752 + }, + { + "epoch": 0.16, + "learning_rate": 1.840229565861335e-05, + "loss": 3.1165, + "step": 13754 + }, + { + "epoch": 0.16, + "learning_rate": 1.8397667314634825e-05, + "loss": 2.5071, + "step": 13756 + }, + { + "epoch": 0.16, + "learning_rate": 1.83930389706563e-05, + "loss": 0.8418, + "step": 13758 + }, + { + "epoch": 0.16, + "learning_rate": 1.8388410626677776e-05, + "loss": 2.0806, + "step": 13760 + }, + { + "epoch": 0.16, + "learning_rate": 1.838378228269925e-05, + "loss": 1.7485, + "step": 13762 + }, + { + "epoch": 0.16, + "learning_rate": 1.8379153938720726e-05, + "loss": 0.0035, + "step": 13764 + }, + { + "epoch": 0.16, + "learning_rate": 1.8374525594742202e-05, + "loss": 0.5644, + "step": 13766 + }, + { + "epoch": 0.16, + "learning_rate": 1.8369897250763677e-05, + "loss": 1.09, + "step": 13768 + }, + { + "epoch": 0.16, + "learning_rate": 1.8365268906785153e-05, + "loss": 0.0922, + "step": 13770 + }, + { + "epoch": 0.16, + "learning_rate": 1.8360640562806628e-05, + "loss": 0.0333, + "step": 13772 + }, + { + "epoch": 0.16, + "learning_rate": 1.8356012218828104e-05, + "loss": 0.5531, + "step": 13774 + }, + { + "epoch": 0.16, + "learning_rate": 1.835138387484958e-05, + "loss": 0.5936, + "step": 13776 + }, + { + "epoch": 0.16, + "learning_rate": 1.8346755530871054e-05, + "loss": 1.3969, + "step": 13778 + }, + { + "epoch": 0.16, + "learning_rate": 1.834212718689253e-05, + "loss": 4.8334, + "step": 13780 + }, + { + "epoch": 0.16, + "learning_rate": 1.8337498842914005e-05, + "loss": 1.6664, + "step": 13782 + }, + { + "epoch": 0.16, + "learning_rate": 1.833287049893548e-05, + "loss": 2.933, + "step": 13784 + }, + { + "epoch": 0.16, + "learning_rate": 1.8328242154956956e-05, + "loss": 0.5058, + "step": 13786 + }, + { + "epoch": 0.16, + "learning_rate": 1.832361381097843e-05, + "loss": 2.9082, + "step": 13788 + }, + { + "epoch": 0.16, + "learning_rate": 1.8318985466999907e-05, + "loss": 0.6259, + "step": 13790 + }, + { + "epoch": 0.16, + "learning_rate": 1.8314357123021382e-05, + "loss": 0.0009, + "step": 13792 + }, + { + "epoch": 0.16, + "learning_rate": 1.8309728779042858e-05, + "loss": 0.7306, + "step": 13794 + }, + { + "epoch": 0.16, + "learning_rate": 1.8305100435064333e-05, + "loss": 0.128, + "step": 13796 + }, + { + "epoch": 0.16, + "learning_rate": 1.830047209108581e-05, + "loss": 0.1012, + "step": 13798 + }, + { + "epoch": 0.16, + "learning_rate": 1.8295843747107284e-05, + "loss": 2.7364, + "step": 13800 + }, + { + "epoch": 0.16, + "learning_rate": 1.8291215403128763e-05, + "loss": 8.0037, + "step": 13802 + }, + { + "epoch": 0.16, + "learning_rate": 1.828658705915024e-05, + "loss": 0.38, + "step": 13804 + }, + { + "epoch": 0.16, + "learning_rate": 1.8281958715171714e-05, + "loss": 4.2929, + "step": 13806 + }, + { + "epoch": 0.16, + "learning_rate": 1.827733037119319e-05, + "loss": 2.4929, + "step": 13808 + }, + { + "epoch": 0.16, + "learning_rate": 1.8272702027214665e-05, + "loss": 0.0006, + "step": 13810 + }, + { + "epoch": 0.16, + "learning_rate": 1.826807368323614e-05, + "loss": 2.191, + "step": 13812 + }, + { + "epoch": 0.16, + "learning_rate": 1.8263445339257615e-05, + "loss": 0.9902, + "step": 13814 + }, + { + "epoch": 0.16, + "learning_rate": 1.825881699527909e-05, + "loss": 0.0587, + "step": 13816 + }, + { + "epoch": 0.16, + "learning_rate": 1.8254188651300566e-05, + "loss": 1.1723, + "step": 13818 + }, + { + "epoch": 0.16, + "learning_rate": 1.8249560307322042e-05, + "loss": 0.0024, + "step": 13820 + }, + { + "epoch": 0.16, + "learning_rate": 1.8244931963343517e-05, + "loss": 2.1411, + "step": 13822 + }, + { + "epoch": 0.16, + "learning_rate": 1.8240303619364993e-05, + "loss": 1.1483, + "step": 13824 + }, + { + "epoch": 0.16, + "learning_rate": 1.8235675275386468e-05, + "loss": 0.679, + "step": 13826 + }, + { + "epoch": 0.16, + "learning_rate": 1.8231046931407943e-05, + "loss": 5.6413, + "step": 13828 + }, + { + "epoch": 0.16, + "learning_rate": 1.822641858742942e-05, + "loss": 1.399, + "step": 13830 + }, + { + "epoch": 0.16, + "learning_rate": 1.8221790243450894e-05, + "loss": 1.1885, + "step": 13832 + }, + { + "epoch": 0.16, + "learning_rate": 1.821716189947237e-05, + "loss": 2.2914, + "step": 13834 + }, + { + "epoch": 0.16, + "learning_rate": 1.8212533555493845e-05, + "loss": 0.331, + "step": 13836 + }, + { + "epoch": 0.16, + "learning_rate": 1.820790521151532e-05, + "loss": 1.0843, + "step": 13838 + }, + { + "epoch": 0.16, + "learning_rate": 1.8203276867536796e-05, + "loss": 0.7516, + "step": 13840 + }, + { + "epoch": 0.16, + "learning_rate": 1.819864852355827e-05, + "loss": 3.0404, + "step": 13842 + }, + { + "epoch": 0.16, + "learning_rate": 1.8194020179579747e-05, + "loss": 4.6834, + "step": 13844 + }, + { + "epoch": 0.16, + "learning_rate": 1.8189391835601222e-05, + "loss": 0.3035, + "step": 13846 + }, + { + "epoch": 0.16, + "learning_rate": 1.8184763491622698e-05, + "loss": 0.0032, + "step": 13848 + }, + { + "epoch": 0.16, + "learning_rate": 1.8180135147644173e-05, + "loss": 2.2313, + "step": 13850 + }, + { + "epoch": 0.16, + "learning_rate": 1.817550680366565e-05, + "loss": 0.031, + "step": 13852 + }, + { + "epoch": 0.16, + "learning_rate": 1.8170878459687124e-05, + "loss": 3.0287, + "step": 13854 + }, + { + "epoch": 0.16, + "learning_rate": 1.81662501157086e-05, + "loss": 0.1104, + "step": 13856 + }, + { + "epoch": 0.16, + "learning_rate": 1.8161621771730075e-05, + "loss": 0.3485, + "step": 13858 + }, + { + "epoch": 0.16, + "learning_rate": 1.815699342775155e-05, + "loss": 0.5355, + "step": 13860 + }, + { + "epoch": 0.16, + "learning_rate": 1.8152365083773026e-05, + "loss": 3.9581, + "step": 13862 + }, + { + "epoch": 0.16, + "learning_rate": 1.81477367397945e-05, + "loss": 0.0016, + "step": 13864 + }, + { + "epoch": 0.16, + "learning_rate": 1.8143108395815977e-05, + "loss": 0.0009, + "step": 13866 + }, + { + "epoch": 0.16, + "learning_rate": 1.8138480051837452e-05, + "loss": 0.6097, + "step": 13868 + }, + { + "epoch": 0.16, + "learning_rate": 1.8133851707858927e-05, + "loss": 0.526, + "step": 13870 + }, + { + "epoch": 0.16, + "learning_rate": 1.8129223363880403e-05, + "loss": 5.0417, + "step": 13872 + }, + { + "epoch": 0.16, + "learning_rate": 1.8124595019901878e-05, + "loss": 0.0045, + "step": 13874 + }, + { + "epoch": 0.16, + "learning_rate": 1.8119966675923357e-05, + "loss": 2.9541, + "step": 13876 + }, + { + "epoch": 0.16, + "learning_rate": 1.8115338331944832e-05, + "loss": 6.5442, + "step": 13878 + }, + { + "epoch": 0.16, + "learning_rate": 1.8110709987966308e-05, + "loss": 3.7948, + "step": 13880 + }, + { + "epoch": 0.16, + "learning_rate": 1.8106081643987783e-05, + "loss": 0.4933, + "step": 13882 + }, + { + "epoch": 0.16, + "learning_rate": 1.810145330000926e-05, + "loss": 0.227, + "step": 13884 + }, + { + "epoch": 0.16, + "learning_rate": 1.8096824956030734e-05, + "loss": 2.2246, + "step": 13886 + }, + { + "epoch": 0.16, + "learning_rate": 1.809219661205221e-05, + "loss": 8.2544, + "step": 13888 + }, + { + "epoch": 0.16, + "learning_rate": 1.8087568268073685e-05, + "loss": 4.5197, + "step": 13890 + }, + { + "epoch": 0.16, + "learning_rate": 1.808293992409516e-05, + "loss": 0.3681, + "step": 13892 + }, + { + "epoch": 0.16, + "learning_rate": 1.8078311580116636e-05, + "loss": 0.4646, + "step": 13894 + }, + { + "epoch": 0.16, + "learning_rate": 1.807368323613811e-05, + "loss": 0.6976, + "step": 13896 + }, + { + "epoch": 0.16, + "learning_rate": 1.8069054892159587e-05, + "loss": 3.012, + "step": 13898 + }, + { + "epoch": 0.16, + "learning_rate": 1.8064426548181062e-05, + "loss": 2.4573, + "step": 13900 + }, + { + "epoch": 0.16, + "learning_rate": 1.8059798204202538e-05, + "loss": 0.0005, + "step": 13902 + }, + { + "epoch": 0.16, + "learning_rate": 1.8055169860224013e-05, + "loss": 0.0065, + "step": 13904 + }, + { + "epoch": 0.16, + "learning_rate": 1.805054151624549e-05, + "loss": 1.3884, + "step": 13906 + }, + { + "epoch": 0.16, + "learning_rate": 1.8045913172266964e-05, + "loss": 3.2777, + "step": 13908 + }, + { + "epoch": 0.16, + "learning_rate": 1.804128482828844e-05, + "loss": 0.4445, + "step": 13910 + }, + { + "epoch": 0.16, + "learning_rate": 1.8036656484309915e-05, + "loss": 0.008, + "step": 13912 + }, + { + "epoch": 0.16, + "learning_rate": 1.803202814033139e-05, + "loss": 0.0562, + "step": 13914 + }, + { + "epoch": 0.16, + "learning_rate": 1.8027399796352866e-05, + "loss": 3.1283, + "step": 13916 + }, + { + "epoch": 0.16, + "learning_rate": 1.802277145237434e-05, + "loss": 0.1598, + "step": 13918 + }, + { + "epoch": 0.16, + "learning_rate": 1.8018143108395816e-05, + "loss": 0.7442, + "step": 13920 + }, + { + "epoch": 0.16, + "learning_rate": 1.8013514764417292e-05, + "loss": 0.7279, + "step": 13922 + }, + { + "epoch": 0.16, + "learning_rate": 1.8008886420438767e-05, + "loss": 5.597, + "step": 13924 + }, + { + "epoch": 0.16, + "learning_rate": 1.8004258076460243e-05, + "loss": 2.4086, + "step": 13926 + }, + { + "epoch": 0.16, + "learning_rate": 1.7999629732481718e-05, + "loss": 1.517, + "step": 13928 + }, + { + "epoch": 0.16, + "learning_rate": 1.7995001388503194e-05, + "loss": 0.0006, + "step": 13930 + }, + { + "epoch": 0.16, + "learning_rate": 1.799037304452467e-05, + "loss": 0.0005, + "step": 13932 + }, + { + "epoch": 0.16, + "learning_rate": 1.7985744700546144e-05, + "loss": 4.8246, + "step": 13934 + }, + { + "epoch": 0.16, + "learning_rate": 1.798111635656762e-05, + "loss": 2.6006, + "step": 13936 + }, + { + "epoch": 0.16, + "learning_rate": 1.7976488012589095e-05, + "loss": 0.1007, + "step": 13938 + }, + { + "epoch": 0.16, + "learning_rate": 1.797185966861057e-05, + "loss": 1.4297, + "step": 13940 + }, + { + "epoch": 0.16, + "learning_rate": 1.7967231324632046e-05, + "loss": 0.1441, + "step": 13942 + }, + { + "epoch": 0.16, + "learning_rate": 1.796260298065352e-05, + "loss": 1.8149, + "step": 13944 + }, + { + "epoch": 0.16, + "learning_rate": 1.7957974636674997e-05, + "loss": 1.6269, + "step": 13946 + }, + { + "epoch": 0.16, + "learning_rate": 1.7953346292696476e-05, + "loss": 0.0004, + "step": 13948 + }, + { + "epoch": 0.16, + "learning_rate": 1.794871794871795e-05, + "loss": 2.3885, + "step": 13950 + }, + { + "epoch": 0.16, + "learning_rate": 1.7944089604739427e-05, + "loss": 1.1743, + "step": 13952 + }, + { + "epoch": 0.16, + "learning_rate": 1.7939461260760902e-05, + "loss": 3.4835, + "step": 13954 + }, + { + "epoch": 0.16, + "learning_rate": 1.7934832916782377e-05, + "loss": 4.7977, + "step": 13956 + }, + { + "epoch": 0.16, + "learning_rate": 1.7930204572803853e-05, + "loss": 1.7986, + "step": 13958 + }, + { + "epoch": 0.16, + "learning_rate": 1.7925576228825328e-05, + "loss": 2.5674, + "step": 13960 + }, + { + "epoch": 0.16, + "learning_rate": 1.7920947884846804e-05, + "loss": 0.0008, + "step": 13962 + }, + { + "epoch": 0.16, + "learning_rate": 1.791631954086828e-05, + "loss": 0.0019, + "step": 13964 + }, + { + "epoch": 0.16, + "learning_rate": 1.7911691196889755e-05, + "loss": 1.8908, + "step": 13966 + }, + { + "epoch": 0.16, + "learning_rate": 1.790706285291123e-05, + "loss": 4.4968, + "step": 13968 + }, + { + "epoch": 0.16, + "learning_rate": 1.7902434508932705e-05, + "loss": 2.6313, + "step": 13970 + }, + { + "epoch": 0.16, + "learning_rate": 1.789780616495418e-05, + "loss": 4.3402, + "step": 13972 + }, + { + "epoch": 0.16, + "learning_rate": 1.7893177820975656e-05, + "loss": 1.689, + "step": 13974 + }, + { + "epoch": 0.16, + "learning_rate": 1.788854947699713e-05, + "loss": 3.3673, + "step": 13976 + }, + { + "epoch": 0.16, + "learning_rate": 1.7883921133018607e-05, + "loss": 3.8666, + "step": 13978 + }, + { + "epoch": 0.16, + "learning_rate": 1.7879292789040083e-05, + "loss": 0.0342, + "step": 13980 + }, + { + "epoch": 0.16, + "learning_rate": 1.7874664445061558e-05, + "loss": 5.9796, + "step": 13982 + }, + { + "epoch": 0.16, + "learning_rate": 1.7870036101083033e-05, + "loss": 3.1522, + "step": 13984 + }, + { + "epoch": 0.16, + "learning_rate": 1.786540775710451e-05, + "loss": 3.4608, + "step": 13986 + }, + { + "epoch": 0.16, + "learning_rate": 1.7860779413125984e-05, + "loss": 2.4209, + "step": 13988 + }, + { + "epoch": 0.16, + "learning_rate": 1.785615106914746e-05, + "loss": 0.0036, + "step": 13990 + }, + { + "epoch": 0.16, + "learning_rate": 1.7851522725168935e-05, + "loss": 0.084, + "step": 13992 + }, + { + "epoch": 0.16, + "learning_rate": 1.784689438119041e-05, + "loss": 0.273, + "step": 13994 + }, + { + "epoch": 0.16, + "learning_rate": 1.7842266037211886e-05, + "loss": 1.0989, + "step": 13996 + }, + { + "epoch": 0.16, + "learning_rate": 1.783763769323336e-05, + "loss": 0.5135, + "step": 13998 + }, + { + "epoch": 0.16, + "learning_rate": 1.7833009349254837e-05, + "loss": 5.2966, + "step": 14000 + }, + { + "epoch": 0.16, + "learning_rate": 1.7828381005276312e-05, + "loss": 1.5025, + "step": 14002 + }, + { + "epoch": 0.16, + "learning_rate": 1.7823752661297788e-05, + "loss": 2.2765, + "step": 14004 + }, + { + "epoch": 0.16, + "learning_rate": 1.7819124317319263e-05, + "loss": 3.2422, + "step": 14006 + }, + { + "epoch": 0.16, + "learning_rate": 1.781449597334074e-05, + "loss": 1.2087, + "step": 14008 + }, + { + "epoch": 0.16, + "learning_rate": 1.7809867629362214e-05, + "loss": 5.4052, + "step": 14010 + }, + { + "epoch": 0.16, + "learning_rate": 1.780523928538369e-05, + "loss": 2.9954, + "step": 14012 + }, + { + "epoch": 0.16, + "learning_rate": 1.7800610941405165e-05, + "loss": 2.333, + "step": 14014 + }, + { + "epoch": 0.16, + "learning_rate": 1.779598259742664e-05, + "loss": 3.5513, + "step": 14016 + }, + { + "epoch": 0.16, + "learning_rate": 1.7791354253448116e-05, + "loss": 0.834, + "step": 14018 + }, + { + "epoch": 0.16, + "learning_rate": 1.778672590946959e-05, + "loss": 0.5189, + "step": 14020 + }, + { + "epoch": 0.16, + "learning_rate": 1.778209756549107e-05, + "loss": 0.9156, + "step": 14022 + }, + { + "epoch": 0.16, + "learning_rate": 1.7777469221512545e-05, + "loss": 2.3368, + "step": 14024 + }, + { + "epoch": 0.16, + "learning_rate": 1.777284087753402e-05, + "loss": 2.5395, + "step": 14026 + }, + { + "epoch": 0.16, + "learning_rate": 1.7768212533555496e-05, + "loss": 2.3215, + "step": 14028 + }, + { + "epoch": 0.16, + "learning_rate": 1.776358418957697e-05, + "loss": 1.3904, + "step": 14030 + }, + { + "epoch": 0.16, + "learning_rate": 1.7758955845598447e-05, + "loss": 0.0558, + "step": 14032 + }, + { + "epoch": 0.16, + "learning_rate": 1.7754327501619922e-05, + "loss": 0.3764, + "step": 14034 + }, + { + "epoch": 0.16, + "learning_rate": 1.7749699157641398e-05, + "loss": 2.5968, + "step": 14036 + }, + { + "epoch": 0.16, + "learning_rate": 1.7745070813662873e-05, + "loss": 1.8432, + "step": 14038 + }, + { + "epoch": 0.16, + "learning_rate": 1.774044246968435e-05, + "loss": 6.6889, + "step": 14040 + }, + { + "epoch": 0.16, + "learning_rate": 1.7735814125705824e-05, + "loss": 2.1079, + "step": 14042 + }, + { + "epoch": 0.16, + "learning_rate": 1.77311857817273e-05, + "loss": 0.243, + "step": 14044 + }, + { + "epoch": 0.16, + "learning_rate": 1.7726557437748775e-05, + "loss": 0.0754, + "step": 14046 + }, + { + "epoch": 0.16, + "learning_rate": 1.772192909377025e-05, + "loss": 0.0792, + "step": 14048 + }, + { + "epoch": 0.16, + "learning_rate": 1.7717300749791726e-05, + "loss": 2.5114, + "step": 14050 + }, + { + "epoch": 0.16, + "learning_rate": 1.77126724058132e-05, + "loss": 2.5162, + "step": 14052 + }, + { + "epoch": 0.16, + "learning_rate": 1.7708044061834677e-05, + "loss": 1.3254, + "step": 14054 + }, + { + "epoch": 0.16, + "learning_rate": 1.7703415717856152e-05, + "loss": 1.4562, + "step": 14056 + }, + { + "epoch": 0.16, + "learning_rate": 1.7698787373877627e-05, + "loss": 2.6882, + "step": 14058 + }, + { + "epoch": 0.16, + "learning_rate": 1.7694159029899103e-05, + "loss": 4.1705, + "step": 14060 + }, + { + "epoch": 0.16, + "learning_rate": 1.768953068592058e-05, + "loss": 5.2712, + "step": 14062 + }, + { + "epoch": 0.16, + "learning_rate": 1.7684902341942054e-05, + "loss": 2.4092, + "step": 14064 + }, + { + "epoch": 0.16, + "learning_rate": 1.768027399796353e-05, + "loss": 1.7373, + "step": 14066 + }, + { + "epoch": 0.16, + "learning_rate": 1.7675645653985005e-05, + "loss": 4.2599, + "step": 14068 + }, + { + "epoch": 0.16, + "learning_rate": 1.767101731000648e-05, + "loss": 0.0008, + "step": 14070 + }, + { + "epoch": 0.16, + "learning_rate": 1.7666388966027955e-05, + "loss": 2.8173, + "step": 14072 + }, + { + "epoch": 0.16, + "learning_rate": 1.766176062204943e-05, + "loss": 2.1836, + "step": 14074 + }, + { + "epoch": 0.16, + "learning_rate": 1.7657132278070906e-05, + "loss": 1.623, + "step": 14076 + }, + { + "epoch": 0.16, + "learning_rate": 1.7652503934092382e-05, + "loss": 3.3051, + "step": 14078 + }, + { + "epoch": 0.16, + "learning_rate": 1.7647875590113857e-05, + "loss": 0.0016, + "step": 14080 + }, + { + "epoch": 0.16, + "learning_rate": 1.7643247246135333e-05, + "loss": 1.4861, + "step": 14082 + }, + { + "epoch": 0.16, + "learning_rate": 1.7638618902156808e-05, + "loss": 1.7357, + "step": 14084 + }, + { + "epoch": 0.16, + "learning_rate": 1.7633990558178283e-05, + "loss": 1.275, + "step": 14086 + }, + { + "epoch": 0.16, + "learning_rate": 1.762936221419976e-05, + "loss": 0.2014, + "step": 14088 + }, + { + "epoch": 0.16, + "learning_rate": 1.7624733870221234e-05, + "loss": 2.9736, + "step": 14090 + }, + { + "epoch": 0.16, + "learning_rate": 1.762010552624271e-05, + "loss": 2.7779, + "step": 14092 + }, + { + "epoch": 0.16, + "learning_rate": 1.7615477182264185e-05, + "loss": 2.2258, + "step": 14094 + }, + { + "epoch": 0.16, + "learning_rate": 1.7610848838285664e-05, + "loss": 1.8074, + "step": 14096 + }, + { + "epoch": 0.16, + "learning_rate": 1.760622049430714e-05, + "loss": 1.2776, + "step": 14098 + }, + { + "epoch": 0.16, + "learning_rate": 1.7601592150328615e-05, + "loss": 0.0108, + "step": 14100 + }, + { + "epoch": 0.16, + "learning_rate": 1.759696380635009e-05, + "loss": 0.4278, + "step": 14102 + }, + { + "epoch": 0.16, + "learning_rate": 1.7592335462371566e-05, + "loss": 0.4535, + "step": 14104 + }, + { + "epoch": 0.16, + "learning_rate": 1.758770711839304e-05, + "loss": 0.003, + "step": 14106 + }, + { + "epoch": 0.16, + "learning_rate": 1.7583078774414516e-05, + "loss": 1.889, + "step": 14108 + }, + { + "epoch": 0.16, + "learning_rate": 1.7578450430435992e-05, + "loss": 0.0005, + "step": 14110 + }, + { + "epoch": 0.16, + "learning_rate": 1.7573822086457467e-05, + "loss": 0.3605, + "step": 14112 + }, + { + "epoch": 0.16, + "learning_rate": 1.7569193742478943e-05, + "loss": 0.0007, + "step": 14114 + }, + { + "epoch": 0.16, + "learning_rate": 1.7564565398500418e-05, + "loss": 1.0247, + "step": 14116 + }, + { + "epoch": 0.16, + "learning_rate": 1.7559937054521894e-05, + "loss": 0.4573, + "step": 14118 + }, + { + "epoch": 0.16, + "learning_rate": 1.755530871054337e-05, + "loss": 1.6794, + "step": 14120 + }, + { + "epoch": 0.16, + "learning_rate": 1.7550680366564844e-05, + "loss": 2.1704, + "step": 14122 + }, + { + "epoch": 0.16, + "learning_rate": 1.754605202258632e-05, + "loss": 4.0895, + "step": 14124 + }, + { + "epoch": 0.16, + "learning_rate": 1.7541423678607795e-05, + "loss": 0.8793, + "step": 14126 + }, + { + "epoch": 0.16, + "learning_rate": 1.753679533462927e-05, + "loss": 2.5844, + "step": 14128 + }, + { + "epoch": 0.16, + "learning_rate": 1.7532166990650746e-05, + "loss": 0.2051, + "step": 14130 + }, + { + "epoch": 0.16, + "learning_rate": 1.752753864667222e-05, + "loss": 2.2902, + "step": 14132 + }, + { + "epoch": 0.16, + "learning_rate": 1.7522910302693697e-05, + "loss": 4.0179, + "step": 14134 + }, + { + "epoch": 0.16, + "learning_rate": 1.7518281958715172e-05, + "loss": 0.0017, + "step": 14136 + }, + { + "epoch": 0.16, + "learning_rate": 1.7513653614736648e-05, + "loss": 4.8414, + "step": 14138 + }, + { + "epoch": 0.16, + "learning_rate": 1.7509025270758123e-05, + "loss": 2.2602, + "step": 14140 + }, + { + "epoch": 0.16, + "learning_rate": 1.75043969267796e-05, + "loss": 1.85, + "step": 14142 + }, + { + "epoch": 0.16, + "learning_rate": 1.7499768582801074e-05, + "loss": 6.2589, + "step": 14144 + }, + { + "epoch": 0.16, + "learning_rate": 1.749514023882255e-05, + "loss": 1.1886, + "step": 14146 + }, + { + "epoch": 0.16, + "learning_rate": 1.7490511894844025e-05, + "loss": 0.6075, + "step": 14148 + }, + { + "epoch": 0.16, + "learning_rate": 1.74858835508655e-05, + "loss": 0.9095, + "step": 14150 + }, + { + "epoch": 0.16, + "learning_rate": 1.7481255206886976e-05, + "loss": 2.2343, + "step": 14152 + }, + { + "epoch": 0.16, + "learning_rate": 1.747662686290845e-05, + "loss": 0.4552, + "step": 14154 + }, + { + "epoch": 0.16, + "learning_rate": 1.7471998518929927e-05, + "loss": 1.708, + "step": 14156 + }, + { + "epoch": 0.16, + "learning_rate": 1.7467370174951402e-05, + "loss": 0.042, + "step": 14158 + }, + { + "epoch": 0.16, + "learning_rate": 1.7462741830972877e-05, + "loss": 2.2137, + "step": 14160 + }, + { + "epoch": 0.16, + "learning_rate": 1.7458113486994353e-05, + "loss": 3.0299, + "step": 14162 + }, + { + "epoch": 0.16, + "learning_rate": 1.745348514301583e-05, + "loss": 1.7795, + "step": 14164 + }, + { + "epoch": 0.16, + "learning_rate": 1.7448856799037304e-05, + "loss": 0.8973, + "step": 14166 + }, + { + "epoch": 0.16, + "learning_rate": 1.7444228455058783e-05, + "loss": 1.7542, + "step": 14168 + }, + { + "epoch": 0.16, + "learning_rate": 1.7439600111080258e-05, + "loss": 0.6673, + "step": 14170 + }, + { + "epoch": 0.16, + "learning_rate": 1.7434971767101733e-05, + "loss": 4.5049, + "step": 14172 + }, + { + "epoch": 0.16, + "learning_rate": 1.743034342312321e-05, + "loss": 0.1665, + "step": 14174 + }, + { + "epoch": 0.16, + "learning_rate": 1.7425715079144684e-05, + "loss": 0.4135, + "step": 14176 + }, + { + "epoch": 0.16, + "learning_rate": 1.742108673516616e-05, + "loss": 1.6589, + "step": 14178 + }, + { + "epoch": 0.16, + "learning_rate": 1.7416458391187635e-05, + "loss": 3.176, + "step": 14180 + }, + { + "epoch": 0.16, + "learning_rate": 1.741183004720911e-05, + "loss": 0.3027, + "step": 14182 + }, + { + "epoch": 0.16, + "learning_rate": 1.7407201703230586e-05, + "loss": 1.937, + "step": 14184 + }, + { + "epoch": 0.16, + "learning_rate": 1.740257335925206e-05, + "loss": 0.9323, + "step": 14186 + }, + { + "epoch": 0.16, + "learning_rate": 1.7397945015273537e-05, + "loss": 1.0712, + "step": 14188 + }, + { + "epoch": 0.16, + "learning_rate": 1.7393316671295012e-05, + "loss": 5.5913, + "step": 14190 + }, + { + "epoch": 0.16, + "learning_rate": 1.7388688327316488e-05, + "loss": 0.455, + "step": 14192 + }, + { + "epoch": 0.16, + "learning_rate": 1.7384059983337963e-05, + "loss": 3.7109, + "step": 14194 + }, + { + "epoch": 0.16, + "learning_rate": 1.737943163935944e-05, + "loss": 1.6452, + "step": 14196 + }, + { + "epoch": 0.16, + "learning_rate": 1.7374803295380914e-05, + "loss": 0.8799, + "step": 14198 + }, + { + "epoch": 0.16, + "learning_rate": 1.737017495140239e-05, + "loss": 2.719, + "step": 14200 + }, + { + "epoch": 0.16, + "learning_rate": 1.7365546607423865e-05, + "loss": 1.7441, + "step": 14202 + }, + { + "epoch": 0.16, + "learning_rate": 1.736091826344534e-05, + "loss": 0.4166, + "step": 14204 + }, + { + "epoch": 0.16, + "learning_rate": 1.7356289919466816e-05, + "loss": 2.2928, + "step": 14206 + }, + { + "epoch": 0.16, + "learning_rate": 1.735166157548829e-05, + "loss": 0.6742, + "step": 14208 + }, + { + "epoch": 0.16, + "learning_rate": 1.7347033231509767e-05, + "loss": 3.7499, + "step": 14210 + }, + { + "epoch": 0.16, + "learning_rate": 1.7342404887531242e-05, + "loss": 3.6805, + "step": 14212 + }, + { + "epoch": 0.16, + "learning_rate": 1.7337776543552717e-05, + "loss": 0.1114, + "step": 14214 + }, + { + "epoch": 0.16, + "learning_rate": 1.7333148199574193e-05, + "loss": 5.7093, + "step": 14216 + }, + { + "epoch": 0.16, + "learning_rate": 1.7328519855595668e-05, + "loss": 2.7732, + "step": 14218 + }, + { + "epoch": 0.16, + "learning_rate": 1.7323891511617144e-05, + "loss": 4.7145, + "step": 14220 + }, + { + "epoch": 0.16, + "learning_rate": 1.731926316763862e-05, + "loss": 0.4944, + "step": 14222 + }, + { + "epoch": 0.16, + "learning_rate": 1.7314634823660094e-05, + "loss": 1.701, + "step": 14224 + }, + { + "epoch": 0.16, + "learning_rate": 1.731000647968157e-05, + "loss": 1.7965, + "step": 14226 + }, + { + "epoch": 0.16, + "learning_rate": 1.7305378135703045e-05, + "loss": 0.8086, + "step": 14228 + }, + { + "epoch": 0.16, + "learning_rate": 1.730074979172452e-05, + "loss": 1.4546, + "step": 14230 + }, + { + "epoch": 0.16, + "learning_rate": 1.7296121447745996e-05, + "loss": 0.0336, + "step": 14232 + }, + { + "epoch": 0.16, + "learning_rate": 1.729149310376747e-05, + "loss": 2.2752, + "step": 14234 + }, + { + "epoch": 0.16, + "learning_rate": 1.7286864759788947e-05, + "loss": 0.4761, + "step": 14236 + }, + { + "epoch": 0.16, + "learning_rate": 1.7282236415810422e-05, + "loss": 0.0008, + "step": 14238 + }, + { + "epoch": 0.16, + "learning_rate": 1.7277608071831898e-05, + "loss": 4.2546, + "step": 14240 + }, + { + "epoch": 0.16, + "learning_rate": 1.7272979727853377e-05, + "loss": 1.3129, + "step": 14242 + }, + { + "epoch": 0.16, + "learning_rate": 1.7268351383874852e-05, + "loss": 3.3543, + "step": 14244 + }, + { + "epoch": 0.16, + "learning_rate": 1.7263723039896328e-05, + "loss": 0.6315, + "step": 14246 + }, + { + "epoch": 0.16, + "learning_rate": 1.7259094695917803e-05, + "loss": 2.9811, + "step": 14248 + }, + { + "epoch": 0.16, + "learning_rate": 1.725446635193928e-05, + "loss": 3.6715, + "step": 14250 + }, + { + "epoch": 0.16, + "learning_rate": 1.7249838007960754e-05, + "loss": 0.0035, + "step": 14252 + }, + { + "epoch": 0.16, + "learning_rate": 1.724520966398223e-05, + "loss": 0.1769, + "step": 14254 + }, + { + "epoch": 0.16, + "learning_rate": 1.7240581320003705e-05, + "loss": 0.0534, + "step": 14256 + }, + { + "epoch": 0.16, + "learning_rate": 1.723595297602518e-05, + "loss": 0.7155, + "step": 14258 + }, + { + "epoch": 0.16, + "learning_rate": 1.7231324632046656e-05, + "loss": 3.239, + "step": 14260 + }, + { + "epoch": 0.16, + "learning_rate": 1.722669628806813e-05, + "loss": 5.7438, + "step": 14262 + }, + { + "epoch": 0.16, + "learning_rate": 1.7222067944089606e-05, + "loss": 0.4151, + "step": 14264 + }, + { + "epoch": 0.16, + "learning_rate": 1.7217439600111082e-05, + "loss": 0.0713, + "step": 14266 + }, + { + "epoch": 0.16, + "learning_rate": 1.7212811256132557e-05, + "loss": 5.3267, + "step": 14268 + }, + { + "epoch": 0.16, + "learning_rate": 1.7208182912154033e-05, + "loss": 1.5913, + "step": 14270 + }, + { + "epoch": 0.16, + "learning_rate": 1.7203554568175508e-05, + "loss": 1.466, + "step": 14272 + }, + { + "epoch": 0.16, + "learning_rate": 1.7198926224196983e-05, + "loss": 3.3951, + "step": 14274 + }, + { + "epoch": 0.16, + "learning_rate": 1.719429788021846e-05, + "loss": 0.2657, + "step": 14276 + }, + { + "epoch": 0.16, + "learning_rate": 1.7189669536239934e-05, + "loss": 1.6616, + "step": 14278 + }, + { + "epoch": 0.16, + "learning_rate": 1.718504119226141e-05, + "loss": 3.9433, + "step": 14280 + }, + { + "epoch": 0.16, + "learning_rate": 1.7180412848282885e-05, + "loss": 1.3582, + "step": 14282 + }, + { + "epoch": 0.16, + "learning_rate": 1.717578450430436e-05, + "loss": 2.9668, + "step": 14284 + }, + { + "epoch": 0.16, + "learning_rate": 1.7171156160325836e-05, + "loss": 2.3829, + "step": 14286 + }, + { + "epoch": 0.16, + "learning_rate": 1.716652781634731e-05, + "loss": 1.9184, + "step": 14288 + }, + { + "epoch": 0.16, + "learning_rate": 1.7161899472368787e-05, + "loss": 4.6674, + "step": 14290 + }, + { + "epoch": 0.16, + "learning_rate": 1.7157271128390262e-05, + "loss": 1.0855, + "step": 14292 + }, + { + "epoch": 0.16, + "learning_rate": 1.7152642784411738e-05, + "loss": 0.0407, + "step": 14294 + }, + { + "epoch": 0.16, + "learning_rate": 1.7148014440433213e-05, + "loss": 1.6122, + "step": 14296 + }, + { + "epoch": 0.16, + "learning_rate": 1.714338609645469e-05, + "loss": 6.2775, + "step": 14298 + }, + { + "epoch": 0.16, + "learning_rate": 1.7138757752476164e-05, + "loss": 0.0018, + "step": 14300 + }, + { + "epoch": 0.16, + "learning_rate": 1.713412940849764e-05, + "loss": 1.5273, + "step": 14302 + }, + { + "epoch": 0.16, + "learning_rate": 1.7129501064519115e-05, + "loss": 0.3795, + "step": 14304 + }, + { + "epoch": 0.16, + "learning_rate": 1.712487272054059e-05, + "loss": 4.8135, + "step": 14306 + }, + { + "epoch": 0.16, + "learning_rate": 1.7120244376562066e-05, + "loss": 2.4487, + "step": 14308 + }, + { + "epoch": 0.16, + "learning_rate": 1.711561603258354e-05, + "loss": 0.6626, + "step": 14310 + }, + { + "epoch": 0.16, + "learning_rate": 1.7110987688605017e-05, + "loss": 0.003, + "step": 14312 + }, + { + "epoch": 0.16, + "learning_rate": 1.7106359344626492e-05, + "loss": 3.9032, + "step": 14314 + }, + { + "epoch": 0.16, + "learning_rate": 1.710173100064797e-05, + "loss": 5.8713, + "step": 14316 + }, + { + "epoch": 0.16, + "learning_rate": 1.7097102656669446e-05, + "loss": 7.7147, + "step": 14318 + }, + { + "epoch": 0.16, + "learning_rate": 1.709247431269092e-05, + "loss": 1.2029, + "step": 14320 + }, + { + "epoch": 0.16, + "learning_rate": 1.7087845968712397e-05, + "loss": 0.0137, + "step": 14322 + }, + { + "epoch": 0.16, + "learning_rate": 1.7083217624733872e-05, + "loss": 1.214, + "step": 14324 + }, + { + "epoch": 0.17, + "learning_rate": 1.7078589280755348e-05, + "loss": 0.0131, + "step": 14326 + }, + { + "epoch": 0.17, + "learning_rate": 1.7073960936776823e-05, + "loss": 1.149, + "step": 14328 + }, + { + "epoch": 0.17, + "learning_rate": 1.70693325927983e-05, + "loss": 0.0121, + "step": 14330 + }, + { + "epoch": 0.17, + "learning_rate": 1.7064704248819774e-05, + "loss": 1.4038, + "step": 14332 + }, + { + "epoch": 0.17, + "learning_rate": 1.706007590484125e-05, + "loss": 0.805, + "step": 14334 + }, + { + "epoch": 0.17, + "learning_rate": 1.7055447560862725e-05, + "loss": 0.6927, + "step": 14336 + }, + { + "epoch": 0.17, + "learning_rate": 1.70508192168842e-05, + "loss": 0.961, + "step": 14338 + }, + { + "epoch": 0.17, + "learning_rate": 1.7046190872905676e-05, + "loss": 1.6342, + "step": 14340 + }, + { + "epoch": 0.17, + "learning_rate": 1.704156252892715e-05, + "loss": 0.0606, + "step": 14342 + }, + { + "epoch": 0.17, + "learning_rate": 1.7036934184948627e-05, + "loss": 0.3379, + "step": 14344 + }, + { + "epoch": 0.17, + "learning_rate": 1.7032305840970102e-05, + "loss": 0.6071, + "step": 14346 + }, + { + "epoch": 0.17, + "learning_rate": 1.7027677496991578e-05, + "loss": 0.0979, + "step": 14348 + }, + { + "epoch": 0.17, + "learning_rate": 1.7023049153013053e-05, + "loss": 0.1134, + "step": 14350 + }, + { + "epoch": 0.17, + "learning_rate": 1.701842080903453e-05, + "loss": 0.0223, + "step": 14352 + }, + { + "epoch": 0.17, + "learning_rate": 1.7013792465056004e-05, + "loss": 0.6196, + "step": 14354 + }, + { + "epoch": 0.17, + "learning_rate": 1.700916412107748e-05, + "loss": 0.6496, + "step": 14356 + }, + { + "epoch": 0.17, + "learning_rate": 1.7004535777098955e-05, + "loss": 5.8406, + "step": 14358 + }, + { + "epoch": 0.17, + "learning_rate": 1.699990743312043e-05, + "loss": 0.3483, + "step": 14360 + }, + { + "epoch": 0.17, + "learning_rate": 1.6995279089141906e-05, + "loss": 0.371, + "step": 14362 + }, + { + "epoch": 0.17, + "learning_rate": 1.699065074516338e-05, + "loss": 0.9759, + "step": 14364 + }, + { + "epoch": 0.17, + "learning_rate": 1.6986022401184856e-05, + "loss": 0.513, + "step": 14366 + }, + { + "epoch": 0.17, + "learning_rate": 1.6981394057206332e-05, + "loss": 0.024, + "step": 14368 + }, + { + "epoch": 0.17, + "learning_rate": 1.6976765713227807e-05, + "loss": 4.3694, + "step": 14370 + }, + { + "epoch": 0.17, + "learning_rate": 1.6972137369249283e-05, + "loss": 1.6888, + "step": 14372 + }, + { + "epoch": 0.17, + "learning_rate": 1.6967509025270758e-05, + "loss": 2.7927, + "step": 14374 + }, + { + "epoch": 0.17, + "learning_rate": 1.6962880681292234e-05, + "loss": 0.0003, + "step": 14376 + }, + { + "epoch": 0.17, + "learning_rate": 1.695825233731371e-05, + "loss": 3.7456, + "step": 14378 + }, + { + "epoch": 0.17, + "learning_rate": 1.6953623993335184e-05, + "loss": 7.3766, + "step": 14380 + }, + { + "epoch": 0.17, + "learning_rate": 1.694899564935666e-05, + "loss": 0.4556, + "step": 14382 + }, + { + "epoch": 0.17, + "learning_rate": 1.6944367305378135e-05, + "loss": 4.9924, + "step": 14384 + }, + { + "epoch": 0.17, + "learning_rate": 1.693973896139961e-05, + "loss": 0.0059, + "step": 14386 + }, + { + "epoch": 0.17, + "learning_rate": 1.6935110617421086e-05, + "loss": 0.637, + "step": 14388 + }, + { + "epoch": 0.17, + "learning_rate": 1.6930482273442565e-05, + "loss": 1.8404, + "step": 14390 + }, + { + "epoch": 0.17, + "learning_rate": 1.692585392946404e-05, + "loss": 0.1671, + "step": 14392 + }, + { + "epoch": 0.17, + "learning_rate": 1.6921225585485516e-05, + "loss": 2.9233, + "step": 14394 + }, + { + "epoch": 0.17, + "learning_rate": 1.691659724150699e-05, + "loss": 3.4532, + "step": 14396 + }, + { + "epoch": 0.17, + "learning_rate": 1.6911968897528467e-05, + "loss": 8.511, + "step": 14398 + }, + { + "epoch": 0.17, + "learning_rate": 1.6907340553549942e-05, + "loss": 1.9012, + "step": 14400 + }, + { + "epoch": 0.17, + "learning_rate": 1.6902712209571417e-05, + "loss": 0.0021, + "step": 14402 + }, + { + "epoch": 0.17, + "learning_rate": 1.6898083865592893e-05, + "loss": 2.4593, + "step": 14404 + }, + { + "epoch": 0.17, + "learning_rate": 1.6893455521614368e-05, + "loss": 4.9771, + "step": 14406 + }, + { + "epoch": 0.17, + "learning_rate": 1.6888827177635844e-05, + "loss": 0.687, + "step": 14408 + }, + { + "epoch": 0.17, + "learning_rate": 1.688419883365732e-05, + "loss": 3.7834, + "step": 14410 + }, + { + "epoch": 0.17, + "learning_rate": 1.6879570489678795e-05, + "loss": 0.2298, + "step": 14412 + }, + { + "epoch": 0.17, + "learning_rate": 1.687494214570027e-05, + "loss": 0.0003, + "step": 14414 + }, + { + "epoch": 0.17, + "learning_rate": 1.6870313801721745e-05, + "loss": 2.4668, + "step": 14416 + }, + { + "epoch": 0.17, + "learning_rate": 1.686568545774322e-05, + "loss": 4.7613, + "step": 14418 + }, + { + "epoch": 0.17, + "learning_rate": 1.6861057113764693e-05, + "loss": 4.0283, + "step": 14420 + }, + { + "epoch": 0.17, + "learning_rate": 1.685642876978617e-05, + "loss": 0.9501, + "step": 14422 + }, + { + "epoch": 0.17, + "learning_rate": 1.6851800425807647e-05, + "loss": 0.5037, + "step": 14424 + }, + { + "epoch": 0.17, + "learning_rate": 1.6847172081829123e-05, + "loss": 0.4629, + "step": 14426 + }, + { + "epoch": 0.17, + "learning_rate": 1.6842543737850598e-05, + "loss": 2.6493, + "step": 14428 + }, + { + "epoch": 0.17, + "learning_rate": 1.6837915393872073e-05, + "loss": 0.3727, + "step": 14430 + }, + { + "epoch": 0.17, + "learning_rate": 1.683328704989355e-05, + "loss": 1.3007, + "step": 14432 + }, + { + "epoch": 0.17, + "learning_rate": 1.6828658705915024e-05, + "loss": 0.5354, + "step": 14434 + }, + { + "epoch": 0.17, + "learning_rate": 1.68240303619365e-05, + "loss": 5.4703, + "step": 14436 + }, + { + "epoch": 0.17, + "learning_rate": 1.6819402017957975e-05, + "loss": 0.9454, + "step": 14438 + }, + { + "epoch": 0.17, + "learning_rate": 1.681477367397945e-05, + "loss": 0.0902, + "step": 14440 + }, + { + "epoch": 0.17, + "learning_rate": 1.6810145330000926e-05, + "loss": 1.3643, + "step": 14442 + }, + { + "epoch": 0.17, + "learning_rate": 1.68055169860224e-05, + "loss": 2.4239, + "step": 14444 + }, + { + "epoch": 0.17, + "learning_rate": 1.6800888642043877e-05, + "loss": 1.1963, + "step": 14446 + }, + { + "epoch": 0.17, + "learning_rate": 1.6796260298065352e-05, + "loss": 1.0935, + "step": 14448 + }, + { + "epoch": 0.17, + "learning_rate": 1.6791631954086828e-05, + "loss": 0.4362, + "step": 14450 + }, + { + "epoch": 0.17, + "learning_rate": 1.6787003610108303e-05, + "loss": 0.6111, + "step": 14452 + }, + { + "epoch": 0.17, + "learning_rate": 1.678237526612978e-05, + "loss": 0.2667, + "step": 14454 + }, + { + "epoch": 0.17, + "learning_rate": 1.6777746922151254e-05, + "loss": 6.5233, + "step": 14456 + }, + { + "epoch": 0.17, + "learning_rate": 1.677311857817273e-05, + "loss": 0.4211, + "step": 14458 + }, + { + "epoch": 0.17, + "learning_rate": 1.6768490234194205e-05, + "loss": 2.611, + "step": 14460 + }, + { + "epoch": 0.17, + "learning_rate": 1.6763861890215684e-05, + "loss": 0.0024, + "step": 14462 + }, + { + "epoch": 0.17, + "learning_rate": 1.675923354623716e-05, + "loss": 0.7943, + "step": 14464 + }, + { + "epoch": 0.17, + "learning_rate": 1.6754605202258634e-05, + "loss": 3.336, + "step": 14466 + }, + { + "epoch": 0.17, + "learning_rate": 1.674997685828011e-05, + "loss": 0.0063, + "step": 14468 + }, + { + "epoch": 0.17, + "learning_rate": 1.6745348514301585e-05, + "loss": 0.4712, + "step": 14470 + }, + { + "epoch": 0.17, + "learning_rate": 1.674072017032306e-05, + "loss": 0.0798, + "step": 14472 + }, + { + "epoch": 0.17, + "learning_rate": 1.6736091826344536e-05, + "loss": 1.7138, + "step": 14474 + }, + { + "epoch": 0.17, + "learning_rate": 1.673146348236601e-05, + "loss": 7.8274, + "step": 14476 + }, + { + "epoch": 0.17, + "learning_rate": 1.6726835138387487e-05, + "loss": 5.034, + "step": 14478 + }, + { + "epoch": 0.17, + "learning_rate": 1.6722206794408962e-05, + "loss": 2.2502, + "step": 14480 + }, + { + "epoch": 0.17, + "learning_rate": 1.6717578450430438e-05, + "loss": 1.1995, + "step": 14482 + }, + { + "epoch": 0.17, + "learning_rate": 1.6712950106451913e-05, + "loss": 0.7682, + "step": 14484 + }, + { + "epoch": 0.17, + "learning_rate": 1.670832176247339e-05, + "loss": 4.1127, + "step": 14486 + }, + { + "epoch": 0.17, + "learning_rate": 1.6703693418494864e-05, + "loss": 1.9902, + "step": 14488 + }, + { + "epoch": 0.17, + "learning_rate": 1.669906507451634e-05, + "loss": 2.3483, + "step": 14490 + }, + { + "epoch": 0.17, + "learning_rate": 1.669443673053781e-05, + "loss": 1.8379, + "step": 14492 + }, + { + "epoch": 0.17, + "learning_rate": 1.6689808386559287e-05, + "loss": 4.3483, + "step": 14494 + }, + { + "epoch": 0.17, + "learning_rate": 1.6685180042580766e-05, + "loss": 0.0016, + "step": 14496 + }, + { + "epoch": 0.17, + "learning_rate": 1.668055169860224e-05, + "loss": 1.3972, + "step": 14498 + }, + { + "epoch": 0.17, + "learning_rate": 1.6675923354623717e-05, + "loss": 0.1004, + "step": 14500 + }, + { + "epoch": 0.17, + "learning_rate": 1.6671295010645192e-05, + "loss": 0.1341, + "step": 14502 + }, + { + "epoch": 0.17, + "learning_rate": 1.6666666666666667e-05, + "loss": 2.1009, + "step": 14504 + }, + { + "epoch": 0.17, + "learning_rate": 1.6662038322688143e-05, + "loss": 6.019, + "step": 14506 + }, + { + "epoch": 0.17, + "learning_rate": 1.665740997870962e-05, + "loss": 3.1944, + "step": 14508 + }, + { + "epoch": 0.17, + "learning_rate": 1.6652781634731094e-05, + "loss": 0.1878, + "step": 14510 + }, + { + "epoch": 0.17, + "learning_rate": 1.664815329075257e-05, + "loss": 3.0428, + "step": 14512 + }, + { + "epoch": 0.17, + "learning_rate": 1.6643524946774045e-05, + "loss": 4.2022, + "step": 14514 + }, + { + "epoch": 0.17, + "learning_rate": 1.663889660279552e-05, + "loss": 1.8916, + "step": 14516 + }, + { + "epoch": 0.17, + "learning_rate": 1.6634268258816995e-05, + "loss": 0.7287, + "step": 14518 + }, + { + "epoch": 0.17, + "learning_rate": 1.662963991483847e-05, + "loss": 2.567, + "step": 14520 + }, + { + "epoch": 0.17, + "learning_rate": 1.6625011570859946e-05, + "loss": 0.0839, + "step": 14522 + }, + { + "epoch": 0.17, + "learning_rate": 1.6620383226881422e-05, + "loss": 2.2278, + "step": 14524 + }, + { + "epoch": 0.17, + "learning_rate": 1.6615754882902897e-05, + "loss": 0.792, + "step": 14526 + }, + { + "epoch": 0.17, + "learning_rate": 1.6611126538924373e-05, + "loss": 2.7499, + "step": 14528 + }, + { + "epoch": 0.17, + "learning_rate": 1.6606498194945848e-05, + "loss": 5.141, + "step": 14530 + }, + { + "epoch": 0.17, + "learning_rate": 1.6601869850967323e-05, + "loss": 0.1278, + "step": 14532 + }, + { + "epoch": 0.17, + "learning_rate": 1.65972415069888e-05, + "loss": 1.784, + "step": 14534 + }, + { + "epoch": 0.17, + "learning_rate": 1.6592613163010278e-05, + "loss": 3.2108, + "step": 14536 + }, + { + "epoch": 0.17, + "learning_rate": 1.6587984819031753e-05, + "loss": 3.0949, + "step": 14538 + }, + { + "epoch": 0.17, + "learning_rate": 1.658335647505323e-05, + "loss": 2.1306, + "step": 14540 + }, + { + "epoch": 0.17, + "learning_rate": 1.6578728131074704e-05, + "loss": 0.6708, + "step": 14542 + }, + { + "epoch": 0.17, + "learning_rate": 1.657409978709618e-05, + "loss": 0.0167, + "step": 14544 + }, + { + "epoch": 0.17, + "learning_rate": 1.6569471443117655e-05, + "loss": 0.0109, + "step": 14546 + }, + { + "epoch": 0.17, + "learning_rate": 1.656484309913913e-05, + "loss": 0.7003, + "step": 14548 + }, + { + "epoch": 0.17, + "learning_rate": 1.6560214755160606e-05, + "loss": 0.3928, + "step": 14550 + }, + { + "epoch": 0.17, + "learning_rate": 1.655558641118208e-05, + "loss": 0.002, + "step": 14552 + }, + { + "epoch": 0.17, + "learning_rate": 1.6550958067203556e-05, + "loss": 2.2274, + "step": 14554 + }, + { + "epoch": 0.17, + "learning_rate": 1.6546329723225032e-05, + "loss": 6.2223, + "step": 14556 + }, + { + "epoch": 0.17, + "learning_rate": 1.6541701379246507e-05, + "loss": 1.9749, + "step": 14558 + }, + { + "epoch": 0.17, + "learning_rate": 1.6537073035267983e-05, + "loss": 5.9238, + "step": 14560 + }, + { + "epoch": 0.17, + "learning_rate": 1.6532444691289458e-05, + "loss": 0.3642, + "step": 14562 + }, + { + "epoch": 0.17, + "learning_rate": 1.6527816347310934e-05, + "loss": 0.0018, + "step": 14564 + }, + { + "epoch": 0.17, + "learning_rate": 1.6523188003332406e-05, + "loss": 0.8708, + "step": 14566 + }, + { + "epoch": 0.17, + "learning_rate": 1.651855965935388e-05, + "loss": 2.196, + "step": 14568 + }, + { + "epoch": 0.17, + "learning_rate": 1.651393131537536e-05, + "loss": 4.1129, + "step": 14570 + }, + { + "epoch": 0.17, + "learning_rate": 1.6509302971396835e-05, + "loss": 2.0629, + "step": 14572 + }, + { + "epoch": 0.17, + "learning_rate": 1.650467462741831e-05, + "loss": 2.4866, + "step": 14574 + }, + { + "epoch": 0.17, + "learning_rate": 1.6500046283439786e-05, + "loss": 0.731, + "step": 14576 + }, + { + "epoch": 0.17, + "learning_rate": 1.649541793946126e-05, + "loss": 0.0461, + "step": 14578 + }, + { + "epoch": 0.17, + "learning_rate": 1.6490789595482737e-05, + "loss": 0.8755, + "step": 14580 + }, + { + "epoch": 0.17, + "learning_rate": 1.6486161251504212e-05, + "loss": 0.8764, + "step": 14582 + }, + { + "epoch": 0.17, + "learning_rate": 1.6481532907525688e-05, + "loss": 2.6596, + "step": 14584 + }, + { + "epoch": 0.17, + "learning_rate": 1.6476904563547163e-05, + "loss": 2.0307, + "step": 14586 + }, + { + "epoch": 0.17, + "learning_rate": 1.647227621956864e-05, + "loss": 4.654, + "step": 14588 + }, + { + "epoch": 0.17, + "learning_rate": 1.6467647875590114e-05, + "loss": 2.2553, + "step": 14590 + }, + { + "epoch": 0.17, + "learning_rate": 1.646301953161159e-05, + "loss": 5.778, + "step": 14592 + }, + { + "epoch": 0.17, + "learning_rate": 1.6458391187633065e-05, + "loss": 5.6582, + "step": 14594 + }, + { + "epoch": 0.17, + "learning_rate": 1.645376284365454e-05, + "loss": 0.7101, + "step": 14596 + }, + { + "epoch": 0.17, + "learning_rate": 1.6449134499676016e-05, + "loss": 6.4878, + "step": 14598 + }, + { + "epoch": 0.17, + "learning_rate": 1.644450615569749e-05, + "loss": 1.7383, + "step": 14600 + }, + { + "epoch": 0.17, + "learning_rate": 1.6439877811718967e-05, + "loss": 2.8553, + "step": 14602 + }, + { + "epoch": 0.17, + "learning_rate": 1.6435249467740442e-05, + "loss": 0.0021, + "step": 14604 + }, + { + "epoch": 0.17, + "learning_rate": 1.6430621123761918e-05, + "loss": 1.7971, + "step": 14606 + }, + { + "epoch": 0.17, + "learning_rate": 1.6425992779783393e-05, + "loss": 0.4776, + "step": 14608 + }, + { + "epoch": 0.17, + "learning_rate": 1.6421364435804872e-05, + "loss": 1.4232, + "step": 14610 + }, + { + "epoch": 0.17, + "learning_rate": 1.6416736091826347e-05, + "loss": 2.9551, + "step": 14612 + }, + { + "epoch": 0.17, + "learning_rate": 1.6412107747847823e-05, + "loss": 4.4844, + "step": 14614 + }, + { + "epoch": 0.17, + "learning_rate": 1.6407479403869298e-05, + "loss": 0.3095, + "step": 14616 + }, + { + "epoch": 0.17, + "learning_rate": 1.6402851059890773e-05, + "loss": 0.5737, + "step": 14618 + }, + { + "epoch": 0.17, + "learning_rate": 1.639822271591225e-05, + "loss": 0.4895, + "step": 14620 + }, + { + "epoch": 0.17, + "learning_rate": 1.6393594371933724e-05, + "loss": 6.6313, + "step": 14622 + }, + { + "epoch": 0.17, + "learning_rate": 1.63889660279552e-05, + "loss": 0.8642, + "step": 14624 + }, + { + "epoch": 0.17, + "learning_rate": 1.6384337683976675e-05, + "loss": 3.9628, + "step": 14626 + }, + { + "epoch": 0.17, + "learning_rate": 1.637970933999815e-05, + "loss": 0.1049, + "step": 14628 + }, + { + "epoch": 0.17, + "learning_rate": 1.6375080996019626e-05, + "loss": 5.5035, + "step": 14630 + }, + { + "epoch": 0.17, + "learning_rate": 1.63704526520411e-05, + "loss": 2.4347, + "step": 14632 + }, + { + "epoch": 0.17, + "learning_rate": 1.6365824308062577e-05, + "loss": 1.3342, + "step": 14634 + }, + { + "epoch": 0.17, + "learning_rate": 1.6361195964084052e-05, + "loss": 5.1133, + "step": 14636 + }, + { + "epoch": 0.17, + "learning_rate": 1.6356567620105524e-05, + "loss": 4.7734, + "step": 14638 + }, + { + "epoch": 0.17, + "learning_rate": 1.6351939276127e-05, + "loss": 1.2099, + "step": 14640 + }, + { + "epoch": 0.17, + "learning_rate": 1.634731093214848e-05, + "loss": 1.3096, + "step": 14642 + }, + { + "epoch": 0.17, + "learning_rate": 1.6342682588169954e-05, + "loss": 0.174, + "step": 14644 + }, + { + "epoch": 0.17, + "learning_rate": 1.633805424419143e-05, + "loss": 5.1999, + "step": 14646 + }, + { + "epoch": 0.17, + "learning_rate": 1.6333425900212905e-05, + "loss": 0.3116, + "step": 14648 + }, + { + "epoch": 0.17, + "learning_rate": 1.632879755623438e-05, + "loss": 2.0577, + "step": 14650 + }, + { + "epoch": 0.17, + "learning_rate": 1.6324169212255856e-05, + "loss": 0.1741, + "step": 14652 + }, + { + "epoch": 0.17, + "learning_rate": 1.631954086827733e-05, + "loss": 0.0057, + "step": 14654 + }, + { + "epoch": 0.17, + "learning_rate": 1.6314912524298807e-05, + "loss": 1.2336, + "step": 14656 + }, + { + "epoch": 0.17, + "learning_rate": 1.6310284180320282e-05, + "loss": 0.7137, + "step": 14658 + }, + { + "epoch": 0.17, + "learning_rate": 1.6305655836341757e-05, + "loss": 3.854, + "step": 14660 + }, + { + "epoch": 0.17, + "learning_rate": 1.6301027492363233e-05, + "loss": 0.2649, + "step": 14662 + }, + { + "epoch": 0.17, + "learning_rate": 1.6296399148384708e-05, + "loss": 1.3485, + "step": 14664 + }, + { + "epoch": 0.17, + "learning_rate": 1.6291770804406184e-05, + "loss": 0.7348, + "step": 14666 + }, + { + "epoch": 0.17, + "learning_rate": 1.628714246042766e-05, + "loss": 2.0194, + "step": 14668 + }, + { + "epoch": 0.17, + "learning_rate": 1.6282514116449134e-05, + "loss": 0.7178, + "step": 14670 + }, + { + "epoch": 0.17, + "learning_rate": 1.627788577247061e-05, + "loss": 1.2336, + "step": 14672 + }, + { + "epoch": 0.17, + "learning_rate": 1.6273257428492085e-05, + "loss": 1.1309, + "step": 14674 + }, + { + "epoch": 0.17, + "learning_rate": 1.626862908451356e-05, + "loss": 0.0027, + "step": 14676 + }, + { + "epoch": 0.17, + "learning_rate": 1.6264000740535036e-05, + "loss": 1.1177, + "step": 14678 + }, + { + "epoch": 0.17, + "learning_rate": 1.625937239655651e-05, + "loss": 3.6393, + "step": 14680 + }, + { + "epoch": 0.17, + "learning_rate": 1.6254744052577987e-05, + "loss": 0.9661, + "step": 14682 + }, + { + "epoch": 0.17, + "learning_rate": 1.6250115708599466e-05, + "loss": 2.6503, + "step": 14684 + }, + { + "epoch": 0.17, + "learning_rate": 1.624548736462094e-05, + "loss": 4.5529, + "step": 14686 + }, + { + "epoch": 0.17, + "learning_rate": 1.6240859020642417e-05, + "loss": 0.5666, + "step": 14688 + }, + { + "epoch": 0.17, + "learning_rate": 1.6236230676663892e-05, + "loss": 0.1965, + "step": 14690 + }, + { + "epoch": 0.17, + "learning_rate": 1.6231602332685368e-05, + "loss": 3.2071, + "step": 14692 + }, + { + "epoch": 0.17, + "learning_rate": 1.6226973988706843e-05, + "loss": 0.9101, + "step": 14694 + }, + { + "epoch": 0.17, + "learning_rate": 1.622234564472832e-05, + "loss": 1.8072, + "step": 14696 + }, + { + "epoch": 0.17, + "learning_rate": 1.6217717300749794e-05, + "loss": 0.0004, + "step": 14698 + }, + { + "epoch": 0.17, + "learning_rate": 1.621308895677127e-05, + "loss": 1.6776, + "step": 14700 + }, + { + "epoch": 0.17, + "learning_rate": 1.6208460612792745e-05, + "loss": 1.3997, + "step": 14702 + }, + { + "epoch": 0.17, + "learning_rate": 1.620383226881422e-05, + "loss": 1.2409, + "step": 14704 + }, + { + "epoch": 0.17, + "learning_rate": 1.6199203924835696e-05, + "loss": 0.0426, + "step": 14706 + }, + { + "epoch": 0.17, + "learning_rate": 1.619457558085717e-05, + "loss": 2.6735, + "step": 14708 + }, + { + "epoch": 0.17, + "learning_rate": 1.6189947236878643e-05, + "loss": 2.1446, + "step": 14710 + }, + { + "epoch": 0.17, + "learning_rate": 1.618531889290012e-05, + "loss": 1.559, + "step": 14712 + }, + { + "epoch": 0.17, + "learning_rate": 1.6180690548921594e-05, + "loss": 1.3772, + "step": 14714 + }, + { + "epoch": 0.17, + "learning_rate": 1.6176062204943073e-05, + "loss": 0.3061, + "step": 14716 + }, + { + "epoch": 0.17, + "learning_rate": 1.6171433860964548e-05, + "loss": 6.0269, + "step": 14718 + }, + { + "epoch": 0.17, + "learning_rate": 1.6166805516986023e-05, + "loss": 6.1993, + "step": 14720 + }, + { + "epoch": 0.17, + "learning_rate": 1.61621771730075e-05, + "loss": 2.9988, + "step": 14722 + }, + { + "epoch": 0.17, + "learning_rate": 1.6157548829028974e-05, + "loss": 0.8186, + "step": 14724 + }, + { + "epoch": 0.17, + "learning_rate": 1.615292048505045e-05, + "loss": 4.1417, + "step": 14726 + }, + { + "epoch": 0.17, + "learning_rate": 1.6148292141071925e-05, + "loss": 0.9359, + "step": 14728 + }, + { + "epoch": 0.17, + "learning_rate": 1.61436637970934e-05, + "loss": 2.2458, + "step": 14730 + }, + { + "epoch": 0.17, + "learning_rate": 1.6139035453114876e-05, + "loss": 0.0007, + "step": 14732 + }, + { + "epoch": 0.17, + "learning_rate": 1.613440710913635e-05, + "loss": 3.3542, + "step": 14734 + }, + { + "epoch": 0.17, + "learning_rate": 1.6129778765157827e-05, + "loss": 4.3072, + "step": 14736 + }, + { + "epoch": 0.17, + "learning_rate": 1.6125150421179302e-05, + "loss": 0.7174, + "step": 14738 + }, + { + "epoch": 0.17, + "learning_rate": 1.6120522077200778e-05, + "loss": 2.8969, + "step": 14740 + }, + { + "epoch": 0.17, + "learning_rate": 1.6115893733222253e-05, + "loss": 0.002, + "step": 14742 + }, + { + "epoch": 0.17, + "learning_rate": 1.611126538924373e-05, + "loss": 1.7346, + "step": 14744 + }, + { + "epoch": 0.17, + "learning_rate": 1.6106637045265204e-05, + "loss": 1.8579, + "step": 14746 + }, + { + "epoch": 0.17, + "learning_rate": 1.610200870128668e-05, + "loss": 2.2357, + "step": 14748 + }, + { + "epoch": 0.17, + "learning_rate": 1.6097380357308155e-05, + "loss": 1.0897, + "step": 14750 + }, + { + "epoch": 0.17, + "learning_rate": 1.609275201332963e-05, + "loss": 0.3098, + "step": 14752 + }, + { + "epoch": 0.17, + "learning_rate": 1.6088123669351106e-05, + "loss": 2.2993, + "step": 14754 + }, + { + "epoch": 0.17, + "learning_rate": 1.6083495325372585e-05, + "loss": 1.9363, + "step": 14756 + }, + { + "epoch": 0.17, + "learning_rate": 1.607886698139406e-05, + "loss": 2.1081, + "step": 14758 + }, + { + "epoch": 0.17, + "learning_rate": 1.6074238637415535e-05, + "loss": 1.4716, + "step": 14760 + }, + { + "epoch": 0.17, + "learning_rate": 1.606961029343701e-05, + "loss": 0.0005, + "step": 14762 + }, + { + "epoch": 0.17, + "learning_rate": 1.6064981949458486e-05, + "loss": 1.3264, + "step": 14764 + }, + { + "epoch": 0.17, + "learning_rate": 1.606035360547996e-05, + "loss": 0.2899, + "step": 14766 + }, + { + "epoch": 0.17, + "learning_rate": 1.6055725261501437e-05, + "loss": 4.7377, + "step": 14768 + }, + { + "epoch": 0.17, + "learning_rate": 1.6051096917522912e-05, + "loss": 5.0884, + "step": 14770 + }, + { + "epoch": 0.17, + "learning_rate": 1.6046468573544388e-05, + "loss": 4.0877, + "step": 14772 + }, + { + "epoch": 0.17, + "learning_rate": 1.6041840229565863e-05, + "loss": 2.2017, + "step": 14774 + }, + { + "epoch": 0.17, + "learning_rate": 1.603721188558734e-05, + "loss": 0.3592, + "step": 14776 + }, + { + "epoch": 0.17, + "learning_rate": 1.6032583541608814e-05, + "loss": 1.7904, + "step": 14778 + }, + { + "epoch": 0.17, + "learning_rate": 1.602795519763029e-05, + "loss": 5.972, + "step": 14780 + }, + { + "epoch": 0.17, + "learning_rate": 1.602332685365176e-05, + "loss": 0.1436, + "step": 14782 + }, + { + "epoch": 0.17, + "learning_rate": 1.6018698509673237e-05, + "loss": 0.5674, + "step": 14784 + }, + { + "epoch": 0.17, + "learning_rate": 1.6014070165694712e-05, + "loss": 0.3908, + "step": 14786 + }, + { + "epoch": 0.17, + "learning_rate": 1.6009441821716188e-05, + "loss": 1.2218, + "step": 14788 + }, + { + "epoch": 0.17, + "learning_rate": 1.6004813477737667e-05, + "loss": 0.799, + "step": 14790 + }, + { + "epoch": 0.17, + "learning_rate": 1.6000185133759142e-05, + "loss": 0.5871, + "step": 14792 + }, + { + "epoch": 0.17, + "learning_rate": 1.5995556789780618e-05, + "loss": 6.7282, + "step": 14794 + }, + { + "epoch": 0.17, + "learning_rate": 1.5990928445802093e-05, + "loss": 0.6341, + "step": 14796 + }, + { + "epoch": 0.17, + "learning_rate": 1.598630010182357e-05, + "loss": 0.29, + "step": 14798 + }, + { + "epoch": 0.17, + "learning_rate": 1.5981671757845044e-05, + "loss": 5.2718, + "step": 14800 + }, + { + "epoch": 0.17, + "learning_rate": 1.597704341386652e-05, + "loss": 2.4817, + "step": 14802 + }, + { + "epoch": 0.17, + "learning_rate": 1.5972415069887995e-05, + "loss": 0.0114, + "step": 14804 + }, + { + "epoch": 0.17, + "learning_rate": 1.596778672590947e-05, + "loss": 1.8872, + "step": 14806 + }, + { + "epoch": 0.17, + "learning_rate": 1.5963158381930946e-05, + "loss": 2.8608, + "step": 14808 + }, + { + "epoch": 0.17, + "learning_rate": 1.595853003795242e-05, + "loss": 0.9467, + "step": 14810 + }, + { + "epoch": 0.17, + "learning_rate": 1.5953901693973896e-05, + "loss": 3.1035, + "step": 14812 + }, + { + "epoch": 0.17, + "learning_rate": 1.5949273349995372e-05, + "loss": 0.6448, + "step": 14814 + }, + { + "epoch": 0.17, + "learning_rate": 1.5944645006016847e-05, + "loss": 0.0015, + "step": 14816 + }, + { + "epoch": 0.17, + "learning_rate": 1.5940016662038323e-05, + "loss": 4.2871, + "step": 14818 + }, + { + "epoch": 0.17, + "learning_rate": 1.5935388318059798e-05, + "loss": 7.0314, + "step": 14820 + }, + { + "epoch": 0.17, + "learning_rate": 1.5930759974081274e-05, + "loss": 0.0866, + "step": 14822 + }, + { + "epoch": 0.17, + "learning_rate": 1.592613163010275e-05, + "loss": 1.7823, + "step": 14824 + }, + { + "epoch": 0.17, + "learning_rate": 1.5921503286124224e-05, + "loss": 0.2491, + "step": 14826 + }, + { + "epoch": 0.17, + "learning_rate": 1.59168749421457e-05, + "loss": 1.9227, + "step": 14828 + }, + { + "epoch": 0.17, + "learning_rate": 1.591224659816718e-05, + "loss": 1.9496, + "step": 14830 + }, + { + "epoch": 0.17, + "learning_rate": 1.5907618254188654e-05, + "loss": 1.9059, + "step": 14832 + }, + { + "epoch": 0.17, + "learning_rate": 1.590298991021013e-05, + "loss": 2.865, + "step": 14834 + }, + { + "epoch": 0.17, + "learning_rate": 1.5898361566231605e-05, + "loss": 1.7889, + "step": 14836 + }, + { + "epoch": 0.17, + "learning_rate": 1.589373322225308e-05, + "loss": 0.8653, + "step": 14838 + }, + { + "epoch": 0.17, + "learning_rate": 1.5889104878274556e-05, + "loss": 0.0018, + "step": 14840 + }, + { + "epoch": 0.17, + "learning_rate": 1.588447653429603e-05, + "loss": 3.8934, + "step": 14842 + }, + { + "epoch": 0.17, + "learning_rate": 1.5879848190317507e-05, + "loss": 2.5471, + "step": 14844 + }, + { + "epoch": 0.17, + "learning_rate": 1.5875219846338982e-05, + "loss": 0.9303, + "step": 14846 + }, + { + "epoch": 0.17, + "learning_rate": 1.5870591502360457e-05, + "loss": 1.5211, + "step": 14848 + }, + { + "epoch": 0.17, + "learning_rate": 1.5865963158381933e-05, + "loss": 1.9147, + "step": 14850 + }, + { + "epoch": 0.17, + "learning_rate": 1.5861334814403408e-05, + "loss": 0.417, + "step": 14852 + }, + { + "epoch": 0.17, + "learning_rate": 1.5856706470424884e-05, + "loss": 1.183, + "step": 14854 + }, + { + "epoch": 0.17, + "learning_rate": 1.5852078126446356e-05, + "loss": 1.8229, + "step": 14856 + }, + { + "epoch": 0.17, + "learning_rate": 1.584744978246783e-05, + "loss": 1.2163, + "step": 14858 + }, + { + "epoch": 0.17, + "learning_rate": 1.5842821438489307e-05, + "loss": 0.5313, + "step": 14860 + }, + { + "epoch": 0.17, + "learning_rate": 1.5838193094510782e-05, + "loss": 7.5661, + "step": 14862 + }, + { + "epoch": 0.17, + "learning_rate": 1.583356475053226e-05, + "loss": 0.3784, + "step": 14864 + }, + { + "epoch": 0.17, + "learning_rate": 1.5828936406553736e-05, + "loss": 1.8726, + "step": 14866 + }, + { + "epoch": 0.17, + "learning_rate": 1.582430806257521e-05, + "loss": 0.0016, + "step": 14868 + }, + { + "epoch": 0.17, + "learning_rate": 1.5819679718596687e-05, + "loss": 2.6977, + "step": 14870 + }, + { + "epoch": 0.17, + "learning_rate": 1.5815051374618163e-05, + "loss": 0.9002, + "step": 14872 + }, + { + "epoch": 0.17, + "learning_rate": 1.5810423030639638e-05, + "loss": 0.6695, + "step": 14874 + }, + { + "epoch": 0.17, + "learning_rate": 1.5805794686661113e-05, + "loss": 2.54, + "step": 14876 + }, + { + "epoch": 0.17, + "learning_rate": 1.580116634268259e-05, + "loss": 4.1447, + "step": 14878 + }, + { + "epoch": 0.17, + "learning_rate": 1.5796537998704064e-05, + "loss": 0.0044, + "step": 14880 + }, + { + "epoch": 0.17, + "learning_rate": 1.579190965472554e-05, + "loss": 0.8857, + "step": 14882 + }, + { + "epoch": 0.17, + "learning_rate": 1.5787281310747015e-05, + "loss": 1.6281, + "step": 14884 + }, + { + "epoch": 0.17, + "learning_rate": 1.578265296676849e-05, + "loss": 4.4905, + "step": 14886 + }, + { + "epoch": 0.17, + "learning_rate": 1.5778024622789966e-05, + "loss": 1.1369, + "step": 14888 + }, + { + "epoch": 0.17, + "learning_rate": 1.577339627881144e-05, + "loss": 0.0006, + "step": 14890 + }, + { + "epoch": 0.17, + "learning_rate": 1.5768767934832917e-05, + "loss": 4.5379, + "step": 14892 + }, + { + "epoch": 0.17, + "learning_rate": 1.5764139590854392e-05, + "loss": 0.4364, + "step": 14894 + }, + { + "epoch": 0.17, + "learning_rate": 1.5759511246875868e-05, + "loss": 0.0557, + "step": 14896 + }, + { + "epoch": 0.17, + "learning_rate": 1.5754882902897343e-05, + "loss": 1.1453, + "step": 14898 + }, + { + "epoch": 0.17, + "learning_rate": 1.575025455891882e-05, + "loss": 2.5199, + "step": 14900 + }, + { + "epoch": 0.17, + "learning_rate": 1.5745626214940294e-05, + "loss": 3.7374, + "step": 14902 + }, + { + "epoch": 0.17, + "learning_rate": 1.5740997870961773e-05, + "loss": 4.4743, + "step": 14904 + }, + { + "epoch": 0.17, + "learning_rate": 1.5736369526983248e-05, + "loss": 0.0123, + "step": 14906 + }, + { + "epoch": 0.17, + "learning_rate": 1.5731741183004724e-05, + "loss": 0.167, + "step": 14908 + }, + { + "epoch": 0.17, + "learning_rate": 1.57271128390262e-05, + "loss": 2.6547, + "step": 14910 + }, + { + "epoch": 0.17, + "learning_rate": 1.5722484495047674e-05, + "loss": 1.7532, + "step": 14912 + }, + { + "epoch": 0.17, + "learning_rate": 1.571785615106915e-05, + "loss": 3.5342, + "step": 14914 + }, + { + "epoch": 0.17, + "learning_rate": 1.5713227807090625e-05, + "loss": 4.8889, + "step": 14916 + }, + { + "epoch": 0.17, + "learning_rate": 1.57085994631121e-05, + "loss": 3.4013, + "step": 14918 + }, + { + "epoch": 0.17, + "learning_rate": 1.5703971119133576e-05, + "loss": 2.7721, + "step": 14920 + }, + { + "epoch": 0.17, + "learning_rate": 1.569934277515505e-05, + "loss": 1.8611, + "step": 14922 + }, + { + "epoch": 0.17, + "learning_rate": 1.5694714431176527e-05, + "loss": 0.0009, + "step": 14924 + }, + { + "epoch": 0.17, + "learning_rate": 1.5690086087198002e-05, + "loss": 1.7731, + "step": 14926 + }, + { + "epoch": 0.17, + "learning_rate": 1.5685457743219474e-05, + "loss": 0.7718, + "step": 14928 + }, + { + "epoch": 0.17, + "learning_rate": 1.568082939924095e-05, + "loss": 1.0689, + "step": 14930 + }, + { + "epoch": 0.17, + "learning_rate": 1.5676201055262425e-05, + "loss": 2.8077, + "step": 14932 + }, + { + "epoch": 0.17, + "learning_rate": 1.56715727112839e-05, + "loss": 2.9833, + "step": 14934 + }, + { + "epoch": 0.17, + "learning_rate": 1.566694436730538e-05, + "loss": 0.9209, + "step": 14936 + }, + { + "epoch": 0.17, + "learning_rate": 1.5662316023326855e-05, + "loss": 2.9415, + "step": 14938 + }, + { + "epoch": 0.17, + "learning_rate": 1.565768767934833e-05, + "loss": 2.8763, + "step": 14940 + }, + { + "epoch": 0.17, + "learning_rate": 1.5653059335369806e-05, + "loss": 6.0491, + "step": 14942 + }, + { + "epoch": 0.17, + "learning_rate": 1.564843099139128e-05, + "loss": 1.1783, + "step": 14944 + }, + { + "epoch": 0.17, + "learning_rate": 1.5643802647412757e-05, + "loss": 1.5141, + "step": 14946 + }, + { + "epoch": 0.17, + "learning_rate": 1.5639174303434232e-05, + "loss": 0.447, + "step": 14948 + }, + { + "epoch": 0.17, + "learning_rate": 1.5634545959455707e-05, + "loss": 0.584, + "step": 14950 + }, + { + "epoch": 0.17, + "learning_rate": 1.5629917615477183e-05, + "loss": 1.4335, + "step": 14952 + }, + { + "epoch": 0.17, + "learning_rate": 1.562528927149866e-05, + "loss": 1.4051, + "step": 14954 + }, + { + "epoch": 0.17, + "learning_rate": 1.5620660927520134e-05, + "loss": 1.2567, + "step": 14956 + }, + { + "epoch": 0.17, + "learning_rate": 1.561603258354161e-05, + "loss": 2.5531, + "step": 14958 + }, + { + "epoch": 0.17, + "learning_rate": 1.5611404239563085e-05, + "loss": 1.7414, + "step": 14960 + }, + { + "epoch": 0.17, + "learning_rate": 1.560677589558456e-05, + "loss": 0.8479, + "step": 14962 + }, + { + "epoch": 0.17, + "learning_rate": 1.5602147551606035e-05, + "loss": 0.6427, + "step": 14964 + }, + { + "epoch": 0.17, + "learning_rate": 1.559751920762751e-05, + "loss": 1.2743, + "step": 14966 + }, + { + "epoch": 0.17, + "learning_rate": 1.5592890863648986e-05, + "loss": 3.4845, + "step": 14968 + }, + { + "epoch": 0.17, + "learning_rate": 1.5588262519670462e-05, + "loss": 1.5812, + "step": 14970 + }, + { + "epoch": 0.17, + "learning_rate": 1.5583634175691937e-05, + "loss": 0.9783, + "step": 14972 + }, + { + "epoch": 0.17, + "learning_rate": 1.5579005831713413e-05, + "loss": 1.109, + "step": 14974 + }, + { + "epoch": 0.17, + "learning_rate": 1.557437748773489e-05, + "loss": 2.2722, + "step": 14976 + }, + { + "epoch": 0.17, + "learning_rate": 1.5569749143756367e-05, + "loss": 0.8635, + "step": 14978 + }, + { + "epoch": 0.17, + "learning_rate": 1.5565120799777842e-05, + "loss": 2.9446, + "step": 14980 + }, + { + "epoch": 0.17, + "learning_rate": 1.5560492455799318e-05, + "loss": 1.0367, + "step": 14982 + }, + { + "epoch": 0.17, + "learning_rate": 1.5555864111820793e-05, + "loss": 0.2211, + "step": 14984 + }, + { + "epoch": 0.17, + "learning_rate": 1.555123576784227e-05, + "loss": 2.8136, + "step": 14986 + }, + { + "epoch": 0.17, + "learning_rate": 1.5546607423863744e-05, + "loss": 2.019, + "step": 14988 + }, + { + "epoch": 0.17, + "learning_rate": 1.554197907988522e-05, + "loss": 1.6492, + "step": 14990 + }, + { + "epoch": 0.17, + "learning_rate": 1.5537350735906695e-05, + "loss": 1.0376, + "step": 14992 + }, + { + "epoch": 0.17, + "learning_rate": 1.553272239192817e-05, + "loss": 0.0006, + "step": 14994 + }, + { + "epoch": 0.17, + "learning_rate": 1.5528094047949646e-05, + "loss": 0.4591, + "step": 14996 + }, + { + "epoch": 0.17, + "learning_rate": 1.552346570397112e-05, + "loss": 0.0004, + "step": 14998 + }, + { + "epoch": 0.17, + "learning_rate": 1.5518837359992593e-05, + "loss": 4.7593, + "step": 15000 + }, + { + "epoch": 0.17, + "learning_rate": 1.551420901601407e-05, + "loss": 4.4031, + "step": 15002 + }, + { + "epoch": 0.17, + "learning_rate": 1.5509580672035544e-05, + "loss": 2.078, + "step": 15004 + }, + { + "epoch": 0.17, + "learning_rate": 1.550495232805702e-05, + "loss": 0.8005, + "step": 15006 + }, + { + "epoch": 0.17, + "learning_rate": 1.5500323984078495e-05, + "loss": 3.2617, + "step": 15008 + }, + { + "epoch": 0.17, + "learning_rate": 1.5495695640099974e-05, + "loss": 0.0712, + "step": 15010 + }, + { + "epoch": 0.17, + "learning_rate": 1.549106729612145e-05, + "loss": 8.2118, + "step": 15012 + }, + { + "epoch": 0.17, + "learning_rate": 1.5486438952142924e-05, + "loss": 2.207, + "step": 15014 + }, + { + "epoch": 0.17, + "learning_rate": 1.54818106081644e-05, + "loss": 0.0015, + "step": 15016 + }, + { + "epoch": 0.17, + "learning_rate": 1.5477182264185875e-05, + "loss": 2.6837, + "step": 15018 + }, + { + "epoch": 0.17, + "learning_rate": 1.547255392020735e-05, + "loss": 0.8953, + "step": 15020 + }, + { + "epoch": 0.17, + "learning_rate": 1.5467925576228826e-05, + "loss": 2.1521, + "step": 15022 + }, + { + "epoch": 0.17, + "learning_rate": 1.54632972322503e-05, + "loss": 1.2342, + "step": 15024 + }, + { + "epoch": 0.17, + "learning_rate": 1.5458668888271777e-05, + "loss": 0.4517, + "step": 15026 + }, + { + "epoch": 0.17, + "learning_rate": 1.5454040544293252e-05, + "loss": 2.7703, + "step": 15028 + }, + { + "epoch": 0.17, + "learning_rate": 1.5449412200314728e-05, + "loss": 1.1862, + "step": 15030 + }, + { + "epoch": 0.17, + "learning_rate": 1.5444783856336203e-05, + "loss": 0.3233, + "step": 15032 + }, + { + "epoch": 0.17, + "learning_rate": 1.544015551235768e-05, + "loss": 0.0008, + "step": 15034 + }, + { + "epoch": 0.17, + "learning_rate": 1.5435527168379154e-05, + "loss": 3.1864, + "step": 15036 + }, + { + "epoch": 0.17, + "learning_rate": 1.543089882440063e-05, + "loss": 3.3178, + "step": 15038 + }, + { + "epoch": 0.17, + "learning_rate": 1.5426270480422105e-05, + "loss": 0.4009, + "step": 15040 + }, + { + "epoch": 0.17, + "learning_rate": 1.542164213644358e-05, + "loss": 0.3432, + "step": 15042 + }, + { + "epoch": 0.17, + "learning_rate": 1.5417013792465056e-05, + "loss": 6.693, + "step": 15044 + }, + { + "epoch": 0.17, + "learning_rate": 1.541238544848653e-05, + "loss": 3.753, + "step": 15046 + }, + { + "epoch": 0.17, + "learning_rate": 1.5407757104508007e-05, + "loss": 0.4408, + "step": 15048 + }, + { + "epoch": 0.17, + "learning_rate": 1.5403128760529485e-05, + "loss": 0.5058, + "step": 15050 + }, + { + "epoch": 0.17, + "learning_rate": 1.539850041655096e-05, + "loss": 0.0007, + "step": 15052 + }, + { + "epoch": 0.17, + "learning_rate": 1.5393872072572436e-05, + "loss": 0.9045, + "step": 15054 + }, + { + "epoch": 0.17, + "learning_rate": 1.5389243728593912e-05, + "loss": 1.6803, + "step": 15056 + }, + { + "epoch": 0.17, + "learning_rate": 1.5384615384615387e-05, + "loss": 3.0789, + "step": 15058 + }, + { + "epoch": 0.17, + "learning_rate": 1.5379987040636863e-05, + "loss": 2.062, + "step": 15060 + }, + { + "epoch": 0.17, + "learning_rate": 1.5375358696658338e-05, + "loss": 2.3429, + "step": 15062 + }, + { + "epoch": 0.17, + "learning_rate": 1.5370730352679813e-05, + "loss": 0.0027, + "step": 15064 + }, + { + "epoch": 0.17, + "learning_rate": 1.536610200870129e-05, + "loss": 2.4577, + "step": 15066 + }, + { + "epoch": 0.17, + "learning_rate": 1.5361473664722764e-05, + "loss": 0.4035, + "step": 15068 + }, + { + "epoch": 0.17, + "learning_rate": 1.535684532074424e-05, + "loss": 5.5753, + "step": 15070 + }, + { + "epoch": 0.17, + "learning_rate": 1.5352216976765712e-05, + "loss": 0.6447, + "step": 15072 + }, + { + "epoch": 0.17, + "learning_rate": 1.5347588632787187e-05, + "loss": 1.3972, + "step": 15074 + }, + { + "epoch": 0.17, + "learning_rate": 1.5342960288808663e-05, + "loss": 0.7886, + "step": 15076 + }, + { + "epoch": 0.17, + "learning_rate": 1.5338331944830138e-05, + "loss": 0.0246, + "step": 15078 + }, + { + "epoch": 0.17, + "learning_rate": 1.5333703600851613e-05, + "loss": 0.1108, + "step": 15080 + }, + { + "epoch": 0.17, + "learning_rate": 1.532907525687309e-05, + "loss": 0.8593, + "step": 15082 + }, + { + "epoch": 0.17, + "learning_rate": 1.5324446912894568e-05, + "loss": 0.9502, + "step": 15084 + }, + { + "epoch": 0.17, + "learning_rate": 1.5319818568916043e-05, + "loss": 1.7536, + "step": 15086 + }, + { + "epoch": 0.17, + "learning_rate": 1.531519022493752e-05, + "loss": 7.9619, + "step": 15088 + }, + { + "epoch": 0.17, + "learning_rate": 1.5310561880958994e-05, + "loss": 0.3611, + "step": 15090 + }, + { + "epoch": 0.17, + "learning_rate": 1.530593353698047e-05, + "loss": 5.1475, + "step": 15092 + }, + { + "epoch": 0.17, + "learning_rate": 1.5301305193001945e-05, + "loss": 2.215, + "step": 15094 + }, + { + "epoch": 0.17, + "learning_rate": 1.529667684902342e-05, + "loss": 0.0014, + "step": 15096 + }, + { + "epoch": 0.17, + "learning_rate": 1.5292048505044896e-05, + "loss": 0.4632, + "step": 15098 + }, + { + "epoch": 0.17, + "learning_rate": 1.528742016106637e-05, + "loss": 2.9346, + "step": 15100 + }, + { + "epoch": 0.17, + "learning_rate": 1.5282791817087847e-05, + "loss": 1.3603, + "step": 15102 + }, + { + "epoch": 0.17, + "learning_rate": 1.5278163473109322e-05, + "loss": 0.0003, + "step": 15104 + }, + { + "epoch": 0.17, + "learning_rate": 1.5273535129130797e-05, + "loss": 2.4566, + "step": 15106 + }, + { + "epoch": 0.17, + "learning_rate": 1.5268906785152273e-05, + "loss": 1.2595, + "step": 15108 + }, + { + "epoch": 0.17, + "learning_rate": 1.5264278441173748e-05, + "loss": 2.9438, + "step": 15110 + }, + { + "epoch": 0.17, + "learning_rate": 1.5259650097195224e-05, + "loss": 0.0203, + "step": 15112 + }, + { + "epoch": 0.17, + "learning_rate": 1.52550217532167e-05, + "loss": 0.2005, + "step": 15114 + }, + { + "epoch": 0.17, + "learning_rate": 1.5250393409238176e-05, + "loss": 1.3519, + "step": 15116 + }, + { + "epoch": 0.17, + "learning_rate": 1.5245765065259652e-05, + "loss": 0.3705, + "step": 15118 + }, + { + "epoch": 0.17, + "learning_rate": 1.5241136721281127e-05, + "loss": 3.411, + "step": 15120 + }, + { + "epoch": 0.17, + "learning_rate": 1.5236508377302602e-05, + "loss": 4.527, + "step": 15122 + }, + { + "epoch": 0.17, + "learning_rate": 1.5231880033324078e-05, + "loss": 1.4769, + "step": 15124 + }, + { + "epoch": 0.17, + "learning_rate": 1.5227251689345553e-05, + "loss": 0.6804, + "step": 15126 + }, + { + "epoch": 0.17, + "learning_rate": 1.5222623345367029e-05, + "loss": 0.2117, + "step": 15128 + }, + { + "epoch": 0.17, + "learning_rate": 1.5217995001388504e-05, + "loss": 1.1775, + "step": 15130 + }, + { + "epoch": 0.17, + "learning_rate": 1.521336665740998e-05, + "loss": 0.8773, + "step": 15132 + }, + { + "epoch": 0.17, + "learning_rate": 1.5208738313431457e-05, + "loss": 1.8963, + "step": 15134 + }, + { + "epoch": 0.17, + "learning_rate": 1.5204109969452932e-05, + "loss": 2.9274, + "step": 15136 + }, + { + "epoch": 0.17, + "learning_rate": 1.5199481625474408e-05, + "loss": 0.5219, + "step": 15138 + }, + { + "epoch": 0.17, + "learning_rate": 1.5194853281495883e-05, + "loss": 0.8237, + "step": 15140 + }, + { + "epoch": 0.17, + "learning_rate": 1.5190224937517358e-05, + "loss": 1.0221, + "step": 15142 + }, + { + "epoch": 0.17, + "learning_rate": 1.5185596593538834e-05, + "loss": 1.384, + "step": 15144 + }, + { + "epoch": 0.17, + "learning_rate": 1.5180968249560308e-05, + "loss": 0.1141, + "step": 15146 + }, + { + "epoch": 0.17, + "learning_rate": 1.5176339905581783e-05, + "loss": 2.7436, + "step": 15148 + }, + { + "epoch": 0.17, + "learning_rate": 1.5171711561603258e-05, + "loss": 0.0956, + "step": 15150 + }, + { + "epoch": 0.17, + "learning_rate": 1.5167083217624734e-05, + "loss": 0.7073, + "step": 15152 + }, + { + "epoch": 0.17, + "learning_rate": 1.516245487364621e-05, + "loss": 0.0009, + "step": 15154 + }, + { + "epoch": 0.17, + "learning_rate": 1.5157826529667685e-05, + "loss": 4.3875, + "step": 15156 + }, + { + "epoch": 0.17, + "learning_rate": 1.515319818568916e-05, + "loss": 5.8297, + "step": 15158 + }, + { + "epoch": 0.17, + "learning_rate": 1.5148569841710636e-05, + "loss": 0.2733, + "step": 15160 + }, + { + "epoch": 0.17, + "learning_rate": 1.5143941497732111e-05, + "loss": 1.9004, + "step": 15162 + }, + { + "epoch": 0.17, + "learning_rate": 1.5139313153753586e-05, + "loss": 0.0005, + "step": 15164 + }, + { + "epoch": 0.17, + "learning_rate": 1.5134684809775063e-05, + "loss": 0.6707, + "step": 15166 + }, + { + "epoch": 0.17, + "learning_rate": 1.5130056465796539e-05, + "loss": 0.8294, + "step": 15168 + }, + { + "epoch": 0.17, + "learning_rate": 1.5125428121818014e-05, + "loss": 2.0475, + "step": 15170 + }, + { + "epoch": 0.17, + "learning_rate": 1.512079977783949e-05, + "loss": 1.2308, + "step": 15172 + }, + { + "epoch": 0.17, + "learning_rate": 1.5116171433860965e-05, + "loss": 0.5062, + "step": 15174 + }, + { + "epoch": 0.17, + "learning_rate": 1.511154308988244e-05, + "loss": 6.1508, + "step": 15176 + }, + { + "epoch": 0.17, + "learning_rate": 1.5106914745903916e-05, + "loss": 4.5625, + "step": 15178 + }, + { + "epoch": 0.17, + "learning_rate": 1.5102286401925391e-05, + "loss": 0.1493, + "step": 15180 + }, + { + "epoch": 0.17, + "learning_rate": 1.5097658057946867e-05, + "loss": 0.979, + "step": 15182 + }, + { + "epoch": 0.17, + "learning_rate": 1.5093029713968342e-05, + "loss": 0.6347, + "step": 15184 + }, + { + "epoch": 0.17, + "learning_rate": 1.5088401369989818e-05, + "loss": 0.6137, + "step": 15186 + }, + { + "epoch": 0.17, + "learning_rate": 1.5083773026011295e-05, + "loss": 0.0011, + "step": 15188 + }, + { + "epoch": 0.17, + "learning_rate": 1.507914468203277e-05, + "loss": 1.8091, + "step": 15190 + }, + { + "epoch": 0.17, + "learning_rate": 1.5074516338054246e-05, + "loss": 1.7792, + "step": 15192 + }, + { + "epoch": 0.18, + "learning_rate": 1.5069887994075721e-05, + "loss": 5.2022, + "step": 15194 + }, + { + "epoch": 0.18, + "learning_rate": 1.5065259650097197e-05, + "loss": 0.2749, + "step": 15196 + }, + { + "epoch": 0.18, + "learning_rate": 1.5060631306118672e-05, + "loss": 0.7508, + "step": 15198 + }, + { + "epoch": 0.18, + "learning_rate": 1.5056002962140147e-05, + "loss": 4.1284, + "step": 15200 + }, + { + "epoch": 0.18, + "learning_rate": 1.5051374618161623e-05, + "loss": 0.0007, + "step": 15202 + }, + { + "epoch": 0.18, + "learning_rate": 1.5046746274183098e-05, + "loss": 1.8323, + "step": 15204 + }, + { + "epoch": 0.18, + "learning_rate": 1.5042117930204574e-05, + "loss": 0.0008, + "step": 15206 + }, + { + "epoch": 0.18, + "learning_rate": 1.503748958622605e-05, + "loss": 0.9159, + "step": 15208 + }, + { + "epoch": 0.18, + "learning_rate": 1.5032861242247526e-05, + "loss": 0.3176, + "step": 15210 + }, + { + "epoch": 0.18, + "learning_rate": 1.5028232898269002e-05, + "loss": 0.1621, + "step": 15212 + }, + { + "epoch": 0.18, + "learning_rate": 1.5023604554290477e-05, + "loss": 0.3267, + "step": 15214 + }, + { + "epoch": 0.18, + "learning_rate": 1.5018976210311952e-05, + "loss": 3.8394, + "step": 15216 + }, + { + "epoch": 0.18, + "learning_rate": 1.5014347866333425e-05, + "loss": 0.0036, + "step": 15218 + }, + { + "epoch": 0.18, + "learning_rate": 1.5009719522354902e-05, + "loss": 1.6717, + "step": 15220 + }, + { + "epoch": 0.18, + "learning_rate": 1.5005091178376377e-05, + "loss": 3.3102, + "step": 15222 + }, + { + "epoch": 0.18, + "learning_rate": 1.5000462834397852e-05, + "loss": 0.0023, + "step": 15224 + }, + { + "epoch": 0.18, + "learning_rate": 1.4995834490419328e-05, + "loss": 0.1041, + "step": 15226 + }, + { + "epoch": 0.18, + "learning_rate": 1.4991206146440803e-05, + "loss": 0.4419, + "step": 15228 + }, + { + "epoch": 0.18, + "learning_rate": 1.4986577802462279e-05, + "loss": 0.0018, + "step": 15230 + }, + { + "epoch": 0.18, + "learning_rate": 1.4981949458483754e-05, + "loss": 7.1431, + "step": 15232 + }, + { + "epoch": 0.18, + "learning_rate": 1.497732111450523e-05, + "loss": 1.955, + "step": 15234 + }, + { + "epoch": 0.18, + "learning_rate": 1.4972692770526705e-05, + "loss": 1.8349, + "step": 15236 + }, + { + "epoch": 0.18, + "learning_rate": 1.496806442654818e-05, + "loss": 0.0261, + "step": 15238 + }, + { + "epoch": 0.18, + "learning_rate": 1.4963436082569658e-05, + "loss": 0.2695, + "step": 15240 + }, + { + "epoch": 0.18, + "learning_rate": 1.4958807738591133e-05, + "loss": 0.3752, + "step": 15242 + }, + { + "epoch": 0.18, + "learning_rate": 1.4954179394612608e-05, + "loss": 3.3142, + "step": 15244 + }, + { + "epoch": 0.18, + "learning_rate": 1.4949551050634084e-05, + "loss": 0.9772, + "step": 15246 + }, + { + "epoch": 0.18, + "learning_rate": 1.494492270665556e-05, + "loss": 1.4415, + "step": 15248 + }, + { + "epoch": 0.18, + "learning_rate": 1.4940294362677035e-05, + "loss": 0.9614, + "step": 15250 + }, + { + "epoch": 0.18, + "learning_rate": 1.493566601869851e-05, + "loss": 3.4091, + "step": 15252 + }, + { + "epoch": 0.18, + "learning_rate": 1.4931037674719986e-05, + "loss": 0.0034, + "step": 15254 + }, + { + "epoch": 0.18, + "learning_rate": 1.4926409330741461e-05, + "loss": 0.0015, + "step": 15256 + }, + { + "epoch": 0.18, + "learning_rate": 1.4921780986762936e-05, + "loss": 1.1872, + "step": 15258 + }, + { + "epoch": 0.18, + "learning_rate": 1.4917152642784414e-05, + "loss": 0.2279, + "step": 15260 + }, + { + "epoch": 0.18, + "learning_rate": 1.4912524298805889e-05, + "loss": 0.0008, + "step": 15262 + }, + { + "epoch": 0.18, + "learning_rate": 1.4907895954827364e-05, + "loss": 1.0994, + "step": 15264 + }, + { + "epoch": 0.18, + "learning_rate": 1.490326761084884e-05, + "loss": 3.6229, + "step": 15266 + }, + { + "epoch": 0.18, + "learning_rate": 1.4898639266870315e-05, + "loss": 1.8237, + "step": 15268 + }, + { + "epoch": 0.18, + "learning_rate": 1.489401092289179e-05, + "loss": 2.1466, + "step": 15270 + }, + { + "epoch": 0.18, + "learning_rate": 1.4889382578913266e-05, + "loss": 0.0007, + "step": 15272 + }, + { + "epoch": 0.18, + "learning_rate": 1.4884754234934742e-05, + "loss": 3.9632, + "step": 15274 + }, + { + "epoch": 0.18, + "learning_rate": 1.4880125890956217e-05, + "loss": 0.0834, + "step": 15276 + }, + { + "epoch": 0.18, + "learning_rate": 1.4875497546977692e-05, + "loss": 4.6524, + "step": 15278 + }, + { + "epoch": 0.18, + "learning_rate": 1.487086920299917e-05, + "loss": 0.0007, + "step": 15280 + }, + { + "epoch": 0.18, + "learning_rate": 1.4866240859020645e-05, + "loss": 4.8768, + "step": 15282 + }, + { + "epoch": 0.18, + "learning_rate": 1.486161251504212e-05, + "loss": 4.5969, + "step": 15284 + }, + { + "epoch": 0.18, + "learning_rate": 1.4856984171063596e-05, + "loss": 0.3139, + "step": 15286 + }, + { + "epoch": 0.18, + "learning_rate": 1.4852355827085071e-05, + "loss": 0.0004, + "step": 15288 + }, + { + "epoch": 0.18, + "learning_rate": 1.4847727483106543e-05, + "loss": 0.1105, + "step": 15290 + }, + { + "epoch": 0.18, + "learning_rate": 1.4843099139128019e-05, + "loss": 1.3638, + "step": 15292 + }, + { + "epoch": 0.18, + "learning_rate": 1.4838470795149496e-05, + "loss": 3.459, + "step": 15294 + }, + { + "epoch": 0.18, + "learning_rate": 1.4833842451170971e-05, + "loss": 1.9777, + "step": 15296 + }, + { + "epoch": 0.18, + "learning_rate": 1.4829214107192447e-05, + "loss": 1.6767, + "step": 15298 + }, + { + "epoch": 0.18, + "learning_rate": 1.4824585763213922e-05, + "loss": 2.4635, + "step": 15300 + }, + { + "epoch": 0.18, + "learning_rate": 1.4819957419235397e-05, + "loss": 2.2707, + "step": 15302 + }, + { + "epoch": 0.18, + "learning_rate": 1.4815329075256873e-05, + "loss": 0.1486, + "step": 15304 + }, + { + "epoch": 0.18, + "learning_rate": 1.4810700731278348e-05, + "loss": 1.4079, + "step": 15306 + }, + { + "epoch": 0.18, + "learning_rate": 1.4806072387299824e-05, + "loss": 2.5098, + "step": 15308 + }, + { + "epoch": 0.18, + "learning_rate": 1.4801444043321299e-05, + "loss": 0.1462, + "step": 15310 + }, + { + "epoch": 0.18, + "learning_rate": 1.4796815699342775e-05, + "loss": 0.5792, + "step": 15312 + }, + { + "epoch": 0.18, + "learning_rate": 1.4792187355364252e-05, + "loss": 2.6547, + "step": 15314 + }, + { + "epoch": 0.18, + "learning_rate": 1.4787559011385727e-05, + "loss": 4.0119, + "step": 15316 + }, + { + "epoch": 0.18, + "learning_rate": 1.4782930667407203e-05, + "loss": 1.0354, + "step": 15318 + }, + { + "epoch": 0.18, + "learning_rate": 1.4778302323428678e-05, + "loss": 1.8484, + "step": 15320 + }, + { + "epoch": 0.18, + "learning_rate": 1.4773673979450153e-05, + "loss": 2.1038, + "step": 15322 + }, + { + "epoch": 0.18, + "learning_rate": 1.4769045635471629e-05, + "loss": 0.2219, + "step": 15324 + }, + { + "epoch": 0.18, + "learning_rate": 1.4764417291493104e-05, + "loss": 1.8255, + "step": 15326 + }, + { + "epoch": 0.18, + "learning_rate": 1.475978894751458e-05, + "loss": 3.1899, + "step": 15328 + }, + { + "epoch": 0.18, + "learning_rate": 1.4755160603536055e-05, + "loss": 3.6035, + "step": 15330 + }, + { + "epoch": 0.18, + "learning_rate": 1.475053225955753e-05, + "loss": 1.4895, + "step": 15332 + }, + { + "epoch": 0.18, + "learning_rate": 1.4745903915579008e-05, + "loss": 4.164, + "step": 15334 + }, + { + "epoch": 0.18, + "learning_rate": 1.4741275571600483e-05, + "loss": 1.4289, + "step": 15336 + }, + { + "epoch": 0.18, + "learning_rate": 1.4736647227621958e-05, + "loss": 4.2977, + "step": 15338 + }, + { + "epoch": 0.18, + "learning_rate": 1.4732018883643434e-05, + "loss": 4.9745, + "step": 15340 + }, + { + "epoch": 0.18, + "learning_rate": 1.472739053966491e-05, + "loss": 1.4664, + "step": 15342 + }, + { + "epoch": 0.18, + "learning_rate": 1.4722762195686385e-05, + "loss": 6.3983, + "step": 15344 + }, + { + "epoch": 0.18, + "learning_rate": 1.471813385170786e-05, + "loss": 0.1655, + "step": 15346 + }, + { + "epoch": 0.18, + "learning_rate": 1.4713505507729336e-05, + "loss": 0.8937, + "step": 15348 + }, + { + "epoch": 0.18, + "learning_rate": 1.4708877163750811e-05, + "loss": 0.026, + "step": 15350 + }, + { + "epoch": 0.18, + "learning_rate": 1.4704248819772286e-05, + "loss": 0.001, + "step": 15352 + }, + { + "epoch": 0.18, + "learning_rate": 1.4699620475793764e-05, + "loss": 0.5701, + "step": 15354 + }, + { + "epoch": 0.18, + "learning_rate": 1.4694992131815239e-05, + "loss": 1.8426, + "step": 15356 + }, + { + "epoch": 0.18, + "learning_rate": 1.4690363787836714e-05, + "loss": 4.6585, + "step": 15358 + }, + { + "epoch": 0.18, + "learning_rate": 1.468573544385819e-05, + "loss": 3.1048, + "step": 15360 + }, + { + "epoch": 0.18, + "learning_rate": 1.4681107099879662e-05, + "loss": 2.3582, + "step": 15362 + }, + { + "epoch": 0.18, + "learning_rate": 1.4676478755901137e-05, + "loss": 0.0032, + "step": 15364 + }, + { + "epoch": 0.18, + "learning_rate": 1.4671850411922613e-05, + "loss": 1.4687, + "step": 15366 + }, + { + "epoch": 0.18, + "learning_rate": 1.466722206794409e-05, + "loss": 0.001, + "step": 15368 + }, + { + "epoch": 0.18, + "learning_rate": 1.4662593723965565e-05, + "loss": 4.0862, + "step": 15370 + }, + { + "epoch": 0.18, + "learning_rate": 1.465796537998704e-05, + "loss": 0.2431, + "step": 15372 + }, + { + "epoch": 0.18, + "learning_rate": 1.4653337036008516e-05, + "loss": 0.0169, + "step": 15374 + }, + { + "epoch": 0.18, + "learning_rate": 1.4648708692029992e-05, + "loss": 3.369, + "step": 15376 + }, + { + "epoch": 0.18, + "learning_rate": 1.4644080348051467e-05, + "loss": 2.2996, + "step": 15378 + }, + { + "epoch": 0.18, + "learning_rate": 1.4639452004072942e-05, + "loss": 0.5851, + "step": 15380 + }, + { + "epoch": 0.18, + "learning_rate": 1.4634823660094418e-05, + "loss": 1.1961, + "step": 15382 + }, + { + "epoch": 0.18, + "learning_rate": 1.4630195316115893e-05, + "loss": 3.1466, + "step": 15384 + }, + { + "epoch": 0.18, + "learning_rate": 1.4625566972137369e-05, + "loss": 1.0878, + "step": 15386 + }, + { + "epoch": 0.18, + "learning_rate": 1.4620938628158846e-05, + "loss": 2.5726, + "step": 15388 + }, + { + "epoch": 0.18, + "learning_rate": 1.4616310284180321e-05, + "loss": 1.3574, + "step": 15390 + }, + { + "epoch": 0.18, + "learning_rate": 1.4611681940201797e-05, + "loss": 0.9666, + "step": 15392 + }, + { + "epoch": 0.18, + "learning_rate": 1.4607053596223272e-05, + "loss": 0.5896, + "step": 15394 + }, + { + "epoch": 0.18, + "learning_rate": 1.4602425252244747e-05, + "loss": 0.0021, + "step": 15396 + }, + { + "epoch": 0.18, + "learning_rate": 1.4597796908266223e-05, + "loss": 1.5038, + "step": 15398 + }, + { + "epoch": 0.18, + "learning_rate": 1.4593168564287698e-05, + "loss": 0.0622, + "step": 15400 + }, + { + "epoch": 0.18, + "learning_rate": 1.4588540220309174e-05, + "loss": 0.873, + "step": 15402 + }, + { + "epoch": 0.18, + "learning_rate": 1.458391187633065e-05, + "loss": 2.4386, + "step": 15404 + }, + { + "epoch": 0.18, + "learning_rate": 1.4579283532352125e-05, + "loss": 5.1184, + "step": 15406 + }, + { + "epoch": 0.18, + "learning_rate": 1.4574655188373602e-05, + "loss": 7.5952, + "step": 15408 + }, + { + "epoch": 0.18, + "learning_rate": 1.4570026844395077e-05, + "loss": 0.007, + "step": 15410 + }, + { + "epoch": 0.18, + "learning_rate": 1.4565398500416553e-05, + "loss": 0.9466, + "step": 15412 + }, + { + "epoch": 0.18, + "learning_rate": 1.4560770156438028e-05, + "loss": 3.1908, + "step": 15414 + }, + { + "epoch": 0.18, + "learning_rate": 1.4556141812459503e-05, + "loss": 7.3198, + "step": 15416 + }, + { + "epoch": 0.18, + "learning_rate": 1.4551513468480979e-05, + "loss": 2.0221, + "step": 15418 + }, + { + "epoch": 0.18, + "learning_rate": 1.4546885124502454e-05, + "loss": 0.0004, + "step": 15420 + }, + { + "epoch": 0.18, + "learning_rate": 1.454225678052393e-05, + "loss": 0.0018, + "step": 15422 + }, + { + "epoch": 0.18, + "learning_rate": 1.4537628436545405e-05, + "loss": 0.0014, + "step": 15424 + }, + { + "epoch": 0.18, + "learning_rate": 1.453300009256688e-05, + "loss": 0.7562, + "step": 15426 + }, + { + "epoch": 0.18, + "learning_rate": 1.4528371748588358e-05, + "loss": 0.0006, + "step": 15428 + }, + { + "epoch": 0.18, + "learning_rate": 1.4523743404609833e-05, + "loss": 0.4712, + "step": 15430 + }, + { + "epoch": 0.18, + "learning_rate": 1.4519115060631309e-05, + "loss": 0.0033, + "step": 15432 + }, + { + "epoch": 0.18, + "learning_rate": 1.4514486716652784e-05, + "loss": 0.1669, + "step": 15434 + }, + { + "epoch": 0.18, + "learning_rate": 1.4509858372674256e-05, + "loss": 2.1042, + "step": 15436 + }, + { + "epoch": 0.18, + "learning_rate": 1.4505230028695731e-05, + "loss": 1.7426, + "step": 15438 + }, + { + "epoch": 0.18, + "learning_rate": 1.4500601684717209e-05, + "loss": 1.3541, + "step": 15440 + }, + { + "epoch": 0.18, + "learning_rate": 1.4495973340738684e-05, + "loss": 0.7745, + "step": 15442 + }, + { + "epoch": 0.18, + "learning_rate": 1.449134499676016e-05, + "loss": 0.6909, + "step": 15444 + }, + { + "epoch": 0.18, + "learning_rate": 1.4486716652781635e-05, + "loss": 0.4365, + "step": 15446 + }, + { + "epoch": 0.18, + "learning_rate": 1.448208830880311e-05, + "loss": 2.4793, + "step": 15448 + }, + { + "epoch": 0.18, + "learning_rate": 1.4477459964824586e-05, + "loss": 1.0311, + "step": 15450 + }, + { + "epoch": 0.18, + "learning_rate": 1.4472831620846061e-05, + "loss": 0.0002, + "step": 15452 + }, + { + "epoch": 0.18, + "learning_rate": 1.4468203276867536e-05, + "loss": 0.0003, + "step": 15454 + }, + { + "epoch": 0.18, + "learning_rate": 1.4463574932889012e-05, + "loss": 6.2751, + "step": 15456 + }, + { + "epoch": 0.18, + "learning_rate": 1.4458946588910487e-05, + "loss": 5.8581, + "step": 15458 + }, + { + "epoch": 0.18, + "learning_rate": 1.4454318244931964e-05, + "loss": 3.5022, + "step": 15460 + }, + { + "epoch": 0.18, + "learning_rate": 1.444968990095344e-05, + "loss": 0.0004, + "step": 15462 + }, + { + "epoch": 0.18, + "learning_rate": 1.4445061556974915e-05, + "loss": 0.0003, + "step": 15464 + }, + { + "epoch": 0.18, + "learning_rate": 1.444043321299639e-05, + "loss": 1.3267, + "step": 15466 + }, + { + "epoch": 0.18, + "learning_rate": 1.4435804869017866e-05, + "loss": 0.7139, + "step": 15468 + }, + { + "epoch": 0.18, + "learning_rate": 1.4431176525039342e-05, + "loss": 1.3044, + "step": 15470 + }, + { + "epoch": 0.18, + "learning_rate": 1.4426548181060817e-05, + "loss": 2.6736, + "step": 15472 + }, + { + "epoch": 0.18, + "learning_rate": 1.4421919837082292e-05, + "loss": 5.14, + "step": 15474 + }, + { + "epoch": 0.18, + "learning_rate": 1.4417291493103768e-05, + "loss": 0.2602, + "step": 15476 + }, + { + "epoch": 0.18, + "learning_rate": 1.4412663149125243e-05, + "loss": 0.2546, + "step": 15478 + }, + { + "epoch": 0.18, + "learning_rate": 1.440803480514672e-05, + "loss": 1.2572, + "step": 15480 + }, + { + "epoch": 0.18, + "learning_rate": 1.4403406461168196e-05, + "loss": 2.4543, + "step": 15482 + }, + { + "epoch": 0.18, + "learning_rate": 1.4398778117189671e-05, + "loss": 0.629, + "step": 15484 + }, + { + "epoch": 0.18, + "learning_rate": 1.4394149773211147e-05, + "loss": 4.9821, + "step": 15486 + }, + { + "epoch": 0.18, + "learning_rate": 1.4389521429232622e-05, + "loss": 1.0008, + "step": 15488 + }, + { + "epoch": 0.18, + "learning_rate": 1.4384893085254098e-05, + "loss": 1.0793, + "step": 15490 + }, + { + "epoch": 0.18, + "learning_rate": 1.4380264741275573e-05, + "loss": 0.0829, + "step": 15492 + }, + { + "epoch": 0.18, + "learning_rate": 1.4375636397297048e-05, + "loss": 4.233, + "step": 15494 + }, + { + "epoch": 0.18, + "learning_rate": 1.4371008053318524e-05, + "loss": 0.0005, + "step": 15496 + }, + { + "epoch": 0.18, + "learning_rate": 1.436637970934e-05, + "loss": 2.071, + "step": 15498 + }, + { + "epoch": 0.18, + "learning_rate": 1.4361751365361475e-05, + "loss": 0.042, + "step": 15500 + }, + { + "epoch": 0.18, + "learning_rate": 1.4357123021382952e-05, + "loss": 2.921, + "step": 15502 + }, + { + "epoch": 0.18, + "learning_rate": 1.4352494677404427e-05, + "loss": 2.0097, + "step": 15504 + }, + { + "epoch": 0.18, + "learning_rate": 1.4347866333425903e-05, + "loss": 5.3953, + "step": 15506 + }, + { + "epoch": 0.18, + "learning_rate": 1.4343237989447375e-05, + "loss": 1.2576, + "step": 15508 + }, + { + "epoch": 0.18, + "learning_rate": 1.433860964546885e-05, + "loss": 2.8586, + "step": 15510 + }, + { + "epoch": 0.18, + "learning_rate": 1.4333981301490325e-05, + "loss": 2.0089, + "step": 15512 + }, + { + "epoch": 0.18, + "learning_rate": 1.4329352957511803e-05, + "loss": 0.3712, + "step": 15514 + }, + { + "epoch": 0.18, + "learning_rate": 1.4324724613533278e-05, + "loss": 2.9465, + "step": 15516 + }, + { + "epoch": 0.18, + "learning_rate": 1.4320096269554753e-05, + "loss": 1.4289, + "step": 15518 + }, + { + "epoch": 0.18, + "learning_rate": 1.4315467925576229e-05, + "loss": 0.4502, + "step": 15520 + }, + { + "epoch": 0.18, + "learning_rate": 1.4310839581597704e-05, + "loss": 5.2281, + "step": 15522 + }, + { + "epoch": 0.18, + "learning_rate": 1.430621123761918e-05, + "loss": 1.0918, + "step": 15524 + }, + { + "epoch": 0.18, + "learning_rate": 1.4301582893640655e-05, + "loss": 0.9605, + "step": 15526 + }, + { + "epoch": 0.18, + "learning_rate": 1.429695454966213e-05, + "loss": 2.6894, + "step": 15528 + }, + { + "epoch": 0.18, + "learning_rate": 1.4292326205683606e-05, + "loss": 3.6249, + "step": 15530 + }, + { + "epoch": 0.18, + "learning_rate": 1.4287697861705081e-05, + "loss": 1.8281, + "step": 15532 + }, + { + "epoch": 0.18, + "learning_rate": 1.4283069517726559e-05, + "loss": 2.181, + "step": 15534 + }, + { + "epoch": 0.18, + "learning_rate": 1.4278441173748034e-05, + "loss": 0.8032, + "step": 15536 + }, + { + "epoch": 0.18, + "learning_rate": 1.427381282976951e-05, + "loss": 1.1125, + "step": 15538 + }, + { + "epoch": 0.18, + "learning_rate": 1.4269184485790985e-05, + "loss": 1.8308, + "step": 15540 + }, + { + "epoch": 0.18, + "learning_rate": 1.426455614181246e-05, + "loss": 2.2997, + "step": 15542 + }, + { + "epoch": 0.18, + "learning_rate": 1.4259927797833936e-05, + "loss": 0.9325, + "step": 15544 + }, + { + "epoch": 0.18, + "learning_rate": 1.4255299453855411e-05, + "loss": 1.4333, + "step": 15546 + }, + { + "epoch": 0.18, + "learning_rate": 1.4250671109876887e-05, + "loss": 2.7233, + "step": 15548 + }, + { + "epoch": 0.18, + "learning_rate": 1.4246042765898362e-05, + "loss": 0.5034, + "step": 15550 + }, + { + "epoch": 0.18, + "learning_rate": 1.4241414421919837e-05, + "loss": 2.8277, + "step": 15552 + }, + { + "epoch": 0.18, + "learning_rate": 1.4236786077941314e-05, + "loss": 2.4032, + "step": 15554 + }, + { + "epoch": 0.18, + "learning_rate": 1.423215773396279e-05, + "loss": 1.0395, + "step": 15556 + }, + { + "epoch": 0.18, + "learning_rate": 1.4227529389984265e-05, + "loss": 1.7913, + "step": 15558 + }, + { + "epoch": 0.18, + "learning_rate": 1.422290104600574e-05, + "loss": 0.0105, + "step": 15560 + }, + { + "epoch": 0.18, + "learning_rate": 1.4218272702027216e-05, + "loss": 0.1428, + "step": 15562 + }, + { + "epoch": 0.18, + "learning_rate": 1.4213644358048692e-05, + "loss": 0.2416, + "step": 15564 + }, + { + "epoch": 0.18, + "learning_rate": 1.4209016014070167e-05, + "loss": 0.554, + "step": 15566 + }, + { + "epoch": 0.18, + "learning_rate": 1.4204387670091642e-05, + "loss": 0.2534, + "step": 15568 + }, + { + "epoch": 0.18, + "learning_rate": 1.4199759326113118e-05, + "loss": 1.5591, + "step": 15570 + }, + { + "epoch": 0.18, + "learning_rate": 1.4195130982134593e-05, + "loss": 0.0018, + "step": 15572 + }, + { + "epoch": 0.18, + "learning_rate": 1.419050263815607e-05, + "loss": 4.878, + "step": 15574 + }, + { + "epoch": 0.18, + "learning_rate": 1.4185874294177546e-05, + "loss": 2.474, + "step": 15576 + }, + { + "epoch": 0.18, + "learning_rate": 1.4181245950199021e-05, + "loss": 1.6068, + "step": 15578 + }, + { + "epoch": 0.18, + "learning_rate": 1.4176617606220493e-05, + "loss": 6.4687, + "step": 15580 + }, + { + "epoch": 0.18, + "learning_rate": 1.4171989262241969e-05, + "loss": 4.7057, + "step": 15582 + }, + { + "epoch": 0.18, + "learning_rate": 1.4167360918263444e-05, + "loss": 0.6187, + "step": 15584 + }, + { + "epoch": 0.18, + "learning_rate": 1.416273257428492e-05, + "loss": 3.5633, + "step": 15586 + }, + { + "epoch": 0.18, + "learning_rate": 1.4158104230306397e-05, + "loss": 1.8648, + "step": 15588 + }, + { + "epoch": 0.18, + "learning_rate": 1.4153475886327872e-05, + "loss": 1.1538, + "step": 15590 + }, + { + "epoch": 0.18, + "learning_rate": 1.4148847542349348e-05, + "loss": 0.8771, + "step": 15592 + }, + { + "epoch": 0.18, + "learning_rate": 1.4144219198370823e-05, + "loss": 0.0012, + "step": 15594 + }, + { + "epoch": 0.18, + "learning_rate": 1.4139590854392298e-05, + "loss": 1.4604, + "step": 15596 + }, + { + "epoch": 0.18, + "learning_rate": 1.4134962510413774e-05, + "loss": 0.063, + "step": 15598 + }, + { + "epoch": 0.18, + "learning_rate": 1.413033416643525e-05, + "loss": 0.1224, + "step": 15600 + }, + { + "epoch": 0.18, + "learning_rate": 1.4125705822456725e-05, + "loss": 0.961, + "step": 15602 + }, + { + "epoch": 0.18, + "learning_rate": 1.41210774784782e-05, + "loss": 0.0003, + "step": 15604 + }, + { + "epoch": 0.18, + "learning_rate": 1.4116449134499676e-05, + "loss": 0.7512, + "step": 15606 + }, + { + "epoch": 0.18, + "learning_rate": 1.4111820790521153e-05, + "loss": 2.2854, + "step": 15608 + }, + { + "epoch": 0.18, + "learning_rate": 1.4107192446542628e-05, + "loss": 0.7176, + "step": 15610 + }, + { + "epoch": 0.18, + "learning_rate": 1.4102564102564104e-05, + "loss": 1.8438, + "step": 15612 + }, + { + "epoch": 0.18, + "learning_rate": 1.4097935758585579e-05, + "loss": 2.9494, + "step": 15614 + }, + { + "epoch": 0.18, + "learning_rate": 1.4093307414607054e-05, + "loss": 4.2931, + "step": 15616 + }, + { + "epoch": 0.18, + "learning_rate": 1.408867907062853e-05, + "loss": 0.005, + "step": 15618 + }, + { + "epoch": 0.18, + "learning_rate": 1.4084050726650005e-05, + "loss": 0.001, + "step": 15620 + }, + { + "epoch": 0.18, + "learning_rate": 1.407942238267148e-05, + "loss": 0.8712, + "step": 15622 + }, + { + "epoch": 0.18, + "learning_rate": 1.4074794038692956e-05, + "loss": 0.0006, + "step": 15624 + }, + { + "epoch": 0.18, + "learning_rate": 1.4070165694714431e-05, + "loss": 0.0037, + "step": 15626 + }, + { + "epoch": 0.18, + "learning_rate": 1.4065537350735909e-05, + "loss": 0.5691, + "step": 15628 + }, + { + "epoch": 0.18, + "learning_rate": 1.4060909006757384e-05, + "loss": 4.5427, + "step": 15630 + }, + { + "epoch": 0.18, + "learning_rate": 1.405628066277886e-05, + "loss": 0.3646, + "step": 15632 + }, + { + "epoch": 0.18, + "learning_rate": 1.4051652318800335e-05, + "loss": 1.7155, + "step": 15634 + }, + { + "epoch": 0.18, + "learning_rate": 1.404702397482181e-05, + "loss": 3.9707, + "step": 15636 + }, + { + "epoch": 0.18, + "learning_rate": 1.4042395630843286e-05, + "loss": 2.9425, + "step": 15638 + }, + { + "epoch": 0.18, + "learning_rate": 1.4037767286864761e-05, + "loss": 2.4522, + "step": 15640 + }, + { + "epoch": 0.18, + "learning_rate": 1.4033138942886237e-05, + "loss": 2.0615, + "step": 15642 + }, + { + "epoch": 0.18, + "learning_rate": 1.4028510598907712e-05, + "loss": 0.5035, + "step": 15644 + }, + { + "epoch": 0.18, + "learning_rate": 1.4023882254929187e-05, + "loss": 0.0009, + "step": 15646 + }, + { + "epoch": 0.18, + "learning_rate": 1.4019253910950665e-05, + "loss": 2.6186, + "step": 15648 + }, + { + "epoch": 0.18, + "learning_rate": 1.401462556697214e-05, + "loss": 0.8079, + "step": 15650 + }, + { + "epoch": 0.18, + "learning_rate": 1.4009997222993612e-05, + "loss": 1.7268, + "step": 15652 + }, + { + "epoch": 0.18, + "learning_rate": 1.4005368879015087e-05, + "loss": 6.8937, + "step": 15654 + }, + { + "epoch": 0.18, + "learning_rate": 1.4000740535036563e-05, + "loss": 2.0419, + "step": 15656 + }, + { + "epoch": 0.18, + "learning_rate": 1.3996112191058038e-05, + "loss": 1.668, + "step": 15658 + }, + { + "epoch": 0.18, + "learning_rate": 1.3991483847079515e-05, + "loss": 0.0003, + "step": 15660 + }, + { + "epoch": 0.18, + "learning_rate": 1.398685550310099e-05, + "loss": 0.6324, + "step": 15662 + }, + { + "epoch": 0.18, + "learning_rate": 1.3982227159122466e-05, + "loss": 0.0005, + "step": 15664 + }, + { + "epoch": 0.18, + "learning_rate": 1.3977598815143942e-05, + "loss": 4.147, + "step": 15666 + }, + { + "epoch": 0.18, + "learning_rate": 1.3972970471165417e-05, + "loss": 0.6323, + "step": 15668 + }, + { + "epoch": 0.18, + "learning_rate": 1.3968342127186893e-05, + "loss": 2.4483, + "step": 15670 + }, + { + "epoch": 0.18, + "learning_rate": 1.3963713783208368e-05, + "loss": 0.088, + "step": 15672 + }, + { + "epoch": 0.18, + "learning_rate": 1.3959085439229843e-05, + "loss": 0.2946, + "step": 15674 + }, + { + "epoch": 0.18, + "learning_rate": 1.3954457095251319e-05, + "loss": 0.0005, + "step": 15676 + }, + { + "epoch": 0.18, + "learning_rate": 1.3949828751272794e-05, + "loss": 0.0029, + "step": 15678 + }, + { + "epoch": 0.18, + "learning_rate": 1.394520040729427e-05, + "loss": 2.1261, + "step": 15680 + }, + { + "epoch": 0.18, + "learning_rate": 1.3940572063315747e-05, + "loss": 0.333, + "step": 15682 + }, + { + "epoch": 0.18, + "learning_rate": 1.3935943719337222e-05, + "loss": 0.0002, + "step": 15684 + }, + { + "epoch": 0.18, + "learning_rate": 1.3931315375358698e-05, + "loss": 0.0002, + "step": 15686 + }, + { + "epoch": 0.18, + "learning_rate": 1.3926687031380173e-05, + "loss": 0.0004, + "step": 15688 + }, + { + "epoch": 0.18, + "learning_rate": 1.3922058687401648e-05, + "loss": 0.0811, + "step": 15690 + }, + { + "epoch": 0.18, + "learning_rate": 1.3917430343423124e-05, + "loss": 3.4585, + "step": 15692 + }, + { + "epoch": 0.18, + "learning_rate": 1.39128019994446e-05, + "loss": 2.108, + "step": 15694 + }, + { + "epoch": 0.18, + "learning_rate": 1.3908173655466075e-05, + "loss": 0.7527, + "step": 15696 + }, + { + "epoch": 0.18, + "learning_rate": 1.390354531148755e-05, + "loss": 1.3462, + "step": 15698 + }, + { + "epoch": 0.18, + "learning_rate": 1.3898916967509026e-05, + "loss": 2.3154, + "step": 15700 + }, + { + "epoch": 0.18, + "learning_rate": 1.3894288623530503e-05, + "loss": 0.5342, + "step": 15702 + }, + { + "epoch": 0.18, + "learning_rate": 1.3889660279551978e-05, + "loss": 0.5448, + "step": 15704 + }, + { + "epoch": 0.18, + "learning_rate": 1.3885031935573454e-05, + "loss": 0.0001, + "step": 15706 + }, + { + "epoch": 0.18, + "learning_rate": 1.3880403591594929e-05, + "loss": 0.9911, + "step": 15708 + }, + { + "epoch": 0.18, + "learning_rate": 1.3875775247616404e-05, + "loss": 1.6248, + "step": 15710 + }, + { + "epoch": 0.18, + "learning_rate": 1.387114690363788e-05, + "loss": 9.7773, + "step": 15712 + }, + { + "epoch": 0.18, + "learning_rate": 1.3866518559659355e-05, + "loss": 0.0193, + "step": 15714 + }, + { + "epoch": 0.18, + "learning_rate": 1.386189021568083e-05, + "loss": 3.2518, + "step": 15716 + }, + { + "epoch": 0.18, + "learning_rate": 1.3857261871702306e-05, + "loss": 0.0338, + "step": 15718 + }, + { + "epoch": 0.18, + "learning_rate": 1.3852633527723782e-05, + "loss": 2.1929, + "step": 15720 + }, + { + "epoch": 0.18, + "learning_rate": 1.3848005183745259e-05, + "loss": 0.0002, + "step": 15722 + }, + { + "epoch": 0.18, + "learning_rate": 1.3843376839766734e-05, + "loss": 0.6903, + "step": 15724 + }, + { + "epoch": 0.18, + "learning_rate": 1.3838748495788206e-05, + "loss": 1.8321, + "step": 15726 + }, + { + "epoch": 0.18, + "learning_rate": 1.3834120151809682e-05, + "loss": 0.3929, + "step": 15728 + }, + { + "epoch": 0.18, + "learning_rate": 1.3829491807831157e-05, + "loss": 1.7359, + "step": 15730 + }, + { + "epoch": 0.18, + "learning_rate": 1.3824863463852632e-05, + "loss": 0.8298, + "step": 15732 + }, + { + "epoch": 0.18, + "learning_rate": 1.382023511987411e-05, + "loss": 3.5124, + "step": 15734 + }, + { + "epoch": 0.18, + "learning_rate": 1.3815606775895585e-05, + "loss": 0.0003, + "step": 15736 + }, + { + "epoch": 0.18, + "learning_rate": 1.381097843191706e-05, + "loss": 5.7601, + "step": 15738 + }, + { + "epoch": 0.18, + "learning_rate": 1.3806350087938536e-05, + "loss": 1.37, + "step": 15740 + }, + { + "epoch": 0.18, + "learning_rate": 1.3801721743960011e-05, + "loss": 4.3003, + "step": 15742 + }, + { + "epoch": 0.18, + "learning_rate": 1.3797093399981487e-05, + "loss": 1.6919, + "step": 15744 + }, + { + "epoch": 0.18, + "learning_rate": 1.3792465056002962e-05, + "loss": 0.0002, + "step": 15746 + }, + { + "epoch": 0.18, + "learning_rate": 1.3787836712024437e-05, + "loss": 0.2357, + "step": 15748 + }, + { + "epoch": 0.18, + "learning_rate": 1.3783208368045913e-05, + "loss": 0.121, + "step": 15750 + }, + { + "epoch": 0.18, + "learning_rate": 1.3778580024067388e-05, + "loss": 0.8663, + "step": 15752 + }, + { + "epoch": 0.18, + "learning_rate": 1.3773951680088865e-05, + "loss": 0.0026, + "step": 15754 + }, + { + "epoch": 0.18, + "learning_rate": 1.376932333611034e-05, + "loss": 6.2195, + "step": 15756 + }, + { + "epoch": 0.18, + "learning_rate": 1.3764694992131816e-05, + "loss": 3.873, + "step": 15758 + }, + { + "epoch": 0.18, + "learning_rate": 1.3760066648153292e-05, + "loss": 2.7381, + "step": 15760 + }, + { + "epoch": 0.18, + "learning_rate": 1.3755438304174767e-05, + "loss": 0.0004, + "step": 15762 + }, + { + "epoch": 0.18, + "learning_rate": 1.3750809960196243e-05, + "loss": 1.738, + "step": 15764 + }, + { + "epoch": 0.18, + "learning_rate": 1.3746181616217718e-05, + "loss": 0.1132, + "step": 15766 + }, + { + "epoch": 0.18, + "learning_rate": 1.3741553272239193e-05, + "loss": 0.6325, + "step": 15768 + }, + { + "epoch": 0.18, + "learning_rate": 1.3736924928260669e-05, + "loss": 2.3684, + "step": 15770 + }, + { + "epoch": 0.18, + "learning_rate": 1.3732296584282144e-05, + "loss": 2.2323, + "step": 15772 + }, + { + "epoch": 0.18, + "learning_rate": 1.3727668240303621e-05, + "loss": 0.0341, + "step": 15774 + }, + { + "epoch": 0.18, + "learning_rate": 1.3723039896325097e-05, + "loss": 2.6818, + "step": 15776 + }, + { + "epoch": 0.18, + "learning_rate": 1.3718411552346572e-05, + "loss": 5.4052, + "step": 15778 + }, + { + "epoch": 0.18, + "learning_rate": 1.3713783208368048e-05, + "loss": 2.1789, + "step": 15780 + }, + { + "epoch": 0.18, + "learning_rate": 1.3709154864389523e-05, + "loss": 5.1371, + "step": 15782 + }, + { + "epoch": 0.18, + "learning_rate": 1.3704526520410998e-05, + "loss": 0.7577, + "step": 15784 + }, + { + "epoch": 0.18, + "learning_rate": 1.3699898176432474e-05, + "loss": 0.0005, + "step": 15786 + }, + { + "epoch": 0.18, + "learning_rate": 1.369526983245395e-05, + "loss": 1.4599, + "step": 15788 + }, + { + "epoch": 0.18, + "learning_rate": 1.3690641488475425e-05, + "loss": 0.001, + "step": 15790 + }, + { + "epoch": 0.18, + "learning_rate": 1.36860131444969e-05, + "loss": 1.1588, + "step": 15792 + }, + { + "epoch": 0.18, + "learning_rate": 1.3681384800518377e-05, + "loss": 1.5208, + "step": 15794 + }, + { + "epoch": 0.18, + "learning_rate": 1.3676756456539853e-05, + "loss": 0.0029, + "step": 15796 + }, + { + "epoch": 0.18, + "learning_rate": 1.3672128112561325e-05, + "loss": 1.0548, + "step": 15798 + }, + { + "epoch": 0.18, + "learning_rate": 1.36674997685828e-05, + "loss": 0.6044, + "step": 15800 + }, + { + "epoch": 0.18, + "learning_rate": 1.3662871424604276e-05, + "loss": 0.9302, + "step": 15802 + }, + { + "epoch": 0.18, + "learning_rate": 1.3658243080625751e-05, + "loss": 3.4166, + "step": 15804 + }, + { + "epoch": 0.18, + "learning_rate": 1.3653614736647226e-05, + "loss": 2.5832, + "step": 15806 + }, + { + "epoch": 0.18, + "learning_rate": 1.3648986392668704e-05, + "loss": 0.0005, + "step": 15808 + }, + { + "epoch": 0.18, + "learning_rate": 1.3644358048690179e-05, + "loss": 1.918, + "step": 15810 + }, + { + "epoch": 0.18, + "learning_rate": 1.3639729704711654e-05, + "loss": 0.2688, + "step": 15812 + }, + { + "epoch": 0.18, + "learning_rate": 1.363510136073313e-05, + "loss": 1.7442, + "step": 15814 + }, + { + "epoch": 0.18, + "learning_rate": 1.3630473016754605e-05, + "loss": 0.0752, + "step": 15816 + }, + { + "epoch": 0.18, + "learning_rate": 1.362584467277608e-05, + "loss": 1.0415, + "step": 15818 + }, + { + "epoch": 0.18, + "learning_rate": 1.3621216328797556e-05, + "loss": 0.4182, + "step": 15820 + }, + { + "epoch": 0.18, + "learning_rate": 1.3616587984819032e-05, + "loss": 5.0892, + "step": 15822 + }, + { + "epoch": 0.18, + "learning_rate": 1.3611959640840507e-05, + "loss": 4.0952, + "step": 15824 + }, + { + "epoch": 0.18, + "learning_rate": 1.3607331296861982e-05, + "loss": 5.4301, + "step": 15826 + }, + { + "epoch": 0.18, + "learning_rate": 1.360270295288346e-05, + "loss": 2.0956, + "step": 15828 + }, + { + "epoch": 0.18, + "learning_rate": 1.3598074608904935e-05, + "loss": 8.2639, + "step": 15830 + }, + { + "epoch": 0.18, + "learning_rate": 1.359344626492641e-05, + "loss": 1.7535, + "step": 15832 + }, + { + "epoch": 0.18, + "learning_rate": 1.3588817920947886e-05, + "loss": 2.8819, + "step": 15834 + }, + { + "epoch": 0.18, + "learning_rate": 1.3584189576969361e-05, + "loss": 1.7211, + "step": 15836 + }, + { + "epoch": 0.18, + "learning_rate": 1.3579561232990837e-05, + "loss": 0.15, + "step": 15838 + }, + { + "epoch": 0.18, + "learning_rate": 1.3574932889012312e-05, + "loss": 0.7273, + "step": 15840 + }, + { + "epoch": 0.18, + "learning_rate": 1.3570304545033787e-05, + "loss": 2.0509, + "step": 15842 + }, + { + "epoch": 0.18, + "learning_rate": 1.3565676201055263e-05, + "loss": 1.164, + "step": 15844 + }, + { + "epoch": 0.18, + "learning_rate": 1.3561047857076738e-05, + "loss": 2.1521, + "step": 15846 + }, + { + "epoch": 0.18, + "learning_rate": 1.3556419513098215e-05, + "loss": 0.0058, + "step": 15848 + }, + { + "epoch": 0.18, + "learning_rate": 1.3551791169119691e-05, + "loss": 0.1912, + "step": 15850 + }, + { + "epoch": 0.18, + "learning_rate": 1.3547162825141166e-05, + "loss": 1.5257, + "step": 15852 + }, + { + "epoch": 0.18, + "learning_rate": 1.3542534481162642e-05, + "loss": 1.0621, + "step": 15854 + }, + { + "epoch": 0.18, + "learning_rate": 1.3537906137184117e-05, + "loss": 2.7447, + "step": 15856 + }, + { + "epoch": 0.18, + "learning_rate": 1.3533277793205593e-05, + "loss": 0.5258, + "step": 15858 + }, + { + "epoch": 0.18, + "learning_rate": 1.3528649449227068e-05, + "loss": 6.6669, + "step": 15860 + }, + { + "epoch": 0.18, + "learning_rate": 1.3524021105248543e-05, + "loss": 2.4953, + "step": 15862 + }, + { + "epoch": 0.18, + "learning_rate": 1.3519392761270019e-05, + "loss": 0.6499, + "step": 15864 + }, + { + "epoch": 0.18, + "learning_rate": 1.3514764417291494e-05, + "loss": 0.0024, + "step": 15866 + }, + { + "epoch": 0.18, + "learning_rate": 1.3510136073312971e-05, + "loss": 6.9857, + "step": 15868 + }, + { + "epoch": 0.18, + "learning_rate": 1.3505507729334443e-05, + "loss": 2.1838, + "step": 15870 + }, + { + "epoch": 0.18, + "learning_rate": 1.3500879385355919e-05, + "loss": 0.2249, + "step": 15872 + }, + { + "epoch": 0.18, + "learning_rate": 1.3496251041377394e-05, + "loss": 1.8471, + "step": 15874 + }, + { + "epoch": 0.18, + "learning_rate": 1.349162269739887e-05, + "loss": 0.2182, + "step": 15876 + }, + { + "epoch": 0.18, + "learning_rate": 1.3486994353420345e-05, + "loss": 2.1904, + "step": 15878 + }, + { + "epoch": 0.18, + "learning_rate": 1.348236600944182e-05, + "loss": 1.5118, + "step": 15880 + }, + { + "epoch": 0.18, + "learning_rate": 1.3477737665463298e-05, + "loss": 0.0024, + "step": 15882 + }, + { + "epoch": 0.18, + "learning_rate": 1.3473109321484773e-05, + "loss": 2.283, + "step": 15884 + }, + { + "epoch": 0.18, + "learning_rate": 1.3468480977506249e-05, + "loss": 0.003, + "step": 15886 + }, + { + "epoch": 0.18, + "learning_rate": 1.3463852633527724e-05, + "loss": 4.2903, + "step": 15888 + }, + { + "epoch": 0.18, + "learning_rate": 1.34592242895492e-05, + "loss": 3.3813, + "step": 15890 + }, + { + "epoch": 0.18, + "learning_rate": 1.3454595945570675e-05, + "loss": 1.3725, + "step": 15892 + }, + { + "epoch": 0.18, + "learning_rate": 1.344996760159215e-05, + "loss": 0.0016, + "step": 15894 + }, + { + "epoch": 0.18, + "learning_rate": 1.3445339257613626e-05, + "loss": 0.2818, + "step": 15896 + }, + { + "epoch": 0.18, + "learning_rate": 1.3440710913635101e-05, + "loss": 0.0524, + "step": 15898 + }, + { + "epoch": 0.18, + "learning_rate": 1.3436082569656576e-05, + "loss": 0.0028, + "step": 15900 + }, + { + "epoch": 0.18, + "learning_rate": 1.3431454225678054e-05, + "loss": 2.2432, + "step": 15902 + }, + { + "epoch": 0.18, + "learning_rate": 1.3426825881699529e-05, + "loss": 1.8349, + "step": 15904 + }, + { + "epoch": 0.18, + "learning_rate": 1.3422197537721004e-05, + "loss": 2.2411, + "step": 15906 + }, + { + "epoch": 0.18, + "learning_rate": 1.341756919374248e-05, + "loss": 1.285, + "step": 15908 + }, + { + "epoch": 0.18, + "learning_rate": 1.3412940849763955e-05, + "loss": 4.3582, + "step": 15910 + }, + { + "epoch": 0.18, + "learning_rate": 1.340831250578543e-05, + "loss": 1.7554, + "step": 15912 + }, + { + "epoch": 0.18, + "learning_rate": 1.3403684161806906e-05, + "loss": 3.1451, + "step": 15914 + }, + { + "epoch": 0.18, + "learning_rate": 1.3399055817828382e-05, + "loss": 5.1497, + "step": 15916 + }, + { + "epoch": 0.18, + "learning_rate": 1.3394427473849857e-05, + "loss": 0.0003, + "step": 15918 + }, + { + "epoch": 0.18, + "learning_rate": 1.3389799129871332e-05, + "loss": 0.0222, + "step": 15920 + }, + { + "epoch": 0.18, + "learning_rate": 1.338517078589281e-05, + "loss": 0.4647, + "step": 15922 + }, + { + "epoch": 0.18, + "learning_rate": 1.3380542441914285e-05, + "loss": 1.0441, + "step": 15924 + }, + { + "epoch": 0.18, + "learning_rate": 1.337591409793576e-05, + "loss": 3.6885, + "step": 15926 + }, + { + "epoch": 0.18, + "learning_rate": 1.3371285753957236e-05, + "loss": 4.63, + "step": 15928 + }, + { + "epoch": 0.18, + "learning_rate": 1.3366657409978711e-05, + "loss": 0.8465, + "step": 15930 + }, + { + "epoch": 0.18, + "learning_rate": 1.3362029066000187e-05, + "loss": 7.1532, + "step": 15932 + }, + { + "epoch": 0.18, + "learning_rate": 1.3357400722021662e-05, + "loss": 0.7371, + "step": 15934 + }, + { + "epoch": 0.18, + "learning_rate": 1.3352772378043138e-05, + "loss": 1.3584, + "step": 15936 + }, + { + "epoch": 0.18, + "learning_rate": 1.3348144034064613e-05, + "loss": 1.4629, + "step": 15938 + }, + { + "epoch": 0.18, + "learning_rate": 1.3343515690086088e-05, + "loss": 1.2693, + "step": 15940 + }, + { + "epoch": 0.18, + "learning_rate": 1.3338887346107562e-05, + "loss": 0.5137, + "step": 15942 + }, + { + "epoch": 0.18, + "learning_rate": 1.3334259002129038e-05, + "loss": 1.9534, + "step": 15944 + }, + { + "epoch": 0.18, + "learning_rate": 1.3329630658150513e-05, + "loss": 0.1781, + "step": 15946 + }, + { + "epoch": 0.18, + "learning_rate": 1.3325002314171988e-05, + "loss": 0.0004, + "step": 15948 + }, + { + "epoch": 0.18, + "learning_rate": 1.3320373970193464e-05, + "loss": 5.9152, + "step": 15950 + }, + { + "epoch": 0.18, + "learning_rate": 1.331574562621494e-05, + "loss": 0.0096, + "step": 15952 + }, + { + "epoch": 0.18, + "learning_rate": 1.3311117282236416e-05, + "loss": 0.4117, + "step": 15954 + }, + { + "epoch": 0.18, + "learning_rate": 1.3306488938257892e-05, + "loss": 3.2104, + "step": 15956 + }, + { + "epoch": 0.18, + "learning_rate": 1.3301860594279367e-05, + "loss": 5.098, + "step": 15958 + }, + { + "epoch": 0.18, + "learning_rate": 1.3297232250300843e-05, + "loss": 2.4288, + "step": 15960 + }, + { + "epoch": 0.18, + "learning_rate": 1.3292603906322318e-05, + "loss": 0.0799, + "step": 15962 + }, + { + "epoch": 0.18, + "learning_rate": 1.3287975562343793e-05, + "loss": 0.0017, + "step": 15964 + }, + { + "epoch": 0.18, + "learning_rate": 1.3283347218365269e-05, + "loss": 1.1921, + "step": 15966 + }, + { + "epoch": 0.18, + "learning_rate": 1.3278718874386744e-05, + "loss": 1.2666, + "step": 15968 + }, + { + "epoch": 0.18, + "learning_rate": 1.327409053040822e-05, + "loss": 0.185, + "step": 15970 + }, + { + "epoch": 0.18, + "learning_rate": 1.3269462186429695e-05, + "loss": 1.2516, + "step": 15972 + }, + { + "epoch": 0.18, + "learning_rate": 1.3264833842451172e-05, + "loss": 0.0017, + "step": 15974 + }, + { + "epoch": 0.18, + "learning_rate": 1.3260205498472648e-05, + "loss": 2.2354, + "step": 15976 + }, + { + "epoch": 0.18, + "learning_rate": 1.3255577154494123e-05, + "loss": 0.0082, + "step": 15978 + }, + { + "epoch": 0.18, + "learning_rate": 1.3250948810515599e-05, + "loss": 0.1558, + "step": 15980 + }, + { + "epoch": 0.18, + "learning_rate": 1.3246320466537074e-05, + "loss": 4.6608, + "step": 15982 + }, + { + "epoch": 0.18, + "learning_rate": 1.324169212255855e-05, + "loss": 5.5011, + "step": 15984 + }, + { + "epoch": 0.18, + "learning_rate": 1.3237063778580025e-05, + "loss": 5.771, + "step": 15986 + }, + { + "epoch": 0.18, + "learning_rate": 1.32324354346015e-05, + "loss": 0.0231, + "step": 15988 + }, + { + "epoch": 0.18, + "learning_rate": 1.3227807090622976e-05, + "loss": 0.2771, + "step": 15990 + }, + { + "epoch": 0.18, + "learning_rate": 1.3223178746644451e-05, + "loss": 0.0056, + "step": 15992 + }, + { + "epoch": 0.18, + "learning_rate": 1.3218550402665927e-05, + "loss": 2.3276, + "step": 15994 + }, + { + "epoch": 0.18, + "learning_rate": 1.3213922058687404e-05, + "loss": 0.1362, + "step": 15996 + }, + { + "epoch": 0.18, + "learning_rate": 1.3209293714708879e-05, + "loss": 3.909, + "step": 15998 + }, + { + "epoch": 0.18, + "learning_rate": 1.3204665370730355e-05, + "loss": 0.0016, + "step": 16000 + }, + { + "epoch": 0.18, + "learning_rate": 1.320003702675183e-05, + "loss": 5.3176, + "step": 16002 + }, + { + "epoch": 0.18, + "learning_rate": 1.3195408682773305e-05, + "loss": 5.2605, + "step": 16004 + }, + { + "epoch": 0.18, + "learning_rate": 1.319078033879478e-05, + "loss": 3.5027, + "step": 16006 + }, + { + "epoch": 0.18, + "learning_rate": 1.3186151994816256e-05, + "loss": 2.2293, + "step": 16008 + }, + { + "epoch": 0.18, + "learning_rate": 1.3181523650837732e-05, + "loss": 1.5591, + "step": 16010 + }, + { + "epoch": 0.18, + "learning_rate": 1.3176895306859207e-05, + "loss": 0.6815, + "step": 16012 + }, + { + "epoch": 0.18, + "learning_rate": 1.3172266962880682e-05, + "loss": 0.0021, + "step": 16014 + }, + { + "epoch": 0.18, + "learning_rate": 1.3167638618902156e-05, + "loss": 8.9284, + "step": 16016 + }, + { + "epoch": 0.18, + "learning_rate": 1.3163010274923632e-05, + "loss": 1.3165, + "step": 16018 + }, + { + "epoch": 0.18, + "learning_rate": 1.3158381930945107e-05, + "loss": 1.7395, + "step": 16020 + }, + { + "epoch": 0.18, + "learning_rate": 1.3153753586966582e-05, + "loss": 1.0839, + "step": 16022 + }, + { + "epoch": 0.18, + "learning_rate": 1.3149125242988058e-05, + "loss": 1.2738, + "step": 16024 + }, + { + "epoch": 0.18, + "learning_rate": 1.3144496899009533e-05, + "loss": 0.9717, + "step": 16026 + }, + { + "epoch": 0.18, + "learning_rate": 1.313986855503101e-05, + "loss": 0.6932, + "step": 16028 + }, + { + "epoch": 0.18, + "learning_rate": 1.3135240211052486e-05, + "loss": 1.1069, + "step": 16030 + }, + { + "epoch": 0.18, + "learning_rate": 1.3130611867073961e-05, + "loss": 3.4873, + "step": 16032 + }, + { + "epoch": 0.18, + "learning_rate": 1.3125983523095437e-05, + "loss": 1.0881, + "step": 16034 + }, + { + "epoch": 0.18, + "learning_rate": 1.3121355179116912e-05, + "loss": 1.9712, + "step": 16036 + }, + { + "epoch": 0.18, + "learning_rate": 1.3116726835138388e-05, + "loss": 5.8077, + "step": 16038 + }, + { + "epoch": 0.18, + "learning_rate": 1.3112098491159863e-05, + "loss": 1.0496, + "step": 16040 + }, + { + "epoch": 0.18, + "learning_rate": 1.3107470147181338e-05, + "loss": 2.3881, + "step": 16042 + }, + { + "epoch": 0.18, + "learning_rate": 1.3102841803202814e-05, + "loss": 1.4778, + "step": 16044 + }, + { + "epoch": 0.18, + "learning_rate": 1.309821345922429e-05, + "loss": 0.3003, + "step": 16046 + }, + { + "epoch": 0.18, + "learning_rate": 1.3093585115245766e-05, + "loss": 0.0007, + "step": 16048 + }, + { + "epoch": 0.18, + "learning_rate": 1.3088956771267242e-05, + "loss": 2.2471, + "step": 16050 + }, + { + "epoch": 0.18, + "learning_rate": 1.3084328427288717e-05, + "loss": 1.6322, + "step": 16052 + }, + { + "epoch": 0.18, + "learning_rate": 1.3079700083310193e-05, + "loss": 2.1509, + "step": 16054 + }, + { + "epoch": 0.18, + "learning_rate": 1.3075071739331668e-05, + "loss": 1.1375, + "step": 16056 + }, + { + "epoch": 0.18, + "learning_rate": 1.3070443395353144e-05, + "loss": 2.5298, + "step": 16058 + }, + { + "epoch": 0.18, + "learning_rate": 1.3065815051374619e-05, + "loss": 1.3087, + "step": 16060 + }, + { + "epoch": 0.19, + "learning_rate": 1.3061186707396094e-05, + "loss": 0.763, + "step": 16062 + }, + { + "epoch": 0.19, + "learning_rate": 1.305655836341757e-05, + "loss": 1.0048, + "step": 16064 + }, + { + "epoch": 0.19, + "learning_rate": 1.3051930019439045e-05, + "loss": 2.2645, + "step": 16066 + }, + { + "epoch": 0.19, + "learning_rate": 1.3047301675460522e-05, + "loss": 1.8442, + "step": 16068 + }, + { + "epoch": 0.19, + "learning_rate": 1.3042673331481998e-05, + "loss": 0.5738, + "step": 16070 + }, + { + "epoch": 0.19, + "learning_rate": 1.3038044987503473e-05, + "loss": 2.7756, + "step": 16072 + }, + { + "epoch": 0.19, + "learning_rate": 1.3033416643524949e-05, + "loss": 1.6373, + "step": 16074 + }, + { + "epoch": 0.19, + "learning_rate": 1.3028788299546424e-05, + "loss": 1.2451, + "step": 16076 + }, + { + "epoch": 0.19, + "learning_rate": 1.30241599555679e-05, + "loss": 2.0081, + "step": 16078 + }, + { + "epoch": 0.19, + "learning_rate": 1.3019531611589375e-05, + "loss": 2.2573, + "step": 16080 + }, + { + "epoch": 0.19, + "learning_rate": 1.301490326761085e-05, + "loss": 1.9359, + "step": 16082 + }, + { + "epoch": 0.19, + "learning_rate": 1.3010274923632326e-05, + "loss": 4.5271, + "step": 16084 + }, + { + "epoch": 0.19, + "learning_rate": 1.3005646579653801e-05, + "loss": 3.0531, + "step": 16086 + }, + { + "epoch": 0.19, + "learning_rate": 1.3001018235675275e-05, + "loss": 3.2231, + "step": 16088 + }, + { + "epoch": 0.19, + "learning_rate": 1.299638989169675e-05, + "loss": 6.7481, + "step": 16090 + }, + { + "epoch": 0.19, + "learning_rate": 1.2991761547718226e-05, + "loss": 2.171, + "step": 16092 + }, + { + "epoch": 0.19, + "learning_rate": 1.2987133203739701e-05, + "loss": 1.0035, + "step": 16094 + }, + { + "epoch": 0.19, + "learning_rate": 1.2982504859761177e-05, + "loss": 2.182, + "step": 16096 + }, + { + "epoch": 0.19, + "learning_rate": 1.2977876515782652e-05, + "loss": 1.3414, + "step": 16098 + }, + { + "epoch": 0.19, + "learning_rate": 1.2973248171804127e-05, + "loss": 2.2192, + "step": 16100 + }, + { + "epoch": 0.19, + "learning_rate": 1.2968619827825605e-05, + "loss": 0.0075, + "step": 16102 + }, + { + "epoch": 0.19, + "learning_rate": 1.296399148384708e-05, + "loss": 2.0304, + "step": 16104 + }, + { + "epoch": 0.19, + "learning_rate": 1.2959363139868555e-05, + "loss": 3.2822, + "step": 16106 + }, + { + "epoch": 0.19, + "learning_rate": 1.295473479589003e-05, + "loss": 1.4108, + "step": 16108 + }, + { + "epoch": 0.19, + "learning_rate": 1.2950106451911506e-05, + "loss": 2.1248, + "step": 16110 + }, + { + "epoch": 0.19, + "learning_rate": 1.2945478107932982e-05, + "loss": 0.8374, + "step": 16112 + }, + { + "epoch": 0.19, + "learning_rate": 1.2940849763954457e-05, + "loss": 1.8325, + "step": 16114 + }, + { + "epoch": 0.19, + "learning_rate": 1.2936221419975933e-05, + "loss": 0.182, + "step": 16116 + }, + { + "epoch": 0.19, + "learning_rate": 1.2931593075997408e-05, + "loss": 2.7985, + "step": 16118 + }, + { + "epoch": 0.19, + "learning_rate": 1.2926964732018883e-05, + "loss": 1.4231, + "step": 16120 + }, + { + "epoch": 0.19, + "learning_rate": 1.292233638804036e-05, + "loss": 0.8151, + "step": 16122 + }, + { + "epoch": 0.19, + "learning_rate": 1.2917708044061836e-05, + "loss": 6.6907, + "step": 16124 + }, + { + "epoch": 0.19, + "learning_rate": 1.2913079700083311e-05, + "loss": 1.3617, + "step": 16126 + }, + { + "epoch": 0.19, + "learning_rate": 1.2908451356104787e-05, + "loss": 0.2355, + "step": 16128 + }, + { + "epoch": 0.19, + "learning_rate": 1.2903823012126262e-05, + "loss": 1.4244, + "step": 16130 + }, + { + "epoch": 0.19, + "learning_rate": 1.2899194668147738e-05, + "loss": 2.9212, + "step": 16132 + }, + { + "epoch": 0.19, + "learning_rate": 1.2894566324169213e-05, + "loss": 1.1398, + "step": 16134 + }, + { + "epoch": 0.19, + "learning_rate": 1.2889937980190688e-05, + "loss": 1.2627, + "step": 16136 + }, + { + "epoch": 0.19, + "learning_rate": 1.2885309636212164e-05, + "loss": 3.4292, + "step": 16138 + }, + { + "epoch": 0.19, + "learning_rate": 1.288068129223364e-05, + "loss": 2.6377, + "step": 16140 + }, + { + "epoch": 0.19, + "learning_rate": 1.2876052948255116e-05, + "loss": 0.0656, + "step": 16142 + }, + { + "epoch": 0.19, + "learning_rate": 1.2871424604276592e-05, + "loss": 3.7439, + "step": 16144 + }, + { + "epoch": 0.19, + "learning_rate": 1.2866796260298067e-05, + "loss": 1.91, + "step": 16146 + }, + { + "epoch": 0.19, + "learning_rate": 1.2862167916319543e-05, + "loss": 2.9145, + "step": 16148 + }, + { + "epoch": 0.19, + "learning_rate": 1.2857539572341018e-05, + "loss": 0.1616, + "step": 16150 + }, + { + "epoch": 0.19, + "learning_rate": 1.2852911228362494e-05, + "loss": 0.6317, + "step": 16152 + }, + { + "epoch": 0.19, + "learning_rate": 1.2848282884383969e-05, + "loss": 5.9546, + "step": 16154 + }, + { + "epoch": 0.19, + "learning_rate": 1.2843654540405444e-05, + "loss": 0.132, + "step": 16156 + }, + { + "epoch": 0.19, + "learning_rate": 1.283902619642692e-05, + "loss": 0.1423, + "step": 16158 + }, + { + "epoch": 0.19, + "learning_rate": 1.2834397852448394e-05, + "loss": 0.6536, + "step": 16160 + }, + { + "epoch": 0.19, + "learning_rate": 1.2829769508469869e-05, + "loss": 0.0051, + "step": 16162 + }, + { + "epoch": 0.19, + "learning_rate": 1.2825141164491344e-05, + "loss": 7.444, + "step": 16164 + }, + { + "epoch": 0.19, + "learning_rate": 1.282051282051282e-05, + "loss": 4.8365, + "step": 16166 + }, + { + "epoch": 0.19, + "learning_rate": 1.2815884476534295e-05, + "loss": 1.6826, + "step": 16168 + }, + { + "epoch": 0.19, + "learning_rate": 1.281125613255577e-05, + "loss": 0.382, + "step": 16170 + }, + { + "epoch": 0.19, + "learning_rate": 1.2806627788577246e-05, + "loss": 5.7161, + "step": 16172 + }, + { + "epoch": 0.19, + "learning_rate": 1.2801999444598722e-05, + "loss": 3.6571, + "step": 16174 + }, + { + "epoch": 0.19, + "learning_rate": 1.2797371100620199e-05, + "loss": 4.0811, + "step": 16176 + }, + { + "epoch": 0.19, + "learning_rate": 1.2792742756641674e-05, + "loss": 0.7795, + "step": 16178 + }, + { + "epoch": 0.19, + "learning_rate": 1.278811441266315e-05, + "loss": 2.6022, + "step": 16180 + }, + { + "epoch": 0.19, + "learning_rate": 1.2783486068684625e-05, + "loss": 0.3845, + "step": 16182 + }, + { + "epoch": 0.19, + "learning_rate": 1.27788577247061e-05, + "loss": 5.0095, + "step": 16184 + }, + { + "epoch": 0.19, + "learning_rate": 1.2774229380727576e-05, + "loss": 3.7153, + "step": 16186 + }, + { + "epoch": 0.19, + "learning_rate": 1.2769601036749051e-05, + "loss": 0.0018, + "step": 16188 + }, + { + "epoch": 0.19, + "learning_rate": 1.2764972692770527e-05, + "loss": 2.5138, + "step": 16190 + }, + { + "epoch": 0.19, + "learning_rate": 1.2760344348792002e-05, + "loss": 0.0343, + "step": 16192 + }, + { + "epoch": 0.19, + "learning_rate": 1.2755716004813477e-05, + "loss": 2.9664, + "step": 16194 + }, + { + "epoch": 0.19, + "learning_rate": 1.2751087660834955e-05, + "loss": 2.5488, + "step": 16196 + }, + { + "epoch": 0.19, + "learning_rate": 1.274645931685643e-05, + "loss": 7.4165, + "step": 16198 + }, + { + "epoch": 0.19, + "learning_rate": 1.2741830972877905e-05, + "loss": 1.2041, + "step": 16200 + }, + { + "epoch": 0.19, + "learning_rate": 1.2737202628899381e-05, + "loss": 0.5951, + "step": 16202 + }, + { + "epoch": 0.19, + "learning_rate": 1.2732574284920856e-05, + "loss": 5.3302, + "step": 16204 + }, + { + "epoch": 0.19, + "learning_rate": 1.2727945940942332e-05, + "loss": 0.0444, + "step": 16206 + }, + { + "epoch": 0.19, + "learning_rate": 1.2723317596963807e-05, + "loss": 0.0047, + "step": 16208 + }, + { + "epoch": 0.19, + "learning_rate": 1.2718689252985283e-05, + "loss": 5.362, + "step": 16210 + }, + { + "epoch": 0.19, + "learning_rate": 1.2714060909006758e-05, + "loss": 0.0131, + "step": 16212 + }, + { + "epoch": 0.19, + "learning_rate": 1.2709432565028233e-05, + "loss": 5.2705, + "step": 16214 + }, + { + "epoch": 0.19, + "learning_rate": 1.270480422104971e-05, + "loss": 1.1378, + "step": 16216 + }, + { + "epoch": 0.19, + "learning_rate": 1.2700175877071186e-05, + "loss": 0.0027, + "step": 16218 + }, + { + "epoch": 0.19, + "learning_rate": 1.2695547533092661e-05, + "loss": 0.1544, + "step": 16220 + }, + { + "epoch": 0.19, + "learning_rate": 1.2690919189114137e-05, + "loss": 2.0515, + "step": 16222 + }, + { + "epoch": 0.19, + "learning_rate": 1.2686290845135612e-05, + "loss": 1.4813, + "step": 16224 + }, + { + "epoch": 0.19, + "learning_rate": 1.2681662501157088e-05, + "loss": 0.4557, + "step": 16226 + }, + { + "epoch": 0.19, + "learning_rate": 1.2677034157178563e-05, + "loss": 0.0069, + "step": 16228 + }, + { + "epoch": 0.19, + "learning_rate": 1.2672405813200038e-05, + "loss": 0.3617, + "step": 16230 + }, + { + "epoch": 0.19, + "learning_rate": 1.2667777469221512e-05, + "loss": 1.0313, + "step": 16232 + }, + { + "epoch": 0.19, + "learning_rate": 1.2663149125242988e-05, + "loss": 0.0036, + "step": 16234 + }, + { + "epoch": 0.19, + "learning_rate": 1.2658520781264463e-05, + "loss": 2.0536, + "step": 16236 + }, + { + "epoch": 0.19, + "learning_rate": 1.2653892437285938e-05, + "loss": 0.709, + "step": 16238 + }, + { + "epoch": 0.19, + "learning_rate": 1.2649264093307414e-05, + "loss": 1.2628, + "step": 16240 + }, + { + "epoch": 0.19, + "learning_rate": 1.264463574932889e-05, + "loss": 1.6647, + "step": 16242 + }, + { + "epoch": 0.19, + "learning_rate": 1.2640007405350365e-05, + "loss": 0.0007, + "step": 16244 + }, + { + "epoch": 0.19, + "learning_rate": 1.263537906137184e-05, + "loss": 0.0006, + "step": 16246 + }, + { + "epoch": 0.19, + "learning_rate": 1.2630750717393317e-05, + "loss": 0.4192, + "step": 16248 + }, + { + "epoch": 0.19, + "learning_rate": 1.2626122373414793e-05, + "loss": 0.3129, + "step": 16250 + }, + { + "epoch": 0.19, + "learning_rate": 1.2621494029436268e-05, + "loss": 2.0106, + "step": 16252 + }, + { + "epoch": 0.19, + "learning_rate": 1.2616865685457744e-05, + "loss": 0.0012, + "step": 16254 + }, + { + "epoch": 0.19, + "learning_rate": 1.2612237341479219e-05, + "loss": 0.0012, + "step": 16256 + }, + { + "epoch": 0.19, + "learning_rate": 1.2607608997500694e-05, + "loss": 1.7085, + "step": 16258 + }, + { + "epoch": 0.19, + "learning_rate": 1.260298065352217e-05, + "loss": 1.1681, + "step": 16260 + }, + { + "epoch": 0.19, + "learning_rate": 1.2598352309543645e-05, + "loss": 0.0013, + "step": 16262 + }, + { + "epoch": 0.19, + "learning_rate": 1.259372396556512e-05, + "loss": 0.6368, + "step": 16264 + }, + { + "epoch": 0.19, + "learning_rate": 1.2589095621586596e-05, + "loss": 2.5544, + "step": 16266 + }, + { + "epoch": 0.19, + "learning_rate": 1.2584467277608073e-05, + "loss": 1.3257, + "step": 16268 + }, + { + "epoch": 0.19, + "learning_rate": 1.2579838933629549e-05, + "loss": 0.0749, + "step": 16270 + }, + { + "epoch": 0.19, + "learning_rate": 1.2575210589651024e-05, + "loss": 2.3704, + "step": 16272 + }, + { + "epoch": 0.19, + "learning_rate": 1.25705822456725e-05, + "loss": 1.2646, + "step": 16274 + }, + { + "epoch": 0.19, + "learning_rate": 1.2565953901693975e-05, + "loss": 3.0724, + "step": 16276 + }, + { + "epoch": 0.19, + "learning_rate": 1.256132555771545e-05, + "loss": 1.9161, + "step": 16278 + }, + { + "epoch": 0.19, + "learning_rate": 1.2556697213736926e-05, + "loss": 1.2891, + "step": 16280 + }, + { + "epoch": 0.19, + "learning_rate": 1.2552068869758401e-05, + "loss": 0.9119, + "step": 16282 + }, + { + "epoch": 0.19, + "learning_rate": 1.2547440525779877e-05, + "loss": 0.0072, + "step": 16284 + }, + { + "epoch": 0.19, + "learning_rate": 1.2542812181801352e-05, + "loss": 0.0004, + "step": 16286 + }, + { + "epoch": 0.19, + "learning_rate": 1.253818383782283e-05, + "loss": 1.7973, + "step": 16288 + }, + { + "epoch": 0.19, + "learning_rate": 1.2533555493844305e-05, + "loss": 4.8028, + "step": 16290 + }, + { + "epoch": 0.19, + "learning_rate": 1.252892714986578e-05, + "loss": 1.171, + "step": 16292 + }, + { + "epoch": 0.19, + "learning_rate": 1.2524298805887255e-05, + "loss": 3.5493, + "step": 16294 + }, + { + "epoch": 0.19, + "learning_rate": 1.2519670461908731e-05, + "loss": 0.0005, + "step": 16296 + }, + { + "epoch": 0.19, + "learning_rate": 1.2515042117930206e-05, + "loss": 0.453, + "step": 16298 + }, + { + "epoch": 0.19, + "learning_rate": 1.2510413773951682e-05, + "loss": 0.3644, + "step": 16300 + }, + { + "epoch": 0.19, + "learning_rate": 1.2505785429973157e-05, + "loss": 2.8214, + "step": 16302 + }, + { + "epoch": 0.19, + "learning_rate": 1.2501157085994633e-05, + "loss": 2.7972, + "step": 16304 + }, + { + "epoch": 0.19, + "learning_rate": 1.2496528742016108e-05, + "loss": 0.1817, + "step": 16306 + }, + { + "epoch": 0.19, + "learning_rate": 1.2491900398037583e-05, + "loss": 0.717, + "step": 16308 + }, + { + "epoch": 0.19, + "learning_rate": 1.2487272054059059e-05, + "loss": 2.1301, + "step": 16310 + }, + { + "epoch": 0.19, + "learning_rate": 1.2482643710080534e-05, + "loss": 0.6788, + "step": 16312 + }, + { + "epoch": 0.19, + "learning_rate": 1.247801536610201e-05, + "loss": 2.0209, + "step": 16314 + }, + { + "epoch": 0.19, + "learning_rate": 1.2473387022123485e-05, + "loss": 2.4101, + "step": 16316 + }, + { + "epoch": 0.19, + "learning_rate": 1.246875867814496e-05, + "loss": 1.9066, + "step": 16318 + }, + { + "epoch": 0.19, + "learning_rate": 1.2464130334166436e-05, + "loss": 1.5182, + "step": 16320 + }, + { + "epoch": 0.19, + "learning_rate": 1.2459501990187911e-05, + "loss": 0.3171, + "step": 16322 + }, + { + "epoch": 0.19, + "learning_rate": 1.2454873646209387e-05, + "loss": 0.0004, + "step": 16324 + }, + { + "epoch": 0.19, + "learning_rate": 1.2450245302230862e-05, + "loss": 1.8526, + "step": 16326 + }, + { + "epoch": 0.19, + "learning_rate": 1.2445616958252338e-05, + "loss": 0.1933, + "step": 16328 + }, + { + "epoch": 0.19, + "learning_rate": 1.2440988614273813e-05, + "loss": 0.7673, + "step": 16330 + }, + { + "epoch": 0.19, + "learning_rate": 1.2436360270295289e-05, + "loss": 2.2558, + "step": 16332 + }, + { + "epoch": 0.19, + "learning_rate": 1.2431731926316764e-05, + "loss": 2.5224, + "step": 16334 + }, + { + "epoch": 0.19, + "learning_rate": 1.242710358233824e-05, + "loss": 1.3792, + "step": 16336 + }, + { + "epoch": 0.19, + "learning_rate": 1.2422475238359715e-05, + "loss": 1.6133, + "step": 16338 + }, + { + "epoch": 0.19, + "learning_rate": 1.241784689438119e-05, + "loss": 3.953, + "step": 16340 + }, + { + "epoch": 0.19, + "learning_rate": 1.2413218550402667e-05, + "loss": 0.4873, + "step": 16342 + }, + { + "epoch": 0.19, + "learning_rate": 1.2408590206424143e-05, + "loss": 0.3369, + "step": 16344 + }, + { + "epoch": 0.19, + "learning_rate": 1.2403961862445618e-05, + "loss": 2.4693, + "step": 16346 + }, + { + "epoch": 0.19, + "learning_rate": 1.2399333518467094e-05, + "loss": 3.9227, + "step": 16348 + }, + { + "epoch": 0.19, + "learning_rate": 1.2394705174488569e-05, + "loss": 2.6552, + "step": 16350 + }, + { + "epoch": 0.19, + "learning_rate": 1.2390076830510044e-05, + "loss": 3.5149, + "step": 16352 + }, + { + "epoch": 0.19, + "learning_rate": 1.238544848653152e-05, + "loss": 4.2598, + "step": 16354 + }, + { + "epoch": 0.19, + "learning_rate": 1.2380820142552995e-05, + "loss": 0.0009, + "step": 16356 + }, + { + "epoch": 0.19, + "learning_rate": 1.237619179857447e-05, + "loss": 0.0039, + "step": 16358 + }, + { + "epoch": 0.19, + "learning_rate": 1.2371563454595946e-05, + "loss": 2.0261, + "step": 16360 + }, + { + "epoch": 0.19, + "learning_rate": 1.2366935110617422e-05, + "loss": 4.1267, + "step": 16362 + }, + { + "epoch": 0.19, + "learning_rate": 1.2362306766638897e-05, + "loss": 0.93, + "step": 16364 + }, + { + "epoch": 0.19, + "learning_rate": 1.2357678422660372e-05, + "loss": 2.5928, + "step": 16366 + }, + { + "epoch": 0.19, + "learning_rate": 1.2353050078681848e-05, + "loss": 0.6581, + "step": 16368 + }, + { + "epoch": 0.19, + "learning_rate": 1.2348421734703323e-05, + "loss": 0.3768, + "step": 16370 + }, + { + "epoch": 0.19, + "learning_rate": 1.2343793390724799e-05, + "loss": 2.1599, + "step": 16372 + }, + { + "epoch": 0.19, + "learning_rate": 1.2339165046746274e-05, + "loss": 1.7589, + "step": 16374 + }, + { + "epoch": 0.19, + "learning_rate": 1.233453670276775e-05, + "loss": 0.4627, + "step": 16376 + }, + { + "epoch": 0.19, + "learning_rate": 1.2329908358789227e-05, + "loss": 0.0032, + "step": 16378 + }, + { + "epoch": 0.19, + "learning_rate": 1.2325280014810702e-05, + "loss": 0.2507, + "step": 16380 + }, + { + "epoch": 0.19, + "learning_rate": 1.2320651670832178e-05, + "loss": 0.5998, + "step": 16382 + }, + { + "epoch": 0.19, + "learning_rate": 1.2316023326853653e-05, + "loss": 0.0202, + "step": 16384 + }, + { + "epoch": 0.19, + "learning_rate": 1.2311394982875128e-05, + "loss": 0.8916, + "step": 16386 + }, + { + "epoch": 0.19, + "learning_rate": 1.2306766638896604e-05, + "loss": 3.5778, + "step": 16388 + }, + { + "epoch": 0.19, + "learning_rate": 1.230213829491808e-05, + "loss": 3.7081, + "step": 16390 + }, + { + "epoch": 0.19, + "learning_rate": 1.2297509950939555e-05, + "loss": 3.846, + "step": 16392 + }, + { + "epoch": 0.19, + "learning_rate": 1.229288160696103e-05, + "loss": 0.3014, + "step": 16394 + }, + { + "epoch": 0.19, + "learning_rate": 1.2288253262982506e-05, + "loss": 0.0888, + "step": 16396 + }, + { + "epoch": 0.19, + "learning_rate": 1.2283624919003981e-05, + "loss": 0.0003, + "step": 16398 + }, + { + "epoch": 0.19, + "learning_rate": 1.2278996575025456e-05, + "loss": 0.0392, + "step": 16400 + }, + { + "epoch": 0.19, + "learning_rate": 1.2274368231046932e-05, + "loss": 2.896, + "step": 16402 + }, + { + "epoch": 0.19, + "learning_rate": 1.2269739887068407e-05, + "loss": 0.4299, + "step": 16404 + }, + { + "epoch": 0.19, + "learning_rate": 1.2265111543089883e-05, + "loss": 0.1854, + "step": 16406 + }, + { + "epoch": 0.19, + "learning_rate": 1.2260483199111358e-05, + "loss": 1.6227, + "step": 16408 + }, + { + "epoch": 0.19, + "learning_rate": 1.2255854855132833e-05, + "loss": 5.167, + "step": 16410 + }, + { + "epoch": 0.19, + "learning_rate": 1.2251226511154309e-05, + "loss": 0.8971, + "step": 16412 + }, + { + "epoch": 0.19, + "learning_rate": 1.2246598167175784e-05, + "loss": 1.3134, + "step": 16414 + }, + { + "epoch": 0.19, + "learning_rate": 1.2241969823197261e-05, + "loss": 3.2339, + "step": 16416 + }, + { + "epoch": 0.19, + "learning_rate": 1.2237341479218737e-05, + "loss": 2.5472, + "step": 16418 + }, + { + "epoch": 0.19, + "learning_rate": 1.2232713135240212e-05, + "loss": 1.8026, + "step": 16420 + }, + { + "epoch": 0.19, + "learning_rate": 1.2228084791261688e-05, + "loss": 2.9392, + "step": 16422 + }, + { + "epoch": 0.19, + "learning_rate": 1.2223456447283163e-05, + "loss": 0.8704, + "step": 16424 + }, + { + "epoch": 0.19, + "learning_rate": 1.2218828103304639e-05, + "loss": 5.0245, + "step": 16426 + }, + { + "epoch": 0.19, + "learning_rate": 1.2214199759326114e-05, + "loss": 0.3228, + "step": 16428 + }, + { + "epoch": 0.19, + "learning_rate": 1.220957141534759e-05, + "loss": 0.2864, + "step": 16430 + }, + { + "epoch": 0.19, + "learning_rate": 1.2204943071369065e-05, + "loss": 2.5672, + "step": 16432 + }, + { + "epoch": 0.19, + "learning_rate": 1.220031472739054e-05, + "loss": 6.8949, + "step": 16434 + }, + { + "epoch": 0.19, + "learning_rate": 1.2195686383412016e-05, + "loss": 0.4981, + "step": 16436 + }, + { + "epoch": 0.19, + "learning_rate": 1.2191058039433491e-05, + "loss": 1.4934, + "step": 16438 + }, + { + "epoch": 0.19, + "learning_rate": 1.2186429695454967e-05, + "loss": 2.1996, + "step": 16440 + }, + { + "epoch": 0.19, + "learning_rate": 1.2181801351476442e-05, + "loss": 0.0487, + "step": 16442 + }, + { + "epoch": 0.19, + "learning_rate": 1.2177173007497917e-05, + "loss": 1.8961, + "step": 16444 + }, + { + "epoch": 0.19, + "learning_rate": 1.2172544663519393e-05, + "loss": 0.0873, + "step": 16446 + }, + { + "epoch": 0.19, + "learning_rate": 1.2167916319540868e-05, + "loss": 2.9106, + "step": 16448 + }, + { + "epoch": 0.19, + "learning_rate": 1.2163287975562344e-05, + "loss": 2.3383, + "step": 16450 + }, + { + "epoch": 0.19, + "learning_rate": 1.215865963158382e-05, + "loss": 0.4736, + "step": 16452 + }, + { + "epoch": 0.19, + "learning_rate": 1.2154031287605296e-05, + "loss": 2.3421, + "step": 16454 + }, + { + "epoch": 0.19, + "learning_rate": 1.2149402943626772e-05, + "loss": 4.6944, + "step": 16456 + }, + { + "epoch": 0.19, + "learning_rate": 1.2144774599648247e-05, + "loss": 5.225, + "step": 16458 + }, + { + "epoch": 0.19, + "learning_rate": 1.2140146255669722e-05, + "loss": 0.7766, + "step": 16460 + }, + { + "epoch": 0.19, + "learning_rate": 1.2135517911691198e-05, + "loss": 0.3341, + "step": 16462 + }, + { + "epoch": 0.19, + "learning_rate": 1.2130889567712673e-05, + "loss": 1.8148, + "step": 16464 + }, + { + "epoch": 0.19, + "learning_rate": 1.2126261223734149e-05, + "loss": 1.7897, + "step": 16466 + }, + { + "epoch": 0.19, + "learning_rate": 1.2121632879755624e-05, + "loss": 0.1112, + "step": 16468 + }, + { + "epoch": 0.19, + "learning_rate": 1.21170045357771e-05, + "loss": 1.1565, + "step": 16470 + }, + { + "epoch": 0.19, + "learning_rate": 1.2112376191798575e-05, + "loss": 1.8882, + "step": 16472 + }, + { + "epoch": 0.19, + "learning_rate": 1.210774784782005e-05, + "loss": 2.0379, + "step": 16474 + }, + { + "epoch": 0.19, + "learning_rate": 1.2103119503841526e-05, + "loss": 1.2443, + "step": 16476 + }, + { + "epoch": 0.19, + "learning_rate": 1.2098491159863001e-05, + "loss": 2.5731, + "step": 16478 + }, + { + "epoch": 0.19, + "learning_rate": 1.2093862815884477e-05, + "loss": 4.5285, + "step": 16480 + }, + { + "epoch": 0.19, + "learning_rate": 1.2089234471905952e-05, + "loss": 0.2478, + "step": 16482 + }, + { + "epoch": 0.19, + "learning_rate": 1.2084606127927428e-05, + "loss": 4.6885, + "step": 16484 + }, + { + "epoch": 0.19, + "learning_rate": 1.2079977783948903e-05, + "loss": 1.5709, + "step": 16486 + }, + { + "epoch": 0.19, + "learning_rate": 1.2075349439970378e-05, + "loss": 1.2734, + "step": 16488 + }, + { + "epoch": 0.19, + "learning_rate": 1.2070721095991856e-05, + "loss": 1.1364, + "step": 16490 + }, + { + "epoch": 0.19, + "learning_rate": 1.2066092752013331e-05, + "loss": 2.8797, + "step": 16492 + }, + { + "epoch": 0.19, + "learning_rate": 1.2061464408034806e-05, + "loss": 5.6982, + "step": 16494 + }, + { + "epoch": 0.19, + "learning_rate": 1.2056836064056282e-05, + "loss": 0.0048, + "step": 16496 + }, + { + "epoch": 0.19, + "learning_rate": 1.2052207720077757e-05, + "loss": 0.0034, + "step": 16498 + }, + { + "epoch": 0.19, + "learning_rate": 1.2047579376099233e-05, + "loss": 4.0809, + "step": 16500 + }, + { + "epoch": 0.19, + "learning_rate": 1.2042951032120708e-05, + "loss": 0.7263, + "step": 16502 + }, + { + "epoch": 0.19, + "learning_rate": 1.2038322688142182e-05, + "loss": 3.4813, + "step": 16504 + }, + { + "epoch": 0.19, + "learning_rate": 1.2033694344163659e-05, + "loss": 0.0033, + "step": 16506 + }, + { + "epoch": 0.19, + "learning_rate": 1.2029066000185134e-05, + "loss": 0.1044, + "step": 16508 + }, + { + "epoch": 0.19, + "learning_rate": 1.202443765620661e-05, + "loss": 0.1088, + "step": 16510 + }, + { + "epoch": 0.19, + "learning_rate": 1.2019809312228085e-05, + "loss": 0.328, + "step": 16512 + }, + { + "epoch": 0.19, + "learning_rate": 1.201518096824956e-05, + "loss": 0.923, + "step": 16514 + }, + { + "epoch": 0.19, + "learning_rate": 1.2010552624271036e-05, + "loss": 0.6846, + "step": 16516 + }, + { + "epoch": 0.19, + "learning_rate": 1.2005924280292511e-05, + "loss": 1.1644, + "step": 16518 + }, + { + "epoch": 0.19, + "learning_rate": 1.2001295936313987e-05, + "loss": 0.7502, + "step": 16520 + }, + { + "epoch": 0.19, + "learning_rate": 1.1996667592335462e-05, + "loss": 1.7316, + "step": 16522 + }, + { + "epoch": 0.19, + "learning_rate": 1.1992039248356938e-05, + "loss": 4.5402, + "step": 16524 + }, + { + "epoch": 0.19, + "learning_rate": 1.1987410904378415e-05, + "loss": 2.7595, + "step": 16526 + }, + { + "epoch": 0.19, + "learning_rate": 1.198278256039989e-05, + "loss": 1.9295, + "step": 16528 + }, + { + "epoch": 0.19, + "learning_rate": 1.1978154216421366e-05, + "loss": 0.8337, + "step": 16530 + }, + { + "epoch": 0.19, + "learning_rate": 1.1973525872442841e-05, + "loss": 1.1085, + "step": 16532 + }, + { + "epoch": 0.19, + "learning_rate": 1.1968897528464317e-05, + "loss": 3.8741, + "step": 16534 + }, + { + "epoch": 0.19, + "learning_rate": 1.1964269184485792e-05, + "loss": 3.8946, + "step": 16536 + }, + { + "epoch": 0.19, + "learning_rate": 1.1959640840507267e-05, + "loss": 0.8686, + "step": 16538 + }, + { + "epoch": 0.19, + "learning_rate": 1.1955012496528743e-05, + "loss": 0.4987, + "step": 16540 + }, + { + "epoch": 0.19, + "learning_rate": 1.1950384152550218e-05, + "loss": 0.1846, + "step": 16542 + }, + { + "epoch": 0.19, + "learning_rate": 1.1945755808571694e-05, + "loss": 0.0581, + "step": 16544 + }, + { + "epoch": 0.19, + "learning_rate": 1.1941127464593169e-05, + "loss": 0.0682, + "step": 16546 + }, + { + "epoch": 0.19, + "learning_rate": 1.1936499120614645e-05, + "loss": 1.3629, + "step": 16548 + }, + { + "epoch": 0.19, + "learning_rate": 1.193187077663612e-05, + "loss": 2.2684, + "step": 16550 + }, + { + "epoch": 0.19, + "learning_rate": 1.1927242432657595e-05, + "loss": 0.1936, + "step": 16552 + }, + { + "epoch": 0.19, + "learning_rate": 1.192261408867907e-05, + "loss": 0.338, + "step": 16554 + }, + { + "epoch": 0.19, + "learning_rate": 1.1917985744700546e-05, + "loss": 1.2696, + "step": 16556 + }, + { + "epoch": 0.19, + "learning_rate": 1.1913357400722022e-05, + "loss": 2.9663, + "step": 16558 + }, + { + "epoch": 0.19, + "learning_rate": 1.1908729056743497e-05, + "loss": 0.5837, + "step": 16560 + }, + { + "epoch": 0.19, + "learning_rate": 1.1904100712764974e-05, + "loss": 3.6227, + "step": 16562 + }, + { + "epoch": 0.19, + "learning_rate": 1.189947236878645e-05, + "loss": 2.5318, + "step": 16564 + }, + { + "epoch": 0.19, + "learning_rate": 1.1894844024807925e-05, + "loss": 5.9708, + "step": 16566 + }, + { + "epoch": 0.19, + "learning_rate": 1.18902156808294e-05, + "loss": 0.0158, + "step": 16568 + }, + { + "epoch": 0.19, + "learning_rate": 1.1885587336850876e-05, + "loss": 0.0574, + "step": 16570 + }, + { + "epoch": 0.19, + "learning_rate": 1.1880958992872351e-05, + "loss": 0.003, + "step": 16572 + }, + { + "epoch": 0.19, + "learning_rate": 1.1876330648893827e-05, + "loss": 4.6307, + "step": 16574 + }, + { + "epoch": 0.19, + "learning_rate": 1.1871702304915302e-05, + "loss": 1.5728, + "step": 16576 + }, + { + "epoch": 0.19, + "learning_rate": 1.1867073960936776e-05, + "loss": 0.3273, + "step": 16578 + }, + { + "epoch": 0.19, + "learning_rate": 1.1862445616958253e-05, + "loss": 4.7165, + "step": 16580 + }, + { + "epoch": 0.19, + "learning_rate": 1.1857817272979728e-05, + "loss": 0.3485, + "step": 16582 + }, + { + "epoch": 0.19, + "learning_rate": 1.1853188929001204e-05, + "loss": 1.0407, + "step": 16584 + }, + { + "epoch": 0.19, + "learning_rate": 1.184856058502268e-05, + "loss": 1.8701, + "step": 16586 + }, + { + "epoch": 0.19, + "learning_rate": 1.1843932241044155e-05, + "loss": 0.0249, + "step": 16588 + }, + { + "epoch": 0.19, + "learning_rate": 1.183930389706563e-05, + "loss": 0.5137, + "step": 16590 + }, + { + "epoch": 0.19, + "learning_rate": 1.1834675553087106e-05, + "loss": 1.0001, + "step": 16592 + }, + { + "epoch": 0.19, + "learning_rate": 1.1830047209108581e-05, + "loss": 0.0262, + "step": 16594 + }, + { + "epoch": 0.19, + "learning_rate": 1.1825418865130056e-05, + "loss": 0.505, + "step": 16596 + }, + { + "epoch": 0.19, + "learning_rate": 1.1820790521151532e-05, + "loss": 1.4374, + "step": 16598 + }, + { + "epoch": 0.19, + "learning_rate": 1.1816162177173009e-05, + "loss": 0.1597, + "step": 16600 + }, + { + "epoch": 0.19, + "learning_rate": 1.1811533833194484e-05, + "loss": 1.433, + "step": 16602 + }, + { + "epoch": 0.19, + "learning_rate": 1.180690548921596e-05, + "loss": 0.0011, + "step": 16604 + }, + { + "epoch": 0.19, + "learning_rate": 1.1802277145237435e-05, + "loss": 4.9954, + "step": 16606 + }, + { + "epoch": 0.19, + "learning_rate": 1.179764880125891e-05, + "loss": 0.0657, + "step": 16608 + }, + { + "epoch": 0.19, + "learning_rate": 1.1793020457280386e-05, + "loss": 0.444, + "step": 16610 + }, + { + "epoch": 0.19, + "learning_rate": 1.1788392113301862e-05, + "loss": 0.1062, + "step": 16612 + }, + { + "epoch": 0.19, + "learning_rate": 1.1783763769323335e-05, + "loss": 0.0892, + "step": 16614 + }, + { + "epoch": 0.19, + "learning_rate": 1.1779135425344812e-05, + "loss": 4.7708, + "step": 16616 + }, + { + "epoch": 0.19, + "learning_rate": 1.1774507081366288e-05, + "loss": 5.361, + "step": 16618 + }, + { + "epoch": 0.19, + "learning_rate": 1.1769878737387763e-05, + "loss": 0.2873, + "step": 16620 + }, + { + "epoch": 0.19, + "learning_rate": 1.1765250393409239e-05, + "loss": 0.9596, + "step": 16622 + }, + { + "epoch": 0.19, + "learning_rate": 1.1760622049430714e-05, + "loss": 0.012, + "step": 16624 + }, + { + "epoch": 0.19, + "learning_rate": 1.175599370545219e-05, + "loss": 0.0078, + "step": 16626 + }, + { + "epoch": 0.19, + "learning_rate": 1.1751365361473665e-05, + "loss": 0.4386, + "step": 16628 + }, + { + "epoch": 0.19, + "learning_rate": 1.174673701749514e-05, + "loss": 4.1576, + "step": 16630 + }, + { + "epoch": 0.19, + "learning_rate": 1.1742108673516616e-05, + "loss": 1.8449, + "step": 16632 + }, + { + "epoch": 0.19, + "learning_rate": 1.1737480329538091e-05, + "loss": 0.5614, + "step": 16634 + }, + { + "epoch": 0.19, + "learning_rate": 1.1732851985559568e-05, + "loss": 5.4648, + "step": 16636 + }, + { + "epoch": 0.19, + "learning_rate": 1.1728223641581044e-05, + "loss": 1.1665, + "step": 16638 + }, + { + "epoch": 0.19, + "learning_rate": 1.172359529760252e-05, + "loss": 0.0337, + "step": 16640 + }, + { + "epoch": 0.19, + "learning_rate": 1.1718966953623995e-05, + "loss": 2.5813, + "step": 16642 + }, + { + "epoch": 0.19, + "learning_rate": 1.171433860964547e-05, + "loss": 1.9368, + "step": 16644 + }, + { + "epoch": 0.19, + "learning_rate": 1.1709710265666945e-05, + "loss": 0.0087, + "step": 16646 + }, + { + "epoch": 0.19, + "learning_rate": 1.1705081921688421e-05, + "loss": 0.7183, + "step": 16648 + }, + { + "epoch": 0.19, + "learning_rate": 1.1700453577709895e-05, + "loss": 7.2425, + "step": 16650 + }, + { + "epoch": 0.19, + "learning_rate": 1.1695825233731372e-05, + "loss": 1.0771, + "step": 16652 + }, + { + "epoch": 0.19, + "learning_rate": 1.1691196889752847e-05, + "loss": 1.4651, + "step": 16654 + }, + { + "epoch": 0.19, + "learning_rate": 1.1686568545774323e-05, + "loss": 2.6842, + "step": 16656 + }, + { + "epoch": 0.19, + "learning_rate": 1.1681940201795798e-05, + "loss": 2.3543, + "step": 16658 + }, + { + "epoch": 0.19, + "learning_rate": 1.1677311857817273e-05, + "loss": 0.2186, + "step": 16660 + }, + { + "epoch": 0.19, + "learning_rate": 1.1672683513838749e-05, + "loss": 0.2574, + "step": 16662 + }, + { + "epoch": 0.19, + "learning_rate": 1.1668055169860224e-05, + "loss": 0.0228, + "step": 16664 + }, + { + "epoch": 0.19, + "learning_rate": 1.16634268258817e-05, + "loss": 1.4801, + "step": 16666 + }, + { + "epoch": 0.19, + "learning_rate": 1.1658798481903175e-05, + "loss": 2.8312, + "step": 16668 + }, + { + "epoch": 0.19, + "learning_rate": 1.165417013792465e-05, + "loss": 2.5087, + "step": 16670 + }, + { + "epoch": 0.19, + "learning_rate": 1.1649541793946128e-05, + "loss": 0.0004, + "step": 16672 + }, + { + "epoch": 0.19, + "learning_rate": 1.1644913449967603e-05, + "loss": 0.6064, + "step": 16674 + }, + { + "epoch": 0.19, + "learning_rate": 1.1640285105989079e-05, + "loss": 2.7847, + "step": 16676 + }, + { + "epoch": 0.19, + "learning_rate": 1.1635656762010554e-05, + "loss": 1.2125, + "step": 16678 + }, + { + "epoch": 0.19, + "learning_rate": 1.163102841803203e-05, + "loss": 0.8196, + "step": 16680 + }, + { + "epoch": 0.19, + "learning_rate": 1.1626400074053505e-05, + "loss": 0.2525, + "step": 16682 + }, + { + "epoch": 0.19, + "learning_rate": 1.162177173007498e-05, + "loss": 0.1103, + "step": 16684 + }, + { + "epoch": 0.19, + "learning_rate": 1.1617143386096454e-05, + "loss": 0.5634, + "step": 16686 + }, + { + "epoch": 0.19, + "learning_rate": 1.161251504211793e-05, + "loss": 0.007, + "step": 16688 + }, + { + "epoch": 0.19, + "learning_rate": 1.1607886698139406e-05, + "loss": 0.7245, + "step": 16690 + }, + { + "epoch": 0.19, + "learning_rate": 1.1603258354160882e-05, + "loss": 3.683, + "step": 16692 + }, + { + "epoch": 0.19, + "learning_rate": 1.1598630010182357e-05, + "loss": 0.0213, + "step": 16694 + }, + { + "epoch": 0.19, + "learning_rate": 1.1594001666203833e-05, + "loss": 0.47, + "step": 16696 + }, + { + "epoch": 0.19, + "learning_rate": 1.1589373322225308e-05, + "loss": 6.8546, + "step": 16698 + }, + { + "epoch": 0.19, + "learning_rate": 1.1584744978246784e-05, + "loss": 2.8546, + "step": 16700 + }, + { + "epoch": 0.19, + "learning_rate": 1.1580116634268259e-05, + "loss": 0.1099, + "step": 16702 + }, + { + "epoch": 0.19, + "learning_rate": 1.1575488290289734e-05, + "loss": 3.5163, + "step": 16704 + }, + { + "epoch": 0.19, + "learning_rate": 1.157085994631121e-05, + "loss": 2.7102, + "step": 16706 + }, + { + "epoch": 0.19, + "learning_rate": 1.1566231602332685e-05, + "loss": 1.1725, + "step": 16708 + }, + { + "epoch": 0.19, + "learning_rate": 1.1561603258354162e-05, + "loss": 1.6796, + "step": 16710 + }, + { + "epoch": 0.19, + "learning_rate": 1.1556974914375638e-05, + "loss": 1.7868, + "step": 16712 + }, + { + "epoch": 0.19, + "learning_rate": 1.1552346570397113e-05, + "loss": 0.6459, + "step": 16714 + }, + { + "epoch": 0.19, + "learning_rate": 1.1547718226418589e-05, + "loss": 0.0272, + "step": 16716 + }, + { + "epoch": 0.19, + "learning_rate": 1.1543089882440064e-05, + "loss": 0.1399, + "step": 16718 + }, + { + "epoch": 0.19, + "learning_rate": 1.153846153846154e-05, + "loss": 3.499, + "step": 16720 + }, + { + "epoch": 0.19, + "learning_rate": 1.1533833194483013e-05, + "loss": 3.0178, + "step": 16722 + }, + { + "epoch": 0.19, + "learning_rate": 1.1529204850504489e-05, + "loss": 0.1781, + "step": 16724 + }, + { + "epoch": 0.19, + "learning_rate": 1.1524576506525966e-05, + "loss": 5.3839, + "step": 16726 + }, + { + "epoch": 0.19, + "learning_rate": 1.1519948162547441e-05, + "loss": 4.1366, + "step": 16728 + }, + { + "epoch": 0.19, + "learning_rate": 1.1515319818568917e-05, + "loss": 0.035, + "step": 16730 + }, + { + "epoch": 0.19, + "learning_rate": 1.1510691474590392e-05, + "loss": 3.4906, + "step": 16732 + }, + { + "epoch": 0.19, + "learning_rate": 1.1506063130611868e-05, + "loss": 0.1058, + "step": 16734 + }, + { + "epoch": 0.19, + "learning_rate": 1.1501434786633343e-05, + "loss": 0.0003, + "step": 16736 + }, + { + "epoch": 0.19, + "learning_rate": 1.1496806442654818e-05, + "loss": 2.1523, + "step": 16738 + }, + { + "epoch": 0.19, + "learning_rate": 1.1492178098676294e-05, + "loss": 5.2351, + "step": 16740 + }, + { + "epoch": 0.19, + "learning_rate": 1.148754975469777e-05, + "loss": 1.0037, + "step": 16742 + }, + { + "epoch": 0.19, + "learning_rate": 1.1482921410719245e-05, + "loss": 0.0064, + "step": 16744 + }, + { + "epoch": 0.19, + "learning_rate": 1.1478293066740722e-05, + "loss": 0.0008, + "step": 16746 + }, + { + "epoch": 0.19, + "learning_rate": 1.1473664722762197e-05, + "loss": 6.8848, + "step": 16748 + }, + { + "epoch": 0.19, + "learning_rate": 1.1469036378783673e-05, + "loss": 1.3045, + "step": 16750 + }, + { + "epoch": 0.19, + "learning_rate": 1.1464408034805148e-05, + "loss": 0.7471, + "step": 16752 + }, + { + "epoch": 0.19, + "learning_rate": 1.1459779690826623e-05, + "loss": 2.5351, + "step": 16754 + }, + { + "epoch": 0.19, + "learning_rate": 1.1455151346848099e-05, + "loss": 0.2926, + "step": 16756 + }, + { + "epoch": 0.19, + "learning_rate": 1.1450523002869573e-05, + "loss": 1.9691, + "step": 16758 + }, + { + "epoch": 0.19, + "learning_rate": 1.1445894658891048e-05, + "loss": 2.5232, + "step": 16760 + }, + { + "epoch": 0.19, + "learning_rate": 1.1441266314912525e-05, + "loss": 2.3937, + "step": 16762 + }, + { + "epoch": 0.19, + "learning_rate": 1.1436637970934e-05, + "loss": 0.4834, + "step": 16764 + }, + { + "epoch": 0.19, + "learning_rate": 1.1432009626955476e-05, + "loss": 3.8064, + "step": 16766 + }, + { + "epoch": 0.19, + "learning_rate": 1.1427381282976951e-05, + "loss": 1.5588, + "step": 16768 + }, + { + "epoch": 0.19, + "learning_rate": 1.1422752938998427e-05, + "loss": 0.835, + "step": 16770 + }, + { + "epoch": 0.19, + "learning_rate": 1.1418124595019902e-05, + "loss": 4.7609, + "step": 16772 + }, + { + "epoch": 0.19, + "learning_rate": 1.1413496251041378e-05, + "loss": 1.2548, + "step": 16774 + }, + { + "epoch": 0.19, + "learning_rate": 1.1408867907062853e-05, + "loss": 0.0192, + "step": 16776 + }, + { + "epoch": 0.19, + "learning_rate": 1.1404239563084329e-05, + "loss": 0.2838, + "step": 16778 + }, + { + "epoch": 0.19, + "learning_rate": 1.1399611219105804e-05, + "loss": 5.571, + "step": 16780 + }, + { + "epoch": 0.19, + "learning_rate": 1.1394982875127281e-05, + "loss": 0.0974, + "step": 16782 + }, + { + "epoch": 0.19, + "learning_rate": 1.1390354531148757e-05, + "loss": 3.1831, + "step": 16784 + }, + { + "epoch": 0.19, + "learning_rate": 1.1385726187170232e-05, + "loss": 4.732, + "step": 16786 + }, + { + "epoch": 0.19, + "learning_rate": 1.1381097843191707e-05, + "loss": 5.1802, + "step": 16788 + }, + { + "epoch": 0.19, + "learning_rate": 1.1376469499213183e-05, + "loss": 1.9624, + "step": 16790 + }, + { + "epoch": 0.19, + "learning_rate": 1.1371841155234658e-05, + "loss": 0.2076, + "step": 16792 + }, + { + "epoch": 0.19, + "learning_rate": 1.1367212811256132e-05, + "loss": 1.2407, + "step": 16794 + }, + { + "epoch": 0.19, + "learning_rate": 1.1362584467277607e-05, + "loss": 4.5882, + "step": 16796 + }, + { + "epoch": 0.19, + "learning_rate": 1.1357956123299083e-05, + "loss": 1.9915, + "step": 16798 + }, + { + "epoch": 0.19, + "learning_rate": 1.135332777932056e-05, + "loss": 0.069, + "step": 16800 + }, + { + "epoch": 0.19, + "learning_rate": 1.1348699435342035e-05, + "loss": 6.3115, + "step": 16802 + }, + { + "epoch": 0.19, + "learning_rate": 1.134407109136351e-05, + "loss": 3.0309, + "step": 16804 + }, + { + "epoch": 0.19, + "learning_rate": 1.1339442747384986e-05, + "loss": 4.2865, + "step": 16806 + }, + { + "epoch": 0.19, + "learning_rate": 1.1334814403406462e-05, + "loss": 1.5716, + "step": 16808 + }, + { + "epoch": 0.19, + "learning_rate": 1.1330186059427937e-05, + "loss": 1.7851, + "step": 16810 + }, + { + "epoch": 0.19, + "learning_rate": 1.1325557715449412e-05, + "loss": 2.1949, + "step": 16812 + }, + { + "epoch": 0.19, + "learning_rate": 1.1320929371470888e-05, + "loss": 3.8254, + "step": 16814 + }, + { + "epoch": 0.19, + "learning_rate": 1.1316301027492363e-05, + "loss": 2.1017, + "step": 16816 + }, + { + "epoch": 0.19, + "learning_rate": 1.1311672683513839e-05, + "loss": 1.881, + "step": 16818 + }, + { + "epoch": 0.19, + "learning_rate": 1.1307044339535316e-05, + "loss": 1.5557, + "step": 16820 + }, + { + "epoch": 0.19, + "learning_rate": 1.1302415995556791e-05, + "loss": 2.0854, + "step": 16822 + }, + { + "epoch": 0.19, + "learning_rate": 1.1297787651578267e-05, + "loss": 0.8488, + "step": 16824 + }, + { + "epoch": 0.19, + "learning_rate": 1.1293159307599742e-05, + "loss": 4.9022, + "step": 16826 + }, + { + "epoch": 0.19, + "learning_rate": 1.1288530963621218e-05, + "loss": 0.7587, + "step": 16828 + }, + { + "epoch": 0.19, + "learning_rate": 1.1283902619642693e-05, + "loss": 2.4087, + "step": 16830 + }, + { + "epoch": 0.19, + "learning_rate": 1.1279274275664167e-05, + "loss": 1.3881, + "step": 16832 + }, + { + "epoch": 0.19, + "learning_rate": 1.1274645931685642e-05, + "loss": 1.7943, + "step": 16834 + }, + { + "epoch": 0.19, + "learning_rate": 1.127001758770712e-05, + "loss": 2.0763, + "step": 16836 + }, + { + "epoch": 0.19, + "learning_rate": 1.1265389243728595e-05, + "loss": 0.1258, + "step": 16838 + }, + { + "epoch": 0.19, + "learning_rate": 1.126076089975007e-05, + "loss": 1.7021, + "step": 16840 + }, + { + "epoch": 0.19, + "learning_rate": 1.1256132555771546e-05, + "loss": 7.253, + "step": 16842 + }, + { + "epoch": 0.19, + "learning_rate": 1.1251504211793021e-05, + "loss": 0.2867, + "step": 16844 + }, + { + "epoch": 0.19, + "learning_rate": 1.1246875867814496e-05, + "loss": 0.3349, + "step": 16846 + }, + { + "epoch": 0.19, + "learning_rate": 1.1242247523835972e-05, + "loss": 1.7549, + "step": 16848 + }, + { + "epoch": 0.19, + "learning_rate": 1.1237619179857447e-05, + "loss": 3.2753, + "step": 16850 + }, + { + "epoch": 0.19, + "learning_rate": 1.1232990835878923e-05, + "loss": 0.6607, + "step": 16852 + }, + { + "epoch": 0.19, + "learning_rate": 1.1228362491900398e-05, + "loss": 1.084, + "step": 16854 + }, + { + "epoch": 0.19, + "learning_rate": 1.1223734147921875e-05, + "loss": 0.413, + "step": 16856 + }, + { + "epoch": 0.19, + "learning_rate": 1.121910580394335e-05, + "loss": 3.0113, + "step": 16858 + }, + { + "epoch": 0.19, + "learning_rate": 1.1214477459964826e-05, + "loss": 0.1664, + "step": 16860 + }, + { + "epoch": 0.19, + "learning_rate": 1.1209849115986301e-05, + "loss": 0.4163, + "step": 16862 + }, + { + "epoch": 0.19, + "learning_rate": 1.1205220772007777e-05, + "loss": 3.4949, + "step": 16864 + }, + { + "epoch": 0.19, + "learning_rate": 1.1200592428029252e-05, + "loss": 0.5388, + "step": 16866 + }, + { + "epoch": 0.19, + "learning_rate": 1.1195964084050726e-05, + "loss": 2.1036, + "step": 16868 + }, + { + "epoch": 0.19, + "learning_rate": 1.1191335740072201e-05, + "loss": 0.6572, + "step": 16870 + }, + { + "epoch": 0.19, + "learning_rate": 1.1186707396093679e-05, + "loss": 0.6285, + "step": 16872 + }, + { + "epoch": 0.19, + "learning_rate": 1.1182079052115154e-05, + "loss": 1.5634, + "step": 16874 + }, + { + "epoch": 0.19, + "learning_rate": 1.117745070813663e-05, + "loss": 3.8342, + "step": 16876 + }, + { + "epoch": 0.19, + "learning_rate": 1.1172822364158105e-05, + "loss": 3.3739, + "step": 16878 + }, + { + "epoch": 0.19, + "learning_rate": 1.116819402017958e-05, + "loss": 0.2247, + "step": 16880 + }, + { + "epoch": 0.19, + "learning_rate": 1.1163565676201056e-05, + "loss": 2.1792, + "step": 16882 + }, + { + "epoch": 0.19, + "learning_rate": 1.1158937332222531e-05, + "loss": 4.7664, + "step": 16884 + }, + { + "epoch": 0.19, + "learning_rate": 1.1154308988244007e-05, + "loss": 0.0346, + "step": 16886 + }, + { + "epoch": 0.19, + "learning_rate": 1.1149680644265482e-05, + "loss": 2.2282, + "step": 16888 + }, + { + "epoch": 0.19, + "learning_rate": 1.1145052300286957e-05, + "loss": 6.1701, + "step": 16890 + }, + { + "epoch": 0.19, + "learning_rate": 1.1140423956308433e-05, + "loss": 1.5605, + "step": 16892 + }, + { + "epoch": 0.19, + "learning_rate": 1.113579561232991e-05, + "loss": 0.2654, + "step": 16894 + }, + { + "epoch": 0.19, + "learning_rate": 1.1131167268351385e-05, + "loss": 2.2673, + "step": 16896 + }, + { + "epoch": 0.19, + "learning_rate": 1.112653892437286e-05, + "loss": 0.0198, + "step": 16898 + }, + { + "epoch": 0.19, + "learning_rate": 1.1121910580394336e-05, + "loss": 3.0247, + "step": 16900 + }, + { + "epoch": 0.19, + "learning_rate": 1.1117282236415812e-05, + "loss": 4.1963, + "step": 16902 + }, + { + "epoch": 0.19, + "learning_rate": 1.1112653892437285e-05, + "loss": 0.443, + "step": 16904 + }, + { + "epoch": 0.19, + "learning_rate": 1.110802554845876e-05, + "loss": 1.8901, + "step": 16906 + }, + { + "epoch": 0.19, + "learning_rate": 1.1103397204480236e-05, + "loss": 0.8102, + "step": 16908 + }, + { + "epoch": 0.19, + "learning_rate": 1.1098768860501713e-05, + "loss": 1.529, + "step": 16910 + }, + { + "epoch": 0.19, + "learning_rate": 1.1094140516523189e-05, + "loss": 0.1012, + "step": 16912 + }, + { + "epoch": 0.19, + "learning_rate": 1.1089512172544664e-05, + "loss": 4.1772, + "step": 16914 + }, + { + "epoch": 0.19, + "learning_rate": 1.108488382856614e-05, + "loss": 0.0005, + "step": 16916 + }, + { + "epoch": 0.19, + "learning_rate": 1.1080255484587615e-05, + "loss": 0.7784, + "step": 16918 + }, + { + "epoch": 0.19, + "learning_rate": 1.107562714060909e-05, + "loss": 0.1371, + "step": 16920 + }, + { + "epoch": 0.19, + "learning_rate": 1.1070998796630566e-05, + "loss": 8.0062, + "step": 16922 + }, + { + "epoch": 0.19, + "learning_rate": 1.1066370452652041e-05, + "loss": 0.2997, + "step": 16924 + }, + { + "epoch": 0.19, + "learning_rate": 1.1061742108673517e-05, + "loss": 2.1511, + "step": 16926 + }, + { + "epoch": 0.19, + "learning_rate": 1.1057113764694992e-05, + "loss": 4.0759, + "step": 16928 + }, + { + "epoch": 0.19, + "learning_rate": 1.105248542071647e-05, + "loss": 2.0988, + "step": 16930 + }, + { + "epoch": 0.2, + "learning_rate": 1.1047857076737945e-05, + "loss": 0.191, + "step": 16932 + }, + { + "epoch": 0.2, + "learning_rate": 1.104322873275942e-05, + "loss": 0.0869, + "step": 16934 + }, + { + "epoch": 0.2, + "learning_rate": 1.1038600388780896e-05, + "loss": 0.3508, + "step": 16936 + }, + { + "epoch": 0.2, + "learning_rate": 1.1033972044802371e-05, + "loss": 1.0143, + "step": 16938 + }, + { + "epoch": 0.2, + "learning_rate": 1.1029343700823845e-05, + "loss": 0.0005, + "step": 16940 + }, + { + "epoch": 0.2, + "learning_rate": 1.102471535684532e-05, + "loss": 6.1118, + "step": 16942 + }, + { + "epoch": 0.2, + "learning_rate": 1.1020087012866796e-05, + "loss": 1.1614, + "step": 16944 + }, + { + "epoch": 0.2, + "learning_rate": 1.1015458668888273e-05, + "loss": 7.184, + "step": 16946 + }, + { + "epoch": 0.2, + "learning_rate": 1.1010830324909748e-05, + "loss": 1.3113, + "step": 16948 + }, + { + "epoch": 0.2, + "learning_rate": 1.1006201980931224e-05, + "loss": 1.1853, + "step": 16950 + }, + { + "epoch": 0.2, + "learning_rate": 1.1001573636952699e-05, + "loss": 2.208, + "step": 16952 + }, + { + "epoch": 0.2, + "learning_rate": 1.0996945292974174e-05, + "loss": 0.1307, + "step": 16954 + }, + { + "epoch": 0.2, + "learning_rate": 1.099231694899565e-05, + "loss": 2.4308, + "step": 16956 + }, + { + "epoch": 0.2, + "learning_rate": 1.0987688605017125e-05, + "loss": 4.2964, + "step": 16958 + }, + { + "epoch": 0.2, + "learning_rate": 1.09830602610386e-05, + "loss": 1.2497, + "step": 16960 + }, + { + "epoch": 0.2, + "learning_rate": 1.0978431917060076e-05, + "loss": 2.1841, + "step": 16962 + }, + { + "epoch": 0.2, + "learning_rate": 1.0973803573081551e-05, + "loss": 0.4181, + "step": 16964 + }, + { + "epoch": 0.2, + "learning_rate": 1.0969175229103029e-05, + "loss": 0.0009, + "step": 16966 + }, + { + "epoch": 0.2, + "learning_rate": 1.0964546885124504e-05, + "loss": 1.4341, + "step": 16968 + }, + { + "epoch": 0.2, + "learning_rate": 1.095991854114598e-05, + "loss": 1.075, + "step": 16970 + }, + { + "epoch": 0.2, + "learning_rate": 1.0955290197167455e-05, + "loss": 0.2607, + "step": 16972 + }, + { + "epoch": 0.2, + "learning_rate": 1.095066185318893e-05, + "loss": 0.537, + "step": 16974 + }, + { + "epoch": 0.2, + "learning_rate": 1.0946033509210404e-05, + "loss": 0.0009, + "step": 16976 + }, + { + "epoch": 0.2, + "learning_rate": 1.094140516523188e-05, + "loss": 0.442, + "step": 16978 + }, + { + "epoch": 0.2, + "learning_rate": 1.0936776821253355e-05, + "loss": 2.023, + "step": 16980 + }, + { + "epoch": 0.2, + "learning_rate": 1.093214847727483e-05, + "loss": 0.8847, + "step": 16982 + }, + { + "epoch": 0.2, + "learning_rate": 1.0927520133296307e-05, + "loss": 0.6252, + "step": 16984 + }, + { + "epoch": 0.2, + "learning_rate": 1.0922891789317783e-05, + "loss": 0.1617, + "step": 16986 + }, + { + "epoch": 0.2, + "learning_rate": 1.0918263445339258e-05, + "loss": 2.3594, + "step": 16988 + }, + { + "epoch": 0.2, + "learning_rate": 1.0913635101360734e-05, + "loss": 0.6537, + "step": 16990 + }, + { + "epoch": 0.2, + "learning_rate": 1.0909006757382209e-05, + "loss": 0.0066, + "step": 16992 + }, + { + "epoch": 0.2, + "learning_rate": 1.0904378413403685e-05, + "loss": 4.1276, + "step": 16994 + }, + { + "epoch": 0.2, + "learning_rate": 1.089975006942516e-05, + "loss": 1.2777, + "step": 16996 + }, + { + "epoch": 0.2, + "learning_rate": 1.0895121725446635e-05, + "loss": 2.524, + "step": 16998 + }, + { + "epoch": 0.2, + "learning_rate": 1.089049338146811e-05, + "loss": 5.4557, + "step": 17000 + }, + { + "epoch": 0.2, + "learning_rate": 1.0885865037489586e-05, + "loss": 5.2187, + "step": 17002 + }, + { + "epoch": 0.2, + "learning_rate": 1.0881236693511063e-05, + "loss": 0.2405, + "step": 17004 + }, + { + "epoch": 0.2, + "learning_rate": 1.0876608349532539e-05, + "loss": 0.1773, + "step": 17006 + }, + { + "epoch": 0.2, + "learning_rate": 1.0871980005554014e-05, + "loss": 0.0012, + "step": 17008 + }, + { + "epoch": 0.2, + "learning_rate": 1.086735166157549e-05, + "loss": 0.0013, + "step": 17010 + }, + { + "epoch": 0.2, + "learning_rate": 1.0862723317596963e-05, + "loss": 3.792, + "step": 17012 + }, + { + "epoch": 0.2, + "learning_rate": 1.0858094973618439e-05, + "loss": 0.4929, + "step": 17014 + }, + { + "epoch": 0.2, + "learning_rate": 1.0853466629639914e-05, + "loss": 0.297, + "step": 17016 + }, + { + "epoch": 0.2, + "learning_rate": 1.084883828566139e-05, + "loss": 4.3301, + "step": 17018 + }, + { + "epoch": 0.2, + "learning_rate": 1.0844209941682867e-05, + "loss": 0.416, + "step": 17020 + }, + { + "epoch": 0.2, + "learning_rate": 1.0839581597704342e-05, + "loss": 4.7001, + "step": 17022 + }, + { + "epoch": 0.2, + "learning_rate": 1.0834953253725818e-05, + "loss": 3.4441, + "step": 17024 + }, + { + "epoch": 0.2, + "learning_rate": 1.0830324909747293e-05, + "loss": 2.8114, + "step": 17026 + }, + { + "epoch": 0.2, + "learning_rate": 1.0825696565768768e-05, + "loss": 1.7623, + "step": 17028 + }, + { + "epoch": 0.2, + "learning_rate": 1.0821068221790244e-05, + "loss": 2.2914, + "step": 17030 + }, + { + "epoch": 0.2, + "learning_rate": 1.081643987781172e-05, + "loss": 0.8645, + "step": 17032 + }, + { + "epoch": 0.2, + "learning_rate": 1.0811811533833195e-05, + "loss": 0.6164, + "step": 17034 + }, + { + "epoch": 0.2, + "learning_rate": 1.080718318985467e-05, + "loss": 0.0023, + "step": 17036 + }, + { + "epoch": 0.2, + "learning_rate": 1.0802554845876146e-05, + "loss": 1.8774, + "step": 17038 + }, + { + "epoch": 0.2, + "learning_rate": 1.0797926501897623e-05, + "loss": 2.6033, + "step": 17040 + }, + { + "epoch": 0.2, + "learning_rate": 1.0793298157919098e-05, + "loss": 1.7116, + "step": 17042 + }, + { + "epoch": 0.2, + "learning_rate": 1.0788669813940574e-05, + "loss": 2.423, + "step": 17044 + }, + { + "epoch": 0.2, + "learning_rate": 1.0784041469962049e-05, + "loss": 0.7875, + "step": 17046 + }, + { + "epoch": 0.2, + "learning_rate": 1.0779413125983523e-05, + "loss": 0.4087, + "step": 17048 + }, + { + "epoch": 0.2, + "learning_rate": 1.0774784782004998e-05, + "loss": 0.6632, + "step": 17050 + }, + { + "epoch": 0.2, + "learning_rate": 1.0770156438026474e-05, + "loss": 1.0605, + "step": 17052 + }, + { + "epoch": 0.2, + "learning_rate": 1.0765528094047949e-05, + "loss": 0.3403, + "step": 17054 + }, + { + "epoch": 0.2, + "learning_rate": 1.0760899750069426e-05, + "loss": 0.8363, + "step": 17056 + }, + { + "epoch": 0.2, + "learning_rate": 1.0756271406090902e-05, + "loss": 0.1571, + "step": 17058 + }, + { + "epoch": 0.2, + "learning_rate": 1.0751643062112377e-05, + "loss": 0.0029, + "step": 17060 + }, + { + "epoch": 0.2, + "learning_rate": 1.0747014718133852e-05, + "loss": 1.2687, + "step": 17062 + }, + { + "epoch": 0.2, + "learning_rate": 1.0742386374155328e-05, + "loss": 0.0008, + "step": 17064 + }, + { + "epoch": 0.2, + "learning_rate": 1.0737758030176803e-05, + "loss": 1.9382, + "step": 17066 + }, + { + "epoch": 0.2, + "learning_rate": 1.0733129686198279e-05, + "loss": 2.4272, + "step": 17068 + }, + { + "epoch": 0.2, + "learning_rate": 1.0728501342219754e-05, + "loss": 2.7795, + "step": 17070 + }, + { + "epoch": 0.2, + "learning_rate": 1.072387299824123e-05, + "loss": 3.1202, + "step": 17072 + }, + { + "epoch": 0.2, + "learning_rate": 1.0719244654262705e-05, + "loss": 2.2384, + "step": 17074 + }, + { + "epoch": 0.2, + "learning_rate": 1.0714616310284182e-05, + "loss": 0.0015, + "step": 17076 + }, + { + "epoch": 0.2, + "learning_rate": 1.0709987966305657e-05, + "loss": 0.0648, + "step": 17078 + }, + { + "epoch": 0.2, + "learning_rate": 1.0705359622327133e-05, + "loss": 0.1727, + "step": 17080 + }, + { + "epoch": 0.2, + "learning_rate": 1.0700731278348608e-05, + "loss": 2.861, + "step": 17082 + }, + { + "epoch": 0.2, + "learning_rate": 1.0696102934370082e-05, + "loss": 0.4665, + "step": 17084 + }, + { + "epoch": 0.2, + "learning_rate": 1.0691474590391557e-05, + "loss": 2.7212, + "step": 17086 + }, + { + "epoch": 0.2, + "learning_rate": 1.0686846246413033e-05, + "loss": 0.5035, + "step": 17088 + }, + { + "epoch": 0.2, + "learning_rate": 1.0682217902434508e-05, + "loss": 0.0019, + "step": 17090 + }, + { + "epoch": 0.2, + "learning_rate": 1.0677589558455984e-05, + "loss": 1.9052, + "step": 17092 + }, + { + "epoch": 0.2, + "learning_rate": 1.0672961214477461e-05, + "loss": 0.1834, + "step": 17094 + }, + { + "epoch": 0.2, + "learning_rate": 1.0668332870498936e-05, + "loss": 0.0013, + "step": 17096 + }, + { + "epoch": 0.2, + "learning_rate": 1.0663704526520412e-05, + "loss": 0.4291, + "step": 17098 + }, + { + "epoch": 0.2, + "learning_rate": 1.0659076182541887e-05, + "loss": 2.5832, + "step": 17100 + }, + { + "epoch": 0.2, + "learning_rate": 1.0654447838563363e-05, + "loss": 0.0298, + "step": 17102 + }, + { + "epoch": 0.2, + "learning_rate": 1.0649819494584838e-05, + "loss": 0.0005, + "step": 17104 + }, + { + "epoch": 0.2, + "learning_rate": 1.0645191150606313e-05, + "loss": 4.1944, + "step": 17106 + }, + { + "epoch": 0.2, + "learning_rate": 1.0640562806627789e-05, + "loss": 0.1975, + "step": 17108 + }, + { + "epoch": 0.2, + "learning_rate": 1.0635934462649264e-05, + "loss": 0.1438, + "step": 17110 + }, + { + "epoch": 0.2, + "learning_rate": 1.063130611867074e-05, + "loss": 2.902, + "step": 17112 + }, + { + "epoch": 0.2, + "learning_rate": 1.0626677774692217e-05, + "loss": 1.1331, + "step": 17114 + }, + { + "epoch": 0.2, + "learning_rate": 1.0622049430713692e-05, + "loss": 1.2102, + "step": 17116 + }, + { + "epoch": 0.2, + "learning_rate": 1.0617421086735168e-05, + "loss": 0.0052, + "step": 17118 + }, + { + "epoch": 0.2, + "learning_rate": 1.0612792742756643e-05, + "loss": 2.6528, + "step": 17120 + }, + { + "epoch": 0.2, + "learning_rate": 1.0608164398778117e-05, + "loss": 2.3246, + "step": 17122 + }, + { + "epoch": 0.2, + "learning_rate": 1.0603536054799592e-05, + "loss": 2.2081, + "step": 17124 + }, + { + "epoch": 0.2, + "learning_rate": 1.0598907710821068e-05, + "loss": 1.8387, + "step": 17126 + }, + { + "epoch": 0.2, + "learning_rate": 1.0594279366842543e-05, + "loss": 0.0257, + "step": 17128 + }, + { + "epoch": 0.2, + "learning_rate": 1.058965102286402e-05, + "loss": 1.2738, + "step": 17130 + }, + { + "epoch": 0.2, + "learning_rate": 1.0585022678885496e-05, + "loss": 0.3273, + "step": 17132 + }, + { + "epoch": 0.2, + "learning_rate": 1.0580394334906971e-05, + "loss": 1.6029, + "step": 17134 + }, + { + "epoch": 0.2, + "learning_rate": 1.0575765990928446e-05, + "loss": 2.624, + "step": 17136 + }, + { + "epoch": 0.2, + "learning_rate": 1.0571137646949922e-05, + "loss": 3.7664, + "step": 17138 + }, + { + "epoch": 0.2, + "learning_rate": 1.0566509302971397e-05, + "loss": 0.0021, + "step": 17140 + }, + { + "epoch": 0.2, + "learning_rate": 1.0561880958992873e-05, + "loss": 1.2749, + "step": 17142 + }, + { + "epoch": 0.2, + "learning_rate": 1.0557252615014348e-05, + "loss": 0.7057, + "step": 17144 + }, + { + "epoch": 0.2, + "learning_rate": 1.0552624271035824e-05, + "loss": 0.5493, + "step": 17146 + }, + { + "epoch": 0.2, + "learning_rate": 1.0547995927057299e-05, + "loss": 0.2028, + "step": 17148 + }, + { + "epoch": 0.2, + "learning_rate": 1.0543367583078776e-05, + "loss": 0.0337, + "step": 17150 + }, + { + "epoch": 0.2, + "learning_rate": 1.0538739239100252e-05, + "loss": 4.1301, + "step": 17152 + }, + { + "epoch": 0.2, + "learning_rate": 1.0534110895121727e-05, + "loss": 2.1113, + "step": 17154 + }, + { + "epoch": 0.2, + "learning_rate": 1.0529482551143202e-05, + "loss": 3.1187, + "step": 17156 + }, + { + "epoch": 0.2, + "learning_rate": 1.0524854207164676e-05, + "loss": 2.9963, + "step": 17158 + }, + { + "epoch": 0.2, + "learning_rate": 1.0520225863186152e-05, + "loss": 8.4848, + "step": 17160 + }, + { + "epoch": 0.2, + "learning_rate": 1.0515597519207627e-05, + "loss": 0.0461, + "step": 17162 + }, + { + "epoch": 0.2, + "learning_rate": 1.0510969175229102e-05, + "loss": 0.0011, + "step": 17164 + }, + { + "epoch": 0.2, + "learning_rate": 1.050634083125058e-05, + "loss": 1.894, + "step": 17166 + }, + { + "epoch": 0.2, + "learning_rate": 1.0501712487272055e-05, + "loss": 0.0844, + "step": 17168 + }, + { + "epoch": 0.2, + "learning_rate": 1.049708414329353e-05, + "loss": 1.6824, + "step": 17170 + }, + { + "epoch": 0.2, + "learning_rate": 1.0492455799315006e-05, + "loss": 0.7423, + "step": 17172 + }, + { + "epoch": 0.2, + "learning_rate": 1.0487827455336481e-05, + "loss": 0.0345, + "step": 17174 + }, + { + "epoch": 0.2, + "learning_rate": 1.0483199111357957e-05, + "loss": 0.4194, + "step": 17176 + }, + { + "epoch": 0.2, + "learning_rate": 1.0478570767379432e-05, + "loss": 0.5926, + "step": 17178 + }, + { + "epoch": 0.2, + "learning_rate": 1.0473942423400908e-05, + "loss": 0.4157, + "step": 17180 + }, + { + "epoch": 0.2, + "learning_rate": 1.0469314079422383e-05, + "loss": 1.061, + "step": 17182 + }, + { + "epoch": 0.2, + "learning_rate": 1.0464685735443858e-05, + "loss": 4.5157, + "step": 17184 + }, + { + "epoch": 0.2, + "learning_rate": 1.0460057391465335e-05, + "loss": 4.4674, + "step": 17186 + }, + { + "epoch": 0.2, + "learning_rate": 1.0455429047486811e-05, + "loss": 0.1391, + "step": 17188 + }, + { + "epoch": 0.2, + "learning_rate": 1.0450800703508286e-05, + "loss": 4.2405, + "step": 17190 + }, + { + "epoch": 0.2, + "learning_rate": 1.0446172359529762e-05, + "loss": 2.2707, + "step": 17192 + }, + { + "epoch": 0.2, + "learning_rate": 1.0441544015551235e-05, + "loss": 0.8526, + "step": 17194 + }, + { + "epoch": 0.2, + "learning_rate": 1.0436915671572711e-05, + "loss": 0.9887, + "step": 17196 + }, + { + "epoch": 0.2, + "learning_rate": 1.0432287327594186e-05, + "loss": 0.1156, + "step": 17198 + }, + { + "epoch": 0.2, + "learning_rate": 1.0427658983615662e-05, + "loss": 1.1315, + "step": 17200 + }, + { + "epoch": 0.2, + "learning_rate": 1.0423030639637137e-05, + "loss": 5.5751, + "step": 17202 + }, + { + "epoch": 0.2, + "learning_rate": 1.0418402295658614e-05, + "loss": 0.9999, + "step": 17204 + }, + { + "epoch": 0.2, + "learning_rate": 1.041377395168009e-05, + "loss": 4.0054, + "step": 17206 + }, + { + "epoch": 0.2, + "learning_rate": 1.0409145607701565e-05, + "loss": 1.5272, + "step": 17208 + }, + { + "epoch": 0.2, + "learning_rate": 1.040451726372304e-05, + "loss": 0.004, + "step": 17210 + }, + { + "epoch": 0.2, + "learning_rate": 1.0399888919744516e-05, + "loss": 4.2545, + "step": 17212 + }, + { + "epoch": 0.2, + "learning_rate": 1.0395260575765991e-05, + "loss": 1.2267, + "step": 17214 + }, + { + "epoch": 0.2, + "learning_rate": 1.0390632231787467e-05, + "loss": 0.0027, + "step": 17216 + }, + { + "epoch": 0.2, + "learning_rate": 1.0386003887808942e-05, + "loss": 2.9734, + "step": 17218 + }, + { + "epoch": 0.2, + "learning_rate": 1.0381375543830418e-05, + "loss": 4.8909, + "step": 17220 + }, + { + "epoch": 0.2, + "learning_rate": 1.0376747199851893e-05, + "loss": 3.7966, + "step": 17222 + }, + { + "epoch": 0.2, + "learning_rate": 1.037211885587337e-05, + "loss": 0.9652, + "step": 17224 + }, + { + "epoch": 0.2, + "learning_rate": 1.0367490511894846e-05, + "loss": 0.0185, + "step": 17226 + }, + { + "epoch": 0.2, + "learning_rate": 1.0362862167916321e-05, + "loss": 1.3028, + "step": 17228 + }, + { + "epoch": 0.2, + "learning_rate": 1.0358233823937795e-05, + "loss": 0.0013, + "step": 17230 + }, + { + "epoch": 0.2, + "learning_rate": 1.035360547995927e-05, + "loss": 0.0331, + "step": 17232 + }, + { + "epoch": 0.2, + "learning_rate": 1.0348977135980746e-05, + "loss": 1.0373, + "step": 17234 + }, + { + "epoch": 0.2, + "learning_rate": 1.0344348792002221e-05, + "loss": 1.5519, + "step": 17236 + }, + { + "epoch": 0.2, + "learning_rate": 1.0339720448023697e-05, + "loss": 0.1021, + "step": 17238 + }, + { + "epoch": 0.2, + "learning_rate": 1.0335092104045174e-05, + "loss": 2.202, + "step": 17240 + }, + { + "epoch": 0.2, + "learning_rate": 1.0330463760066649e-05, + "loss": 3.0737, + "step": 17242 + }, + { + "epoch": 0.2, + "learning_rate": 1.0325835416088124e-05, + "loss": 2.0077, + "step": 17244 + }, + { + "epoch": 0.2, + "learning_rate": 1.03212070721096e-05, + "loss": 1.53, + "step": 17246 + }, + { + "epoch": 0.2, + "learning_rate": 1.0316578728131075e-05, + "loss": 0.0721, + "step": 17248 + }, + { + "epoch": 0.2, + "learning_rate": 1.031195038415255e-05, + "loss": 1.406, + "step": 17250 + }, + { + "epoch": 0.2, + "learning_rate": 1.0307322040174026e-05, + "loss": 0.0992, + "step": 17252 + }, + { + "epoch": 0.2, + "learning_rate": 1.0302693696195502e-05, + "loss": 2.1148, + "step": 17254 + }, + { + "epoch": 0.2, + "learning_rate": 1.0298065352216977e-05, + "loss": 2.6455, + "step": 17256 + }, + { + "epoch": 0.2, + "learning_rate": 1.0293437008238452e-05, + "loss": 2.1695, + "step": 17258 + }, + { + "epoch": 0.2, + "learning_rate": 1.028880866425993e-05, + "loss": 0.1518, + "step": 17260 + }, + { + "epoch": 0.2, + "learning_rate": 1.0284180320281405e-05, + "loss": 0.7541, + "step": 17262 + }, + { + "epoch": 0.2, + "learning_rate": 1.027955197630288e-05, + "loss": 0.2293, + "step": 17264 + }, + { + "epoch": 0.2, + "learning_rate": 1.0274923632324354e-05, + "loss": 2.4381, + "step": 17266 + }, + { + "epoch": 0.2, + "learning_rate": 1.027029528834583e-05, + "loss": 0.0022, + "step": 17268 + }, + { + "epoch": 0.2, + "learning_rate": 1.0265666944367305e-05, + "loss": 1.4766, + "step": 17270 + }, + { + "epoch": 0.2, + "learning_rate": 1.026103860038878e-05, + "loss": 2.0336, + "step": 17272 + }, + { + "epoch": 0.2, + "learning_rate": 1.0256410256410256e-05, + "loss": 0.8232, + "step": 17274 + }, + { + "epoch": 0.2, + "learning_rate": 1.0251781912431733e-05, + "loss": 0.0052, + "step": 17276 + }, + { + "epoch": 0.2, + "learning_rate": 1.0247153568453208e-05, + "loss": 4.247, + "step": 17278 + }, + { + "epoch": 0.2, + "learning_rate": 1.0242525224474684e-05, + "loss": 7.6466, + "step": 17280 + }, + { + "epoch": 0.2, + "learning_rate": 1.023789688049616e-05, + "loss": 0.3169, + "step": 17282 + }, + { + "epoch": 0.2, + "learning_rate": 1.0233268536517635e-05, + "loss": 2.7099, + "step": 17284 + }, + { + "epoch": 0.2, + "learning_rate": 1.022864019253911e-05, + "loss": 1.4207, + "step": 17286 + }, + { + "epoch": 0.2, + "learning_rate": 1.0224011848560586e-05, + "loss": 1.4546, + "step": 17288 + }, + { + "epoch": 0.2, + "learning_rate": 1.0219383504582061e-05, + "loss": 1.584, + "step": 17290 + }, + { + "epoch": 0.2, + "learning_rate": 1.0214755160603536e-05, + "loss": 7.1214, + "step": 17292 + }, + { + "epoch": 0.2, + "learning_rate": 1.0210126816625012e-05, + "loss": 0.0042, + "step": 17294 + }, + { + "epoch": 0.2, + "learning_rate": 1.0205498472646487e-05, + "loss": 4.1859, + "step": 17296 + }, + { + "epoch": 0.2, + "learning_rate": 1.0200870128667964e-05, + "loss": 0.0013, + "step": 17298 + }, + { + "epoch": 0.2, + "learning_rate": 1.019624178468944e-05, + "loss": 0.0618, + "step": 17300 + }, + { + "epoch": 0.2, + "learning_rate": 1.0191613440710913e-05, + "loss": 0.0316, + "step": 17302 + }, + { + "epoch": 0.2, + "learning_rate": 1.0186985096732389e-05, + "loss": 1.8975, + "step": 17304 + }, + { + "epoch": 0.2, + "learning_rate": 1.0182356752753864e-05, + "loss": 1.4024, + "step": 17306 + }, + { + "epoch": 0.2, + "learning_rate": 1.017772840877534e-05, + "loss": 3.5665, + "step": 17308 + }, + { + "epoch": 0.2, + "learning_rate": 1.0173100064796815e-05, + "loss": 2.2342, + "step": 17310 + }, + { + "epoch": 0.2, + "learning_rate": 1.016847172081829e-05, + "loss": 3.6465, + "step": 17312 + }, + { + "epoch": 0.2, + "learning_rate": 1.0163843376839768e-05, + "loss": 0.5749, + "step": 17314 + }, + { + "epoch": 0.2, + "learning_rate": 1.0159215032861243e-05, + "loss": 2.5442, + "step": 17316 + }, + { + "epoch": 0.2, + "learning_rate": 1.0154586688882719e-05, + "loss": 2.3336, + "step": 17318 + }, + { + "epoch": 0.2, + "learning_rate": 1.0149958344904194e-05, + "loss": 0.3227, + "step": 17320 + }, + { + "epoch": 0.2, + "learning_rate": 1.014533000092567e-05, + "loss": 3.0819, + "step": 17322 + }, + { + "epoch": 0.2, + "learning_rate": 1.0140701656947145e-05, + "loss": 3.42, + "step": 17324 + }, + { + "epoch": 0.2, + "learning_rate": 1.013607331296862e-05, + "loss": 1.9748, + "step": 17326 + }, + { + "epoch": 0.2, + "learning_rate": 1.0131444968990096e-05, + "loss": 0.2678, + "step": 17328 + }, + { + "epoch": 0.2, + "learning_rate": 1.0126816625011571e-05, + "loss": 7.7803, + "step": 17330 + }, + { + "epoch": 0.2, + "learning_rate": 1.0122188281033047e-05, + "loss": 2.4892, + "step": 17332 + }, + { + "epoch": 0.2, + "learning_rate": 1.0117559937054524e-05, + "loss": 0.0031, + "step": 17334 + }, + { + "epoch": 0.2, + "learning_rate": 1.0112931593075999e-05, + "loss": 1.671, + "step": 17336 + }, + { + "epoch": 0.2, + "learning_rate": 1.0108303249097473e-05, + "loss": 0.6398, + "step": 17338 + }, + { + "epoch": 0.2, + "learning_rate": 1.0103674905118948e-05, + "loss": 5.6892, + "step": 17340 + }, + { + "epoch": 0.2, + "learning_rate": 1.0099046561140424e-05, + "loss": 2.3067, + "step": 17342 + }, + { + "epoch": 0.2, + "learning_rate": 1.0094418217161899e-05, + "loss": 1.477, + "step": 17344 + }, + { + "epoch": 0.2, + "learning_rate": 1.0089789873183375e-05, + "loss": 0.121, + "step": 17346 + }, + { + "epoch": 0.2, + "learning_rate": 1.008516152920485e-05, + "loss": 2.1911, + "step": 17348 + }, + { + "epoch": 0.2, + "learning_rate": 1.0080533185226327e-05, + "loss": 4.3986, + "step": 17350 + }, + { + "epoch": 0.2, + "learning_rate": 1.0075904841247802e-05, + "loss": 1.3002, + "step": 17352 + }, + { + "epoch": 0.2, + "learning_rate": 1.0071276497269278e-05, + "loss": 0.7485, + "step": 17354 + }, + { + "epoch": 0.2, + "learning_rate": 1.0066648153290753e-05, + "loss": 2.5868, + "step": 17356 + }, + { + "epoch": 0.2, + "learning_rate": 1.0062019809312229e-05, + "loss": 0.0467, + "step": 17358 + }, + { + "epoch": 0.2, + "learning_rate": 1.0057391465333704e-05, + "loss": 0.5214, + "step": 17360 + }, + { + "epoch": 0.2, + "learning_rate": 1.005276312135518e-05, + "loss": 1.5878, + "step": 17362 + }, + { + "epoch": 0.2, + "learning_rate": 1.0048134777376655e-05, + "loss": 0.1135, + "step": 17364 + }, + { + "epoch": 0.2, + "learning_rate": 1.004350643339813e-05, + "loss": 1.3467, + "step": 17366 + }, + { + "epoch": 0.2, + "learning_rate": 1.0038878089419606e-05, + "loss": 3.5664, + "step": 17368 + }, + { + "epoch": 0.2, + "learning_rate": 1.0034249745441083e-05, + "loss": 6.4193, + "step": 17370 + }, + { + "epoch": 0.2, + "learning_rate": 1.0029621401462558e-05, + "loss": 3.404, + "step": 17372 + }, + { + "epoch": 0.2, + "learning_rate": 1.0024993057484032e-05, + "loss": 4.2661, + "step": 17374 + }, + { + "epoch": 0.2, + "learning_rate": 1.0020364713505508e-05, + "loss": 2.3746, + "step": 17376 + }, + { + "epoch": 0.2, + "learning_rate": 1.0015736369526983e-05, + "loss": 0.6922, + "step": 17378 + }, + { + "epoch": 0.2, + "learning_rate": 1.0011108025548458e-05, + "loss": 1.6739, + "step": 17380 + }, + { + "epoch": 0.2, + "learning_rate": 1.0006479681569934e-05, + "loss": 0.0015, + "step": 17382 + }, + { + "epoch": 0.2, + "learning_rate": 1.000185133759141e-05, + "loss": 0.7582, + "step": 17384 + }, + { + "epoch": 0.2, + "learning_rate": 9.997222993612885e-06, + "loss": 1.5105, + "step": 17386 + }, + { + "epoch": 0.2, + "learning_rate": 9.992594649634362e-06, + "loss": 2.7609, + "step": 17388 + }, + { + "epoch": 0.2, + "learning_rate": 9.987966305655837e-06, + "loss": 0.9346, + "step": 17390 + }, + { + "epoch": 0.2, + "learning_rate": 9.983337961677313e-06, + "loss": 1.5944, + "step": 17392 + }, + { + "epoch": 0.2, + "learning_rate": 9.978709617698788e-06, + "loss": 1.5516, + "step": 17394 + }, + { + "epoch": 0.2, + "learning_rate": 9.974081273720264e-06, + "loss": 1.1256, + "step": 17396 + }, + { + "epoch": 0.2, + "learning_rate": 9.969452929741739e-06, + "loss": 0.2813, + "step": 17398 + }, + { + "epoch": 0.2, + "learning_rate": 9.964824585763214e-06, + "loss": 3.4472, + "step": 17400 + }, + { + "epoch": 0.2, + "learning_rate": 9.96019624178469e-06, + "loss": 0.0025, + "step": 17402 + }, + { + "epoch": 0.2, + "learning_rate": 9.955567897806165e-06, + "loss": 0.6148, + "step": 17404 + }, + { + "epoch": 0.2, + "learning_rate": 9.95093955382764e-06, + "loss": 2.9507, + "step": 17406 + }, + { + "epoch": 0.2, + "learning_rate": 9.946311209849118e-06, + "loss": 0.0014, + "step": 17408 + }, + { + "epoch": 0.2, + "learning_rate": 9.941682865870593e-06, + "loss": 0.7553, + "step": 17410 + }, + { + "epoch": 0.2, + "learning_rate": 9.937054521892067e-06, + "loss": 1.3621, + "step": 17412 + }, + { + "epoch": 0.2, + "learning_rate": 9.932426177913542e-06, + "loss": 3.5511, + "step": 17414 + }, + { + "epoch": 0.2, + "learning_rate": 9.927797833935018e-06, + "loss": 0.007, + "step": 17416 + }, + { + "epoch": 0.2, + "learning_rate": 9.923169489956493e-06, + "loss": 3.5321, + "step": 17418 + }, + { + "epoch": 0.2, + "learning_rate": 9.918541145977969e-06, + "loss": 2.6825, + "step": 17420 + }, + { + "epoch": 0.2, + "learning_rate": 9.913912801999444e-06, + "loss": 0.4721, + "step": 17422 + }, + { + "epoch": 0.2, + "learning_rate": 9.909284458020921e-06, + "loss": 0.0037, + "step": 17424 + }, + { + "epoch": 0.2, + "learning_rate": 9.904656114042397e-06, + "loss": 1.0527, + "step": 17426 + }, + { + "epoch": 0.2, + "learning_rate": 9.900027770063872e-06, + "loss": 4.6482, + "step": 17428 + }, + { + "epoch": 0.2, + "learning_rate": 9.895399426085347e-06, + "loss": 0.0058, + "step": 17430 + }, + { + "epoch": 0.2, + "learning_rate": 9.890771082106823e-06, + "loss": 0.4187, + "step": 17432 + }, + { + "epoch": 0.2, + "learning_rate": 9.886142738128298e-06, + "loss": 0.0029, + "step": 17434 + }, + { + "epoch": 0.2, + "learning_rate": 9.881514394149774e-06, + "loss": 0.11, + "step": 17436 + }, + { + "epoch": 0.2, + "learning_rate": 9.876886050171249e-06, + "loss": 0.2489, + "step": 17438 + }, + { + "epoch": 0.2, + "learning_rate": 9.872257706192725e-06, + "loss": 2.8421, + "step": 17440 + }, + { + "epoch": 0.2, + "learning_rate": 9.8676293622142e-06, + "loss": 3.3258, + "step": 17442 + }, + { + "epoch": 0.2, + "learning_rate": 9.863001018235677e-06, + "loss": 0.0012, + "step": 17444 + }, + { + "epoch": 0.2, + "learning_rate": 9.858372674257153e-06, + "loss": 1.652, + "step": 17446 + }, + { + "epoch": 0.2, + "learning_rate": 9.853744330278626e-06, + "loss": 1.0631, + "step": 17448 + }, + { + "epoch": 0.2, + "learning_rate": 9.849115986300102e-06, + "loss": 1.1922, + "step": 17450 + }, + { + "epoch": 0.2, + "learning_rate": 9.844487642321577e-06, + "loss": 4.0656, + "step": 17452 + }, + { + "epoch": 0.2, + "learning_rate": 9.839859298343053e-06, + "loss": 4.4561, + "step": 17454 + }, + { + "epoch": 0.2, + "learning_rate": 9.835230954364528e-06, + "loss": 0.7948, + "step": 17456 + }, + { + "epoch": 0.2, + "learning_rate": 9.830602610386003e-06, + "loss": 4.165, + "step": 17458 + }, + { + "epoch": 0.2, + "learning_rate": 9.82597426640748e-06, + "loss": 0.2439, + "step": 17460 + }, + { + "epoch": 0.2, + "learning_rate": 9.821345922428956e-06, + "loss": 0.0006, + "step": 17462 + }, + { + "epoch": 0.2, + "learning_rate": 9.816717578450431e-06, + "loss": 1.4368, + "step": 17464 + }, + { + "epoch": 0.2, + "learning_rate": 9.812089234471907e-06, + "loss": 1.7057, + "step": 17466 + }, + { + "epoch": 0.2, + "learning_rate": 9.807460890493382e-06, + "loss": 4.28, + "step": 17468 + }, + { + "epoch": 0.2, + "learning_rate": 9.802832546514858e-06, + "loss": 0.0003, + "step": 17470 + }, + { + "epoch": 0.2, + "learning_rate": 9.798204202536333e-06, + "loss": 1.3202, + "step": 17472 + }, + { + "epoch": 0.2, + "learning_rate": 9.793575858557808e-06, + "loss": 1.9023, + "step": 17474 + }, + { + "epoch": 0.2, + "learning_rate": 9.788947514579284e-06, + "loss": 0.7103, + "step": 17476 + }, + { + "epoch": 0.2, + "learning_rate": 9.78431917060076e-06, + "loss": 6.2731, + "step": 17478 + }, + { + "epoch": 0.2, + "learning_rate": 9.779690826622236e-06, + "loss": 0.2228, + "step": 17480 + }, + { + "epoch": 0.2, + "learning_rate": 9.775062482643712e-06, + "loss": 2.0402, + "step": 17482 + }, + { + "epoch": 0.2, + "learning_rate": 9.770434138665186e-06, + "loss": 2.1915, + "step": 17484 + }, + { + "epoch": 0.2, + "learning_rate": 9.765805794686661e-06, + "loss": 0.2848, + "step": 17486 + }, + { + "epoch": 0.2, + "learning_rate": 9.761177450708136e-06, + "loss": 5.168, + "step": 17488 + }, + { + "epoch": 0.2, + "learning_rate": 9.756549106729612e-06, + "loss": 2.8916, + "step": 17490 + }, + { + "epoch": 0.2, + "learning_rate": 9.751920762751087e-06, + "loss": 3.355, + "step": 17492 + }, + { + "epoch": 0.2, + "learning_rate": 9.747292418772563e-06, + "loss": 0.1571, + "step": 17494 + }, + { + "epoch": 0.2, + "learning_rate": 9.742664074794038e-06, + "loss": 0.4299, + "step": 17496 + }, + { + "epoch": 0.2, + "learning_rate": 9.738035730815515e-06, + "loss": 1.1369, + "step": 17498 + }, + { + "epoch": 0.2, + "learning_rate": 9.73340738683699e-06, + "loss": 0.341, + "step": 17500 + }, + { + "epoch": 0.2, + "learning_rate": 9.728779042858466e-06, + "loss": 0.3027, + "step": 17502 + }, + { + "epoch": 0.2, + "learning_rate": 9.724150698879942e-06, + "loss": 0.0021, + "step": 17504 + }, + { + "epoch": 0.2, + "learning_rate": 9.719522354901417e-06, + "loss": 0.0442, + "step": 17506 + }, + { + "epoch": 0.2, + "learning_rate": 9.714894010922892e-06, + "loss": 4.8425, + "step": 17508 + }, + { + "epoch": 0.2, + "learning_rate": 9.710265666944368e-06, + "loss": 0.4896, + "step": 17510 + }, + { + "epoch": 0.2, + "learning_rate": 9.705637322965843e-06, + "loss": 3.6537, + "step": 17512 + }, + { + "epoch": 0.2, + "learning_rate": 9.701008978987319e-06, + "loss": 4.4468, + "step": 17514 + }, + { + "epoch": 0.2, + "learning_rate": 9.696380635008794e-06, + "loss": 0.04, + "step": 17516 + }, + { + "epoch": 0.2, + "learning_rate": 9.691752291030271e-06, + "loss": 1.3352, + "step": 17518 + }, + { + "epoch": 0.2, + "learning_rate": 9.687123947051745e-06, + "loss": 2.6653, + "step": 17520 + }, + { + "epoch": 0.2, + "learning_rate": 9.68249560307322e-06, + "loss": 0.1341, + "step": 17522 + }, + { + "epoch": 0.2, + "learning_rate": 9.677867259094696e-06, + "loss": 0.004, + "step": 17524 + }, + { + "epoch": 0.2, + "learning_rate": 9.673238915116171e-06, + "loss": 2.7277, + "step": 17526 + }, + { + "epoch": 0.2, + "learning_rate": 9.668610571137647e-06, + "loss": 0.8272, + "step": 17528 + }, + { + "epoch": 0.2, + "learning_rate": 9.663982227159122e-06, + "loss": 2.0371, + "step": 17530 + }, + { + "epoch": 0.2, + "learning_rate": 9.659353883180597e-06, + "loss": 0.5155, + "step": 17532 + }, + { + "epoch": 0.2, + "learning_rate": 9.654725539202075e-06, + "loss": 2.9593, + "step": 17534 + }, + { + "epoch": 0.2, + "learning_rate": 9.65009719522355e-06, + "loss": 4.2907, + "step": 17536 + }, + { + "epoch": 0.2, + "learning_rate": 9.645468851245025e-06, + "loss": 0.0016, + "step": 17538 + }, + { + "epoch": 0.2, + "learning_rate": 9.640840507266501e-06, + "loss": 5.2405, + "step": 17540 + }, + { + "epoch": 0.2, + "learning_rate": 9.636212163287976e-06, + "loss": 0.4696, + "step": 17542 + }, + { + "epoch": 0.2, + "learning_rate": 9.631583819309452e-06, + "loss": 0.0109, + "step": 17544 + }, + { + "epoch": 0.2, + "learning_rate": 9.626955475330927e-06, + "loss": 2.8925, + "step": 17546 + }, + { + "epoch": 0.2, + "learning_rate": 9.622327131352403e-06, + "loss": 0.9879, + "step": 17548 + }, + { + "epoch": 0.2, + "learning_rate": 9.617698787373878e-06, + "loss": 2.5846, + "step": 17550 + }, + { + "epoch": 0.2, + "learning_rate": 9.613070443395353e-06, + "loss": 2.135, + "step": 17552 + }, + { + "epoch": 0.2, + "learning_rate": 9.60844209941683e-06, + "loss": 4.5932, + "step": 17554 + }, + { + "epoch": 0.2, + "learning_rate": 9.603813755438304e-06, + "loss": 2.2842, + "step": 17556 + }, + { + "epoch": 0.2, + "learning_rate": 9.59918541145978e-06, + "loss": 0.0031, + "step": 17558 + }, + { + "epoch": 0.2, + "learning_rate": 9.594557067481255e-06, + "loss": 0.3737, + "step": 17560 + }, + { + "epoch": 0.2, + "learning_rate": 9.58992872350273e-06, + "loss": 3.5355, + "step": 17562 + }, + { + "epoch": 0.2, + "learning_rate": 9.585300379524206e-06, + "loss": 1.3505, + "step": 17564 + }, + { + "epoch": 0.2, + "learning_rate": 9.580672035545681e-06, + "loss": 2.0247, + "step": 17566 + }, + { + "epoch": 0.2, + "learning_rate": 9.576043691567157e-06, + "loss": 1.9294, + "step": 17568 + }, + { + "epoch": 0.2, + "learning_rate": 9.571415347588634e-06, + "loss": 0.6516, + "step": 17570 + }, + { + "epoch": 0.2, + "learning_rate": 9.56678700361011e-06, + "loss": 4.0319, + "step": 17572 + }, + { + "epoch": 0.2, + "learning_rate": 9.562158659631585e-06, + "loss": 1.9675, + "step": 17574 + }, + { + "epoch": 0.2, + "learning_rate": 9.55753031565306e-06, + "loss": 2.1134, + "step": 17576 + }, + { + "epoch": 0.2, + "learning_rate": 9.552901971674536e-06, + "loss": 6.9561, + "step": 17578 + }, + { + "epoch": 0.2, + "learning_rate": 9.548273627696011e-06, + "loss": 3.1191, + "step": 17580 + }, + { + "epoch": 0.2, + "learning_rate": 9.543645283717486e-06, + "loss": 0.6626, + "step": 17582 + }, + { + "epoch": 0.2, + "learning_rate": 9.539016939738962e-06, + "loss": 1.3087, + "step": 17584 + }, + { + "epoch": 0.2, + "learning_rate": 9.534388595760437e-06, + "loss": 2.3891, + "step": 17586 + }, + { + "epoch": 0.2, + "learning_rate": 9.529760251781913e-06, + "loss": 2.266, + "step": 17588 + }, + { + "epoch": 0.2, + "learning_rate": 9.52513190780339e-06, + "loss": 1.9666, + "step": 17590 + }, + { + "epoch": 0.2, + "learning_rate": 9.520503563824864e-06, + "loss": 0.0058, + "step": 17592 + }, + { + "epoch": 0.2, + "learning_rate": 9.515875219846339e-06, + "loss": 4.5148, + "step": 17594 + }, + { + "epoch": 0.2, + "learning_rate": 9.511246875867814e-06, + "loss": 0.2373, + "step": 17596 + }, + { + "epoch": 0.2, + "learning_rate": 9.50661853188929e-06, + "loss": 2.8274, + "step": 17598 + }, + { + "epoch": 0.2, + "learning_rate": 9.501990187910765e-06, + "loss": 2.6116, + "step": 17600 + }, + { + "epoch": 0.2, + "learning_rate": 9.49736184393224e-06, + "loss": 2.3679, + "step": 17602 + }, + { + "epoch": 0.2, + "learning_rate": 9.492733499953716e-06, + "loss": 1.7609, + "step": 17604 + }, + { + "epoch": 0.2, + "learning_rate": 9.488105155975192e-06, + "loss": 0.6321, + "step": 17606 + }, + { + "epoch": 0.2, + "learning_rate": 9.483476811996669e-06, + "loss": 0.0238, + "step": 17608 + }, + { + "epoch": 0.2, + "learning_rate": 9.478848468018144e-06, + "loss": 2.8545, + "step": 17610 + }, + { + "epoch": 0.2, + "learning_rate": 9.47422012403962e-06, + "loss": 0.3889, + "step": 17612 + }, + { + "epoch": 0.2, + "learning_rate": 9.469591780061095e-06, + "loss": 5.1684, + "step": 17614 + }, + { + "epoch": 0.2, + "learning_rate": 9.46496343608257e-06, + "loss": 0.9859, + "step": 17616 + }, + { + "epoch": 0.2, + "learning_rate": 9.460335092104046e-06, + "loss": 1.3481, + "step": 17618 + }, + { + "epoch": 0.2, + "learning_rate": 9.455706748125521e-06, + "loss": 2.5175, + "step": 17620 + }, + { + "epoch": 0.2, + "learning_rate": 9.451078404146997e-06, + "loss": 0.0036, + "step": 17622 + }, + { + "epoch": 0.2, + "learning_rate": 9.446450060168472e-06, + "loss": 1.8033, + "step": 17624 + }, + { + "epoch": 0.2, + "learning_rate": 9.441821716189948e-06, + "loss": 2.4979, + "step": 17626 + }, + { + "epoch": 0.2, + "learning_rate": 9.437193372211423e-06, + "loss": 0.0055, + "step": 17628 + }, + { + "epoch": 0.2, + "learning_rate": 9.432565028232898e-06, + "loss": 0.017, + "step": 17630 + }, + { + "epoch": 0.2, + "learning_rate": 9.427936684254374e-06, + "loss": 0.6679, + "step": 17632 + }, + { + "epoch": 0.2, + "learning_rate": 9.42330834027585e-06, + "loss": 1.5658, + "step": 17634 + }, + { + "epoch": 0.2, + "learning_rate": 9.418679996297325e-06, + "loss": 0.0829, + "step": 17636 + }, + { + "epoch": 0.2, + "learning_rate": 9.4140516523188e-06, + "loss": 2.1522, + "step": 17638 + }, + { + "epoch": 0.2, + "learning_rate": 9.409423308340275e-06, + "loss": 1.8307, + "step": 17640 + }, + { + "epoch": 0.2, + "learning_rate": 9.404794964361751e-06, + "loss": 6.3148, + "step": 17642 + }, + { + "epoch": 0.2, + "learning_rate": 9.400166620383228e-06, + "loss": 3.6199, + "step": 17644 + }, + { + "epoch": 0.2, + "learning_rate": 9.395538276404703e-06, + "loss": 2.5513, + "step": 17646 + }, + { + "epoch": 0.2, + "learning_rate": 9.390909932426179e-06, + "loss": 0.5538, + "step": 17648 + }, + { + "epoch": 0.2, + "learning_rate": 9.386281588447654e-06, + "loss": 3.1203, + "step": 17650 + }, + { + "epoch": 0.2, + "learning_rate": 9.38165324446913e-06, + "loss": 0.5367, + "step": 17652 + }, + { + "epoch": 0.2, + "learning_rate": 9.377024900490605e-06, + "loss": 0.6674, + "step": 17654 + }, + { + "epoch": 0.2, + "learning_rate": 9.37239655651208e-06, + "loss": 1.3472, + "step": 17656 + }, + { + "epoch": 0.2, + "learning_rate": 9.367768212533556e-06, + "loss": 4.3143, + "step": 17658 + }, + { + "epoch": 0.2, + "learning_rate": 9.363139868555031e-06, + "loss": 0.9241, + "step": 17660 + }, + { + "epoch": 0.2, + "learning_rate": 9.358511524576507e-06, + "loss": 0.876, + "step": 17662 + }, + { + "epoch": 0.2, + "learning_rate": 9.353883180597982e-06, + "loss": 1.5038, + "step": 17664 + }, + { + "epoch": 0.2, + "learning_rate": 9.349254836619458e-06, + "loss": 4.8952, + "step": 17666 + }, + { + "epoch": 0.2, + "learning_rate": 9.344626492640933e-06, + "loss": 1.2906, + "step": 17668 + }, + { + "epoch": 0.2, + "learning_rate": 9.339998148662409e-06, + "loss": 4.0824, + "step": 17670 + }, + { + "epoch": 0.2, + "learning_rate": 9.335369804683884e-06, + "loss": 2.5006, + "step": 17672 + }, + { + "epoch": 0.2, + "learning_rate": 9.33074146070536e-06, + "loss": 0.0025, + "step": 17674 + }, + { + "epoch": 0.2, + "learning_rate": 9.326113116726835e-06, + "loss": 1.7942, + "step": 17676 + }, + { + "epoch": 0.2, + "learning_rate": 9.32148477274831e-06, + "loss": 6.2133, + "step": 17678 + }, + { + "epoch": 0.2, + "learning_rate": 9.316856428769787e-06, + "loss": 0.0132, + "step": 17680 + }, + { + "epoch": 0.2, + "learning_rate": 9.312228084791263e-06, + "loss": 2.2574, + "step": 17682 + }, + { + "epoch": 0.2, + "learning_rate": 9.307599740812738e-06, + "loss": 0.6463, + "step": 17684 + }, + { + "epoch": 0.2, + "learning_rate": 9.302971396834214e-06, + "loss": 1.0197, + "step": 17686 + }, + { + "epoch": 0.2, + "learning_rate": 9.298343052855689e-06, + "loss": 1.8029, + "step": 17688 + }, + { + "epoch": 0.2, + "learning_rate": 9.293714708877164e-06, + "loss": 0.0026, + "step": 17690 + }, + { + "epoch": 0.2, + "learning_rate": 9.28908636489864e-06, + "loss": 4.6563, + "step": 17692 + }, + { + "epoch": 0.2, + "learning_rate": 9.284458020920115e-06, + "loss": 1.4874, + "step": 17694 + }, + { + "epoch": 0.2, + "learning_rate": 9.27982967694159e-06, + "loss": 1.423, + "step": 17696 + }, + { + "epoch": 0.2, + "learning_rate": 9.275201332963066e-06, + "loss": 1.2733, + "step": 17698 + }, + { + "epoch": 0.2, + "learning_rate": 9.270572988984542e-06, + "loss": 1.1392, + "step": 17700 + }, + { + "epoch": 0.2, + "learning_rate": 9.265944645006017e-06, + "loss": 0.0025, + "step": 17702 + }, + { + "epoch": 0.2, + "learning_rate": 9.261316301027492e-06, + "loss": 1.2373, + "step": 17704 + }, + { + "epoch": 0.2, + "learning_rate": 9.256687957048968e-06, + "loss": 0.3966, + "step": 17706 + }, + { + "epoch": 0.2, + "learning_rate": 9.252059613070443e-06, + "loss": 1.4258, + "step": 17708 + }, + { + "epoch": 0.2, + "learning_rate": 9.247431269091919e-06, + "loss": 0.0012, + "step": 17710 + }, + { + "epoch": 0.2, + "learning_rate": 9.242802925113394e-06, + "loss": 5.738, + "step": 17712 + }, + { + "epoch": 0.2, + "learning_rate": 9.23817458113487e-06, + "loss": 0.5457, + "step": 17714 + }, + { + "epoch": 0.2, + "learning_rate": 9.233546237156345e-06, + "loss": 0.0381, + "step": 17716 + }, + { + "epoch": 0.2, + "learning_rate": 9.228917893177822e-06, + "loss": 1.7831, + "step": 17718 + }, + { + "epoch": 0.2, + "learning_rate": 9.224289549199298e-06, + "loss": 0.2575, + "step": 17720 + }, + { + "epoch": 0.2, + "learning_rate": 9.219661205220773e-06, + "loss": 0.0389, + "step": 17722 + }, + { + "epoch": 0.2, + "learning_rate": 9.215032861242248e-06, + "loss": 0.396, + "step": 17724 + }, + { + "epoch": 0.2, + "learning_rate": 9.210404517263724e-06, + "loss": 1.0754, + "step": 17726 + }, + { + "epoch": 0.2, + "learning_rate": 9.2057761732852e-06, + "loss": 1.2545, + "step": 17728 + }, + { + "epoch": 0.2, + "learning_rate": 9.201147829306675e-06, + "loss": 1.8999, + "step": 17730 + }, + { + "epoch": 0.2, + "learning_rate": 9.19651948532815e-06, + "loss": 1.8618, + "step": 17732 + }, + { + "epoch": 0.2, + "learning_rate": 9.191891141349626e-06, + "loss": 1.5642, + "step": 17734 + }, + { + "epoch": 0.2, + "learning_rate": 9.187262797371101e-06, + "loss": 5.8555, + "step": 17736 + }, + { + "epoch": 0.2, + "learning_rate": 9.182634453392576e-06, + "loss": 1.414, + "step": 17738 + }, + { + "epoch": 0.2, + "learning_rate": 9.178006109414052e-06, + "loss": 0.835, + "step": 17740 + }, + { + "epoch": 0.2, + "learning_rate": 9.173377765435527e-06, + "loss": 0.0209, + "step": 17742 + }, + { + "epoch": 0.2, + "learning_rate": 9.168749421457003e-06, + "loss": 3.209, + "step": 17744 + }, + { + "epoch": 0.2, + "learning_rate": 9.164121077478478e-06, + "loss": 1.0643, + "step": 17746 + }, + { + "epoch": 0.2, + "learning_rate": 9.159492733499954e-06, + "loss": 0.0005, + "step": 17748 + }, + { + "epoch": 0.2, + "learning_rate": 9.154864389521429e-06, + "loss": 0.0025, + "step": 17750 + }, + { + "epoch": 0.2, + "learning_rate": 9.150236045542904e-06, + "loss": 0.019, + "step": 17752 + }, + { + "epoch": 0.2, + "learning_rate": 9.145607701564381e-06, + "loss": 1.0509, + "step": 17754 + }, + { + "epoch": 0.2, + "learning_rate": 9.140979357585857e-06, + "loss": 0.0287, + "step": 17756 + }, + { + "epoch": 0.2, + "learning_rate": 9.136351013607332e-06, + "loss": 2.5271, + "step": 17758 + }, + { + "epoch": 0.2, + "learning_rate": 9.131722669628808e-06, + "loss": 3.7968, + "step": 17760 + }, + { + "epoch": 0.2, + "learning_rate": 9.127094325650283e-06, + "loss": 0.0036, + "step": 17762 + }, + { + "epoch": 0.2, + "learning_rate": 9.122465981671759e-06, + "loss": 3.6761, + "step": 17764 + }, + { + "epoch": 0.2, + "learning_rate": 9.117837637693234e-06, + "loss": 1.4779, + "step": 17766 + }, + { + "epoch": 0.2, + "learning_rate": 9.11320929371471e-06, + "loss": 3.9365, + "step": 17768 + }, + { + "epoch": 0.2, + "learning_rate": 9.108580949736185e-06, + "loss": 0.0006, + "step": 17770 + }, + { + "epoch": 0.2, + "learning_rate": 9.10395260575766e-06, + "loss": 1.6952, + "step": 17772 + }, + { + "epoch": 0.2, + "learning_rate": 9.099324261779136e-06, + "loss": 0.0071, + "step": 17774 + }, + { + "epoch": 0.2, + "learning_rate": 9.094695917800611e-06, + "loss": 0.5095, + "step": 17776 + }, + { + "epoch": 0.2, + "learning_rate": 9.090067573822087e-06, + "loss": 1.4908, + "step": 17778 + }, + { + "epoch": 0.2, + "learning_rate": 9.085439229843562e-06, + "loss": 0.0017, + "step": 17780 + }, + { + "epoch": 0.2, + "learning_rate": 9.080810885865037e-06, + "loss": 1.7061, + "step": 17782 + }, + { + "epoch": 0.2, + "learning_rate": 9.076182541886513e-06, + "loss": 0.5146, + "step": 17784 + }, + { + "epoch": 0.2, + "learning_rate": 9.071554197907988e-06, + "loss": 0.2747, + "step": 17786 + }, + { + "epoch": 0.2, + "learning_rate": 9.066925853929464e-06, + "loss": 0.3484, + "step": 17788 + }, + { + "epoch": 0.2, + "learning_rate": 9.062297509950939e-06, + "loss": 0.0152, + "step": 17790 + }, + { + "epoch": 0.2, + "learning_rate": 9.057669165972416e-06, + "loss": 0.2273, + "step": 17792 + }, + { + "epoch": 0.2, + "learning_rate": 9.053040821993892e-06, + "loss": 4.9922, + "step": 17794 + }, + { + "epoch": 0.2, + "learning_rate": 9.048412478015367e-06, + "loss": 2.2244, + "step": 17796 + }, + { + "epoch": 0.2, + "learning_rate": 9.043784134036843e-06, + "loss": 1.0989, + "step": 17798 + }, + { + "epoch": 0.21, + "learning_rate": 9.039155790058318e-06, + "loss": 2.4083, + "step": 17800 + }, + { + "epoch": 0.21, + "learning_rate": 9.034527446079793e-06, + "loss": 0.1054, + "step": 17802 + }, + { + "epoch": 0.21, + "learning_rate": 9.029899102101269e-06, + "loss": 5.7005, + "step": 17804 + }, + { + "epoch": 0.21, + "learning_rate": 9.025270758122744e-06, + "loss": 0.001, + "step": 17806 + }, + { + "epoch": 0.21, + "learning_rate": 9.02064241414422e-06, + "loss": 0.279, + "step": 17808 + }, + { + "epoch": 0.21, + "learning_rate": 9.016014070165695e-06, + "loss": 0.3224, + "step": 17810 + }, + { + "epoch": 0.21, + "learning_rate": 9.01138572618717e-06, + "loss": 0.4758, + "step": 17812 + }, + { + "epoch": 0.21, + "learning_rate": 9.006757382208646e-06, + "loss": 1.5108, + "step": 17814 + }, + { + "epoch": 0.21, + "learning_rate": 9.002129038230121e-06, + "loss": 4.9447, + "step": 17816 + }, + { + "epoch": 0.21, + "learning_rate": 8.997500694251597e-06, + "loss": 0.9999, + "step": 17818 + }, + { + "epoch": 0.21, + "learning_rate": 8.992872350273072e-06, + "loss": 7.6745, + "step": 17820 + }, + { + "epoch": 0.21, + "learning_rate": 8.988244006294548e-06, + "loss": 0.0222, + "step": 17822 + }, + { + "epoch": 0.21, + "learning_rate": 8.983615662316023e-06, + "loss": 0.2438, + "step": 17824 + }, + { + "epoch": 0.21, + "learning_rate": 8.978987318337498e-06, + "loss": 2.2164, + "step": 17826 + }, + { + "epoch": 0.21, + "learning_rate": 8.974358974358976e-06, + "loss": 6.5445, + "step": 17828 + }, + { + "epoch": 0.21, + "learning_rate": 8.969730630380451e-06, + "loss": 0.097, + "step": 17830 + }, + { + "epoch": 0.21, + "learning_rate": 8.965102286401926e-06, + "loss": 1.0672, + "step": 17832 + }, + { + "epoch": 0.21, + "learning_rate": 8.960473942423402e-06, + "loss": 2.4275, + "step": 17834 + }, + { + "epoch": 0.21, + "learning_rate": 8.955845598444877e-06, + "loss": 1.7259, + "step": 17836 + }, + { + "epoch": 0.21, + "learning_rate": 8.951217254466353e-06, + "loss": 2.8615, + "step": 17838 + }, + { + "epoch": 0.21, + "learning_rate": 8.946588910487828e-06, + "loss": 4.5009, + "step": 17840 + }, + { + "epoch": 0.21, + "learning_rate": 8.941960566509304e-06, + "loss": 1.9145, + "step": 17842 + }, + { + "epoch": 0.21, + "learning_rate": 8.937332222530779e-06, + "loss": 2.1639, + "step": 17844 + }, + { + "epoch": 0.21, + "learning_rate": 8.932703878552254e-06, + "loss": 1.2489, + "step": 17846 + }, + { + "epoch": 0.21, + "learning_rate": 8.92807553457373e-06, + "loss": 0.0014, + "step": 17848 + }, + { + "epoch": 0.21, + "learning_rate": 8.923447190595205e-06, + "loss": 0.0007, + "step": 17850 + }, + { + "epoch": 0.21, + "learning_rate": 8.91881884661668e-06, + "loss": 1.8288, + "step": 17852 + }, + { + "epoch": 0.21, + "learning_rate": 8.914190502638156e-06, + "loss": 0.1261, + "step": 17854 + }, + { + "epoch": 0.21, + "learning_rate": 8.909562158659632e-06, + "loss": 3.5555, + "step": 17856 + }, + { + "epoch": 0.21, + "learning_rate": 8.904933814681107e-06, + "loss": 0.0037, + "step": 17858 + }, + { + "epoch": 0.21, + "learning_rate": 8.900305470702582e-06, + "loss": 0.877, + "step": 17860 + }, + { + "epoch": 0.21, + "learning_rate": 8.895677126724058e-06, + "loss": 1.5135, + "step": 17862 + }, + { + "epoch": 0.21, + "learning_rate": 8.891048782745535e-06, + "loss": 0.0045, + "step": 17864 + }, + { + "epoch": 0.21, + "learning_rate": 8.88642043876701e-06, + "loss": 0.0309, + "step": 17866 + }, + { + "epoch": 0.21, + "learning_rate": 8.881792094788486e-06, + "loss": 0.0038, + "step": 17868 + }, + { + "epoch": 0.21, + "learning_rate": 8.877163750809961e-06, + "loss": 2.268, + "step": 17870 + }, + { + "epoch": 0.21, + "learning_rate": 8.872535406831437e-06, + "loss": 0.8377, + "step": 17872 + }, + { + "epoch": 0.21, + "learning_rate": 8.867907062852912e-06, + "loss": 1.279, + "step": 17874 + }, + { + "epoch": 0.21, + "learning_rate": 8.863278718874387e-06, + "loss": 1.7032, + "step": 17876 + }, + { + "epoch": 0.21, + "learning_rate": 8.858650374895863e-06, + "loss": 0.2339, + "step": 17878 + }, + { + "epoch": 0.21, + "learning_rate": 8.854022030917338e-06, + "loss": 2.2193, + "step": 17880 + }, + { + "epoch": 0.21, + "learning_rate": 8.849393686938814e-06, + "loss": 0.011, + "step": 17882 + }, + { + "epoch": 0.21, + "learning_rate": 8.84476534296029e-06, + "loss": 1.4706, + "step": 17884 + }, + { + "epoch": 0.21, + "learning_rate": 8.840136998981765e-06, + "loss": 5.1827, + "step": 17886 + }, + { + "epoch": 0.21, + "learning_rate": 8.83550865500324e-06, + "loss": 3.4926, + "step": 17888 + }, + { + "epoch": 0.21, + "learning_rate": 8.830880311024715e-06, + "loss": 1.9671, + "step": 17890 + }, + { + "epoch": 0.21, + "learning_rate": 8.826251967046191e-06, + "loss": 0.775, + "step": 17892 + }, + { + "epoch": 0.21, + "learning_rate": 8.821623623067666e-06, + "loss": 3.3082, + "step": 17894 + }, + { + "epoch": 0.21, + "learning_rate": 8.816995279089142e-06, + "loss": 3.4971, + "step": 17896 + }, + { + "epoch": 0.21, + "learning_rate": 8.812366935110617e-06, + "loss": 0.929, + "step": 17898 + }, + { + "epoch": 0.21, + "learning_rate": 8.807738591132093e-06, + "loss": 5.519, + "step": 17900 + }, + { + "epoch": 0.21, + "learning_rate": 8.80311024715357e-06, + "loss": 0.8438, + "step": 17902 + }, + { + "epoch": 0.21, + "learning_rate": 8.798481903175045e-06, + "loss": 2.022, + "step": 17904 + }, + { + "epoch": 0.21, + "learning_rate": 8.79385355919652e-06, + "loss": 5.0338, + "step": 17906 + }, + { + "epoch": 0.21, + "learning_rate": 8.789225215217996e-06, + "loss": 3.1664, + "step": 17908 + }, + { + "epoch": 0.21, + "learning_rate": 8.784596871239471e-06, + "loss": 1.125, + "step": 17910 + }, + { + "epoch": 0.21, + "learning_rate": 8.779968527260947e-06, + "loss": 1.2995, + "step": 17912 + }, + { + "epoch": 0.21, + "learning_rate": 8.775340183282422e-06, + "loss": 0.7664, + "step": 17914 + }, + { + "epoch": 0.21, + "learning_rate": 8.770711839303898e-06, + "loss": 0.5544, + "step": 17916 + }, + { + "epoch": 0.21, + "learning_rate": 8.766083495325373e-06, + "loss": 2.2225, + "step": 17918 + }, + { + "epoch": 0.21, + "learning_rate": 8.761455151346848e-06, + "loss": 4.0363, + "step": 17920 + }, + { + "epoch": 0.21, + "learning_rate": 8.756826807368324e-06, + "loss": 1.248, + "step": 17922 + }, + { + "epoch": 0.21, + "learning_rate": 8.7521984633898e-06, + "loss": 4.646, + "step": 17924 + }, + { + "epoch": 0.21, + "learning_rate": 8.747570119411275e-06, + "loss": 0.0228, + "step": 17926 + }, + { + "epoch": 0.21, + "learning_rate": 8.74294177543275e-06, + "loss": 3.1551, + "step": 17928 + }, + { + "epoch": 0.21, + "learning_rate": 8.738313431454226e-06, + "loss": 0.5551, + "step": 17930 + }, + { + "epoch": 0.21, + "learning_rate": 8.733685087475701e-06, + "loss": 0.262, + "step": 17932 + }, + { + "epoch": 0.21, + "learning_rate": 8.729056743497176e-06, + "loss": 1.1281, + "step": 17934 + }, + { + "epoch": 0.21, + "learning_rate": 8.724428399518652e-06, + "loss": 0.727, + "step": 17936 + }, + { + "epoch": 0.21, + "learning_rate": 8.719800055540129e-06, + "loss": 1.2762, + "step": 17938 + }, + { + "epoch": 0.21, + "learning_rate": 8.715171711561604e-06, + "loss": 4.7664, + "step": 17940 + }, + { + "epoch": 0.21, + "learning_rate": 8.71054336758308e-06, + "loss": 3.4277, + "step": 17942 + }, + { + "epoch": 0.21, + "learning_rate": 8.705915023604555e-06, + "loss": 0.0556, + "step": 17944 + }, + { + "epoch": 0.21, + "learning_rate": 8.70128667962603e-06, + "loss": 1.2683, + "step": 17946 + }, + { + "epoch": 0.21, + "learning_rate": 8.696658335647506e-06, + "loss": 0.0596, + "step": 17948 + }, + { + "epoch": 0.21, + "learning_rate": 8.692029991668982e-06, + "loss": 1.8033, + "step": 17950 + }, + { + "epoch": 0.21, + "learning_rate": 8.687401647690457e-06, + "loss": 4.9111, + "step": 17952 + }, + { + "epoch": 0.21, + "learning_rate": 8.682773303711932e-06, + "loss": 0.6405, + "step": 17954 + }, + { + "epoch": 0.21, + "learning_rate": 8.678144959733408e-06, + "loss": 0.6897, + "step": 17956 + }, + { + "epoch": 0.21, + "learning_rate": 8.673516615754883e-06, + "loss": 0.333, + "step": 17958 + }, + { + "epoch": 0.21, + "learning_rate": 8.668888271776359e-06, + "loss": 1.2894, + "step": 17960 + }, + { + "epoch": 0.21, + "learning_rate": 8.664259927797834e-06, + "loss": 0.0086, + "step": 17962 + }, + { + "epoch": 0.21, + "learning_rate": 8.65963158381931e-06, + "loss": 0.0005, + "step": 17964 + }, + { + "epoch": 0.21, + "learning_rate": 8.655003239840785e-06, + "loss": 0.5376, + "step": 17966 + }, + { + "epoch": 0.21, + "learning_rate": 8.65037489586226e-06, + "loss": 1.3204, + "step": 17968 + }, + { + "epoch": 0.21, + "learning_rate": 8.645746551883736e-06, + "loss": 3.2096, + "step": 17970 + }, + { + "epoch": 0.21, + "learning_rate": 8.641118207905211e-06, + "loss": 0.725, + "step": 17972 + }, + { + "epoch": 0.21, + "learning_rate": 8.636489863926688e-06, + "loss": 0.2159, + "step": 17974 + }, + { + "epoch": 0.21, + "learning_rate": 8.631861519948164e-06, + "loss": 7.5116, + "step": 17976 + }, + { + "epoch": 0.21, + "learning_rate": 8.62723317596964e-06, + "loss": 1.0297, + "step": 17978 + }, + { + "epoch": 0.21, + "learning_rate": 8.622604831991115e-06, + "loss": 0.2622, + "step": 17980 + }, + { + "epoch": 0.21, + "learning_rate": 8.61797648801259e-06, + "loss": 0.0005, + "step": 17982 + }, + { + "epoch": 0.21, + "learning_rate": 8.613348144034065e-06, + "loss": 2.2767, + "step": 17984 + }, + { + "epoch": 0.21, + "learning_rate": 8.608719800055541e-06, + "loss": 0.4739, + "step": 17986 + }, + { + "epoch": 0.21, + "learning_rate": 8.604091456077016e-06, + "loss": 0.3689, + "step": 17988 + }, + { + "epoch": 0.21, + "learning_rate": 8.599463112098492e-06, + "loss": 2.0531, + "step": 17990 + }, + { + "epoch": 0.21, + "learning_rate": 8.594834768119967e-06, + "loss": 0.0147, + "step": 17992 + }, + { + "epoch": 0.21, + "learning_rate": 8.590206424141443e-06, + "loss": 2.032, + "step": 17994 + }, + { + "epoch": 0.21, + "learning_rate": 8.585578080162918e-06, + "loss": 1.4323, + "step": 17996 + }, + { + "epoch": 0.21, + "learning_rate": 8.580949736184393e-06, + "loss": 0.4126, + "step": 17998 + }, + { + "epoch": 0.21, + "learning_rate": 8.576321392205869e-06, + "loss": 0.0003, + "step": 18000 + }, + { + "epoch": 0.21, + "learning_rate": 8.571693048227344e-06, + "loss": 0.1226, + "step": 18002 + }, + { + "epoch": 0.21, + "learning_rate": 8.56706470424882e-06, + "loss": 0.0009, + "step": 18004 + }, + { + "epoch": 0.21, + "learning_rate": 8.562436360270295e-06, + "loss": 0.2063, + "step": 18006 + }, + { + "epoch": 0.21, + "learning_rate": 8.55780801629177e-06, + "loss": 3.1553, + "step": 18008 + }, + { + "epoch": 0.21, + "learning_rate": 8.553179672313246e-06, + "loss": 3.64, + "step": 18010 + }, + { + "epoch": 0.21, + "learning_rate": 8.548551328334723e-06, + "loss": 0.0023, + "step": 18012 + }, + { + "epoch": 0.21, + "learning_rate": 8.543922984356199e-06, + "loss": 0.1195, + "step": 18014 + }, + { + "epoch": 0.21, + "learning_rate": 8.539294640377674e-06, + "loss": 1.8913, + "step": 18016 + }, + { + "epoch": 0.21, + "learning_rate": 8.53466629639915e-06, + "loss": 2.9787, + "step": 18018 + }, + { + "epoch": 0.21, + "learning_rate": 8.530037952420625e-06, + "loss": 8.5651, + "step": 18020 + }, + { + "epoch": 0.21, + "learning_rate": 8.5254096084421e-06, + "loss": 0.2374, + "step": 18022 + }, + { + "epoch": 0.21, + "learning_rate": 8.520781264463576e-06, + "loss": 2.8886, + "step": 18024 + }, + { + "epoch": 0.21, + "learning_rate": 8.516152920485051e-06, + "loss": 1.0926, + "step": 18026 + }, + { + "epoch": 0.21, + "learning_rate": 8.511524576506526e-06, + "loss": 1.2612, + "step": 18028 + }, + { + "epoch": 0.21, + "learning_rate": 8.506896232528002e-06, + "loss": 0.7364, + "step": 18030 + }, + { + "epoch": 0.21, + "learning_rate": 8.502267888549477e-06, + "loss": 0.2961, + "step": 18032 + }, + { + "epoch": 0.21, + "learning_rate": 8.497639544570953e-06, + "loss": 0.6282, + "step": 18034 + }, + { + "epoch": 0.21, + "learning_rate": 8.493011200592428e-06, + "loss": 1.8184, + "step": 18036 + }, + { + "epoch": 0.21, + "learning_rate": 8.488382856613904e-06, + "loss": 0.7538, + "step": 18038 + }, + { + "epoch": 0.21, + "learning_rate": 8.483754512635379e-06, + "loss": 0.0014, + "step": 18040 + }, + { + "epoch": 0.21, + "learning_rate": 8.479126168656854e-06, + "loss": 3.8086, + "step": 18042 + }, + { + "epoch": 0.21, + "learning_rate": 8.47449782467833e-06, + "loss": 0.097, + "step": 18044 + }, + { + "epoch": 0.21, + "learning_rate": 8.469869480699805e-06, + "loss": 1.3109, + "step": 18046 + }, + { + "epoch": 0.21, + "learning_rate": 8.465241136721282e-06, + "loss": 0.2929, + "step": 18048 + }, + { + "epoch": 0.21, + "learning_rate": 8.460612792742758e-06, + "loss": 2.4523, + "step": 18050 + }, + { + "epoch": 0.21, + "learning_rate": 8.455984448764233e-06, + "loss": 5.3084, + "step": 18052 + }, + { + "epoch": 0.21, + "learning_rate": 8.451356104785709e-06, + "loss": 4.6654, + "step": 18054 + }, + { + "epoch": 0.21, + "learning_rate": 8.446727760807184e-06, + "loss": 0.4405, + "step": 18056 + }, + { + "epoch": 0.21, + "learning_rate": 8.44209941682866e-06, + "loss": 0.0006, + "step": 18058 + }, + { + "epoch": 0.21, + "learning_rate": 8.437471072850135e-06, + "loss": 5.9472, + "step": 18060 + }, + { + "epoch": 0.21, + "learning_rate": 8.43284272887161e-06, + "loss": 4.2324, + "step": 18062 + }, + { + "epoch": 0.21, + "learning_rate": 8.428214384893086e-06, + "loss": 2.5635, + "step": 18064 + }, + { + "epoch": 0.21, + "learning_rate": 8.423586040914561e-06, + "loss": 0.1121, + "step": 18066 + }, + { + "epoch": 0.21, + "learning_rate": 8.418957696936037e-06, + "loss": 1.9448, + "step": 18068 + }, + { + "epoch": 0.21, + "learning_rate": 8.414329352957512e-06, + "loss": 0.0143, + "step": 18070 + }, + { + "epoch": 0.21, + "learning_rate": 8.409701008978988e-06, + "loss": 0.0027, + "step": 18072 + }, + { + "epoch": 0.21, + "learning_rate": 8.405072665000463e-06, + "loss": 0.0016, + "step": 18074 + }, + { + "epoch": 0.21, + "learning_rate": 8.400444321021938e-06, + "loss": 0.1151, + "step": 18076 + }, + { + "epoch": 0.21, + "learning_rate": 8.395815977043414e-06, + "loss": 1.4308, + "step": 18078 + }, + { + "epoch": 0.21, + "learning_rate": 8.39118763306489e-06, + "loss": 5.6127, + "step": 18080 + }, + { + "epoch": 0.21, + "learning_rate": 8.386559289086365e-06, + "loss": 1.9714, + "step": 18082 + }, + { + "epoch": 0.21, + "learning_rate": 8.381930945107842e-06, + "loss": 0.027, + "step": 18084 + }, + { + "epoch": 0.21, + "learning_rate": 8.377302601129317e-06, + "loss": 1.4655, + "step": 18086 + }, + { + "epoch": 0.21, + "learning_rate": 8.372674257150793e-06, + "loss": 2.5395, + "step": 18088 + }, + { + "epoch": 0.21, + "learning_rate": 8.368045913172268e-06, + "loss": 6.8316, + "step": 18090 + }, + { + "epoch": 0.21, + "learning_rate": 8.363417569193743e-06, + "loss": 0.8381, + "step": 18092 + }, + { + "epoch": 0.21, + "learning_rate": 8.358789225215219e-06, + "loss": 2.0222, + "step": 18094 + }, + { + "epoch": 0.21, + "learning_rate": 8.354160881236694e-06, + "loss": 3.5211, + "step": 18096 + }, + { + "epoch": 0.21, + "learning_rate": 8.34953253725817e-06, + "loss": 0.5721, + "step": 18098 + }, + { + "epoch": 0.21, + "learning_rate": 8.344904193279643e-06, + "loss": 1.6093, + "step": 18100 + }, + { + "epoch": 0.21, + "learning_rate": 8.34027584930112e-06, + "loss": 2.403, + "step": 18102 + }, + { + "epoch": 0.21, + "learning_rate": 8.335647505322596e-06, + "loss": 0.9236, + "step": 18104 + }, + { + "epoch": 0.21, + "learning_rate": 8.331019161344071e-06, + "loss": 0.0016, + "step": 18106 + }, + { + "epoch": 0.21, + "learning_rate": 8.326390817365547e-06, + "loss": 1.4603, + "step": 18108 + }, + { + "epoch": 0.21, + "learning_rate": 8.321762473387022e-06, + "loss": 1.7034, + "step": 18110 + }, + { + "epoch": 0.21, + "learning_rate": 8.317134129408498e-06, + "loss": 1.9037, + "step": 18112 + }, + { + "epoch": 0.21, + "learning_rate": 8.312505785429973e-06, + "loss": 0.3992, + "step": 18114 + }, + { + "epoch": 0.21, + "learning_rate": 8.307877441451449e-06, + "loss": 1.3445, + "step": 18116 + }, + { + "epoch": 0.21, + "learning_rate": 8.303249097472924e-06, + "loss": 1.2552, + "step": 18118 + }, + { + "epoch": 0.21, + "learning_rate": 8.2986207534944e-06, + "loss": 0.0005, + "step": 18120 + }, + { + "epoch": 0.21, + "learning_rate": 8.293992409515877e-06, + "loss": 1.7361, + "step": 18122 + }, + { + "epoch": 0.21, + "learning_rate": 8.289364065537352e-06, + "loss": 0.6006, + "step": 18124 + }, + { + "epoch": 0.21, + "learning_rate": 8.284735721558827e-06, + "loss": 0.5287, + "step": 18126 + }, + { + "epoch": 0.21, + "learning_rate": 8.280107377580303e-06, + "loss": 0.1347, + "step": 18128 + }, + { + "epoch": 0.21, + "learning_rate": 8.275479033601778e-06, + "loss": 1.2993, + "step": 18130 + }, + { + "epoch": 0.21, + "learning_rate": 8.270850689623254e-06, + "loss": 0.0012, + "step": 18132 + }, + { + "epoch": 0.21, + "learning_rate": 8.266222345644729e-06, + "loss": 2.9028, + "step": 18134 + }, + { + "epoch": 0.21, + "learning_rate": 8.261594001666203e-06, + "loss": 4.2111, + "step": 18136 + }, + { + "epoch": 0.21, + "learning_rate": 8.25696565768768e-06, + "loss": 1.9959, + "step": 18138 + }, + { + "epoch": 0.21, + "learning_rate": 8.252337313709155e-06, + "loss": 0.7133, + "step": 18140 + }, + { + "epoch": 0.21, + "learning_rate": 8.24770896973063e-06, + "loss": 0.0013, + "step": 18142 + }, + { + "epoch": 0.21, + "learning_rate": 8.243080625752106e-06, + "loss": 0.7486, + "step": 18144 + }, + { + "epoch": 0.21, + "learning_rate": 8.238452281773582e-06, + "loss": 0.3887, + "step": 18146 + }, + { + "epoch": 0.21, + "learning_rate": 8.233823937795057e-06, + "loss": 0.9095, + "step": 18148 + }, + { + "epoch": 0.21, + "learning_rate": 8.229195593816532e-06, + "loss": 1.7729, + "step": 18150 + }, + { + "epoch": 0.21, + "learning_rate": 8.224567249838008e-06, + "loss": 0.5791, + "step": 18152 + }, + { + "epoch": 0.21, + "learning_rate": 8.219938905859483e-06, + "loss": 4.6596, + "step": 18154 + }, + { + "epoch": 0.21, + "learning_rate": 8.215310561880959e-06, + "loss": 0.0357, + "step": 18156 + }, + { + "epoch": 0.21, + "learning_rate": 8.210682217902436e-06, + "loss": 1.7598, + "step": 18158 + }, + { + "epoch": 0.21, + "learning_rate": 8.206053873923911e-06, + "loss": 2.1792, + "step": 18160 + }, + { + "epoch": 0.21, + "learning_rate": 8.201425529945387e-06, + "loss": 0.0645, + "step": 18162 + }, + { + "epoch": 0.21, + "learning_rate": 8.196797185966862e-06, + "loss": 0.0007, + "step": 18164 + }, + { + "epoch": 0.21, + "learning_rate": 8.192168841988338e-06, + "loss": 2.0623, + "step": 18166 + }, + { + "epoch": 0.21, + "learning_rate": 8.187540498009813e-06, + "loss": 7.3585, + "step": 18168 + }, + { + "epoch": 0.21, + "learning_rate": 8.182912154031288e-06, + "loss": 0.0008, + "step": 18170 + }, + { + "epoch": 0.21, + "learning_rate": 8.178283810052762e-06, + "loss": 4.7933, + "step": 18172 + }, + { + "epoch": 0.21, + "learning_rate": 8.17365546607424e-06, + "loss": 1.4122, + "step": 18174 + }, + { + "epoch": 0.21, + "learning_rate": 8.169027122095715e-06, + "loss": 7.7284, + "step": 18176 + }, + { + "epoch": 0.21, + "learning_rate": 8.16439877811719e-06, + "loss": 1.1591, + "step": 18178 + }, + { + "epoch": 0.21, + "learning_rate": 8.159770434138666e-06, + "loss": 2.8168, + "step": 18180 + }, + { + "epoch": 0.21, + "learning_rate": 8.155142090160141e-06, + "loss": 0.5556, + "step": 18182 + }, + { + "epoch": 0.21, + "learning_rate": 8.150513746181616e-06, + "loss": 2.487, + "step": 18184 + }, + { + "epoch": 0.21, + "learning_rate": 8.145885402203092e-06, + "loss": 0.2101, + "step": 18186 + }, + { + "epoch": 0.21, + "learning_rate": 8.141257058224567e-06, + "loss": 0.6281, + "step": 18188 + }, + { + "epoch": 0.21, + "learning_rate": 8.136628714246043e-06, + "loss": 2.1692, + "step": 18190 + }, + { + "epoch": 0.21, + "learning_rate": 8.132000370267518e-06, + "loss": 4.0518, + "step": 18192 + }, + { + "epoch": 0.21, + "learning_rate": 8.127372026288994e-06, + "loss": 1.1794, + "step": 18194 + }, + { + "epoch": 0.21, + "learning_rate": 8.12274368231047e-06, + "loss": 0.0047, + "step": 18196 + }, + { + "epoch": 0.21, + "learning_rate": 8.118115338331946e-06, + "loss": 3.7098, + "step": 18198 + }, + { + "epoch": 0.21, + "learning_rate": 8.113486994353421e-06, + "loss": 0.2851, + "step": 18200 + }, + { + "epoch": 0.21, + "learning_rate": 8.108858650374897e-06, + "loss": 0.8921, + "step": 18202 + }, + { + "epoch": 0.21, + "learning_rate": 8.104230306396372e-06, + "loss": 0.0077, + "step": 18204 + }, + { + "epoch": 0.21, + "learning_rate": 8.099601962417848e-06, + "loss": 2.8095, + "step": 18206 + }, + { + "epoch": 0.21, + "learning_rate": 8.094973618439321e-06, + "loss": 2.9735, + "step": 18208 + }, + { + "epoch": 0.21, + "learning_rate": 8.090345274460797e-06, + "loss": 2.5299, + "step": 18210 + }, + { + "epoch": 0.21, + "learning_rate": 8.085716930482274e-06, + "loss": 0.0008, + "step": 18212 + }, + { + "epoch": 0.21, + "learning_rate": 8.08108858650375e-06, + "loss": 1.5211, + "step": 18214 + }, + { + "epoch": 0.21, + "learning_rate": 8.076460242525225e-06, + "loss": 2.054, + "step": 18216 + }, + { + "epoch": 0.21, + "learning_rate": 8.0718318985467e-06, + "loss": 0.2518, + "step": 18218 + }, + { + "epoch": 0.21, + "learning_rate": 8.067203554568176e-06, + "loss": 1.696, + "step": 18220 + }, + { + "epoch": 0.21, + "learning_rate": 8.062575210589651e-06, + "loss": 0.2729, + "step": 18222 + }, + { + "epoch": 0.21, + "learning_rate": 8.057946866611127e-06, + "loss": 2.0137, + "step": 18224 + }, + { + "epoch": 0.21, + "learning_rate": 8.053318522632602e-06, + "loss": 1.4195, + "step": 18226 + }, + { + "epoch": 0.21, + "learning_rate": 8.048690178654077e-06, + "loss": 1.1337, + "step": 18228 + }, + { + "epoch": 0.21, + "learning_rate": 8.044061834675553e-06, + "loss": 3.2558, + "step": 18230 + }, + { + "epoch": 0.21, + "learning_rate": 8.03943349069703e-06, + "loss": 1.7845, + "step": 18232 + }, + { + "epoch": 0.21, + "learning_rate": 8.034805146718505e-06, + "loss": 4.2132, + "step": 18234 + }, + { + "epoch": 0.21, + "learning_rate": 8.03017680273998e-06, + "loss": 1.1297, + "step": 18236 + }, + { + "epoch": 0.21, + "learning_rate": 8.025548458761456e-06, + "loss": 1.6473, + "step": 18238 + }, + { + "epoch": 0.21, + "learning_rate": 8.020920114782932e-06, + "loss": 4.6807, + "step": 18240 + }, + { + "epoch": 0.21, + "learning_rate": 8.016291770804407e-06, + "loss": 4.8673, + "step": 18242 + }, + { + "epoch": 0.21, + "learning_rate": 8.01166342682588e-06, + "loss": 2.3331, + "step": 18244 + }, + { + "epoch": 0.21, + "learning_rate": 8.007035082847356e-06, + "loss": 0.0358, + "step": 18246 + }, + { + "epoch": 0.21, + "learning_rate": 8.002406738868833e-06, + "loss": 0.0444, + "step": 18248 + }, + { + "epoch": 0.21, + "learning_rate": 7.997778394890309e-06, + "loss": 0.6343, + "step": 18250 + }, + { + "epoch": 0.21, + "learning_rate": 7.993150050911784e-06, + "loss": 2.3068, + "step": 18252 + }, + { + "epoch": 0.21, + "learning_rate": 7.98852170693326e-06, + "loss": 2.1088, + "step": 18254 + }, + { + "epoch": 0.21, + "learning_rate": 7.983893362954735e-06, + "loss": 4.7567, + "step": 18256 + }, + { + "epoch": 0.21, + "learning_rate": 7.97926501897621e-06, + "loss": 1.5881, + "step": 18258 + }, + { + "epoch": 0.21, + "learning_rate": 7.974636674997686e-06, + "loss": 0.4313, + "step": 18260 + }, + { + "epoch": 0.21, + "learning_rate": 7.970008331019161e-06, + "loss": 0.8834, + "step": 18262 + }, + { + "epoch": 0.21, + "learning_rate": 7.965379987040637e-06, + "loss": 1.7597, + "step": 18264 + }, + { + "epoch": 0.21, + "learning_rate": 7.960751643062112e-06, + "loss": 0.8536, + "step": 18266 + }, + { + "epoch": 0.21, + "learning_rate": 7.95612329908359e-06, + "loss": 0.0007, + "step": 18268 + }, + { + "epoch": 0.21, + "learning_rate": 7.951494955105065e-06, + "loss": 1.5067, + "step": 18270 + }, + { + "epoch": 0.21, + "learning_rate": 7.94686661112654e-06, + "loss": 1.1456, + "step": 18272 + }, + { + "epoch": 0.21, + "learning_rate": 7.942238267148016e-06, + "loss": 1.4295, + "step": 18274 + }, + { + "epoch": 0.21, + "learning_rate": 7.937609923169491e-06, + "loss": 0.2277, + "step": 18276 + }, + { + "epoch": 0.21, + "learning_rate": 7.932981579190966e-06, + "loss": 1.969, + "step": 18278 + }, + { + "epoch": 0.21, + "learning_rate": 7.928353235212442e-06, + "loss": 0.2174, + "step": 18280 + }, + { + "epoch": 0.21, + "learning_rate": 7.923724891233916e-06, + "loss": 1.5742, + "step": 18282 + }, + { + "epoch": 0.21, + "learning_rate": 7.919096547255391e-06, + "loss": 3.232, + "step": 18284 + }, + { + "epoch": 0.21, + "learning_rate": 7.914468203276868e-06, + "loss": 7.8422, + "step": 18286 + }, + { + "epoch": 0.21, + "learning_rate": 7.909839859298344e-06, + "loss": 0.007, + "step": 18288 + }, + { + "epoch": 0.21, + "learning_rate": 7.905211515319819e-06, + "loss": 0.1761, + "step": 18290 + }, + { + "epoch": 0.21, + "learning_rate": 7.900583171341294e-06, + "loss": 1.4063, + "step": 18292 + }, + { + "epoch": 0.21, + "learning_rate": 7.89595482736277e-06, + "loss": 0.2543, + "step": 18294 + }, + { + "epoch": 0.21, + "learning_rate": 7.891326483384245e-06, + "loss": 0.1522, + "step": 18296 + }, + { + "epoch": 0.21, + "learning_rate": 7.88669813940572e-06, + "loss": 0.4265, + "step": 18298 + }, + { + "epoch": 0.21, + "learning_rate": 7.882069795427196e-06, + "loss": 0.6211, + "step": 18300 + }, + { + "epoch": 0.21, + "learning_rate": 7.877441451448672e-06, + "loss": 3.023, + "step": 18302 + }, + { + "epoch": 0.21, + "learning_rate": 7.872813107470147e-06, + "loss": 2.8604, + "step": 18304 + }, + { + "epoch": 0.21, + "learning_rate": 7.868184763491624e-06, + "loss": 0.1831, + "step": 18306 + }, + { + "epoch": 0.21, + "learning_rate": 7.8635564195131e-06, + "loss": 1.0248, + "step": 18308 + }, + { + "epoch": 0.21, + "learning_rate": 7.858928075534575e-06, + "loss": 2.515, + "step": 18310 + }, + { + "epoch": 0.21, + "learning_rate": 7.85429973155605e-06, + "loss": 2.4721, + "step": 18312 + }, + { + "epoch": 0.21, + "learning_rate": 7.849671387577526e-06, + "loss": 1.0742, + "step": 18314 + }, + { + "epoch": 0.21, + "learning_rate": 7.845043043599001e-06, + "loss": 0.9671, + "step": 18316 + }, + { + "epoch": 0.21, + "learning_rate": 7.840414699620475e-06, + "loss": 0.0049, + "step": 18318 + }, + { + "epoch": 0.21, + "learning_rate": 7.83578635564195e-06, + "loss": 2.3331, + "step": 18320 + }, + { + "epoch": 0.21, + "learning_rate": 7.831158011663427e-06, + "loss": 2.4793, + "step": 18322 + }, + { + "epoch": 0.21, + "learning_rate": 7.826529667684903e-06, + "loss": 2.3554, + "step": 18324 + }, + { + "epoch": 0.21, + "learning_rate": 7.821901323706378e-06, + "loss": 1.4372, + "step": 18326 + }, + { + "epoch": 0.21, + "learning_rate": 7.817272979727854e-06, + "loss": 2.2361, + "step": 18328 + }, + { + "epoch": 0.21, + "learning_rate": 7.81264463574933e-06, + "loss": 4.4393, + "step": 18330 + }, + { + "epoch": 0.21, + "learning_rate": 7.808016291770805e-06, + "loss": 4.7919, + "step": 18332 + }, + { + "epoch": 0.21, + "learning_rate": 7.80338794779228e-06, + "loss": 1.6699, + "step": 18334 + }, + { + "epoch": 0.21, + "learning_rate": 7.798759603813755e-06, + "loss": 0.0027, + "step": 18336 + }, + { + "epoch": 0.21, + "learning_rate": 7.794131259835231e-06, + "loss": 0.7938, + "step": 18338 + }, + { + "epoch": 0.21, + "learning_rate": 7.789502915856706e-06, + "loss": 1.9922, + "step": 18340 + }, + { + "epoch": 0.21, + "learning_rate": 7.784874571878183e-06, + "loss": 0.63, + "step": 18342 + }, + { + "epoch": 0.21, + "learning_rate": 7.780246227899659e-06, + "loss": 1.0586, + "step": 18344 + }, + { + "epoch": 0.21, + "learning_rate": 7.775617883921134e-06, + "loss": 1.5097, + "step": 18346 + }, + { + "epoch": 0.21, + "learning_rate": 7.77098953994261e-06, + "loss": 1.1844, + "step": 18348 + }, + { + "epoch": 0.21, + "learning_rate": 7.766361195964085e-06, + "loss": 1.1932, + "step": 18350 + }, + { + "epoch": 0.21, + "learning_rate": 7.76173285198556e-06, + "loss": 0.0024, + "step": 18352 + }, + { + "epoch": 0.21, + "learning_rate": 7.757104508007034e-06, + "loss": 1.8673, + "step": 18354 + }, + { + "epoch": 0.21, + "learning_rate": 7.75247616402851e-06, + "loss": 0.8251, + "step": 18356 + }, + { + "epoch": 0.21, + "learning_rate": 7.747847820049987e-06, + "loss": 3.9533, + "step": 18358 + }, + { + "epoch": 0.21, + "learning_rate": 7.743219476071462e-06, + "loss": 3.8987, + "step": 18360 + }, + { + "epoch": 0.21, + "learning_rate": 7.738591132092938e-06, + "loss": 1.2861, + "step": 18362 + }, + { + "epoch": 0.21, + "learning_rate": 7.733962788114413e-06, + "loss": 3.0504, + "step": 18364 + }, + { + "epoch": 0.21, + "learning_rate": 7.729334444135888e-06, + "loss": 2.6219, + "step": 18366 + }, + { + "epoch": 0.21, + "learning_rate": 7.724706100157364e-06, + "loss": 0.0261, + "step": 18368 + }, + { + "epoch": 0.21, + "learning_rate": 7.72007775617884e-06, + "loss": 0.0004, + "step": 18370 + }, + { + "epoch": 0.21, + "learning_rate": 7.715449412200315e-06, + "loss": 0.4662, + "step": 18372 + }, + { + "epoch": 0.21, + "learning_rate": 7.71082106822179e-06, + "loss": 0.0007, + "step": 18374 + }, + { + "epoch": 0.21, + "learning_rate": 7.706192724243266e-06, + "loss": 1.1284, + "step": 18376 + }, + { + "epoch": 0.21, + "learning_rate": 7.701564380264743e-06, + "loss": 0.3812, + "step": 18378 + }, + { + "epoch": 0.21, + "learning_rate": 7.696936036286218e-06, + "loss": 0.0439, + "step": 18380 + }, + { + "epoch": 0.21, + "learning_rate": 7.692307692307694e-06, + "loss": 0.5613, + "step": 18382 + }, + { + "epoch": 0.21, + "learning_rate": 7.687679348329169e-06, + "loss": 1.1631, + "step": 18384 + }, + { + "epoch": 0.21, + "learning_rate": 7.683051004350644e-06, + "loss": 0.0007, + "step": 18386 + }, + { + "epoch": 0.21, + "learning_rate": 7.67842266037212e-06, + "loss": 3.1827, + "step": 18388 + }, + { + "epoch": 0.21, + "learning_rate": 7.673794316393594e-06, + "loss": 1.9194, + "step": 18390 + }, + { + "epoch": 0.21, + "learning_rate": 7.669165972415069e-06, + "loss": 0.037, + "step": 18392 + }, + { + "epoch": 0.21, + "learning_rate": 7.664537628436544e-06, + "loss": 1.0811, + "step": 18394 + }, + { + "epoch": 0.21, + "learning_rate": 7.659909284458022e-06, + "loss": 3.1579, + "step": 18396 + }, + { + "epoch": 0.21, + "learning_rate": 7.655280940479497e-06, + "loss": 1.4598, + "step": 18398 + }, + { + "epoch": 0.21, + "learning_rate": 7.650652596500972e-06, + "loss": 0.4743, + "step": 18400 + }, + { + "epoch": 0.21, + "learning_rate": 7.646024252522448e-06, + "loss": 3.2844, + "step": 18402 + }, + { + "epoch": 0.21, + "learning_rate": 7.641395908543923e-06, + "loss": 0.3954, + "step": 18404 + }, + { + "epoch": 0.21, + "learning_rate": 7.636767564565399e-06, + "loss": 0.4711, + "step": 18406 + }, + { + "epoch": 0.21, + "learning_rate": 7.632139220586874e-06, + "loss": 0.0012, + "step": 18408 + }, + { + "epoch": 0.21, + "learning_rate": 7.62751087660835e-06, + "loss": 1.853, + "step": 18410 + }, + { + "epoch": 0.21, + "learning_rate": 7.622882532629826e-06, + "loss": 0.0004, + "step": 18412 + }, + { + "epoch": 0.21, + "learning_rate": 7.618254188651301e-06, + "loss": 0.0032, + "step": 18414 + }, + { + "epoch": 0.21, + "learning_rate": 7.613625844672777e-06, + "loss": 5.5709, + "step": 18416 + }, + { + "epoch": 0.21, + "learning_rate": 7.608997500694252e-06, + "loss": 1.3824, + "step": 18418 + }, + { + "epoch": 0.21, + "learning_rate": 7.604369156715728e-06, + "loss": 4.7715, + "step": 18420 + }, + { + "epoch": 0.21, + "learning_rate": 7.599740812737204e-06, + "loss": 0.0002, + "step": 18422 + }, + { + "epoch": 0.21, + "learning_rate": 7.595112468758679e-06, + "loss": 0.0005, + "step": 18424 + }, + { + "epoch": 0.21, + "learning_rate": 7.590484124780154e-06, + "loss": 0.3506, + "step": 18426 + }, + { + "epoch": 0.21, + "learning_rate": 7.585855780801629e-06, + "loss": 2.0523, + "step": 18428 + }, + { + "epoch": 0.21, + "learning_rate": 7.581227436823105e-06, + "loss": 4.0864, + "step": 18430 + }, + { + "epoch": 0.21, + "learning_rate": 7.57659909284458e-06, + "loss": 4.7475, + "step": 18432 + }, + { + "epoch": 0.21, + "learning_rate": 7.5719707488660555e-06, + "loss": 0.3653, + "step": 18434 + }, + { + "epoch": 0.21, + "learning_rate": 7.567342404887532e-06, + "loss": 0.0459, + "step": 18436 + }, + { + "epoch": 0.21, + "learning_rate": 7.562714060909007e-06, + "loss": 1.9795, + "step": 18438 + }, + { + "epoch": 0.21, + "learning_rate": 7.558085716930483e-06, + "loss": 0.0398, + "step": 18440 + }, + { + "epoch": 0.21, + "learning_rate": 7.553457372951958e-06, + "loss": 2.1417, + "step": 18442 + }, + { + "epoch": 0.21, + "learning_rate": 7.5488290289734334e-06, + "loss": 1.1707, + "step": 18444 + }, + { + "epoch": 0.21, + "learning_rate": 7.544200684994909e-06, + "loss": 0.1611, + "step": 18446 + }, + { + "epoch": 0.21, + "learning_rate": 7.539572341016385e-06, + "loss": 0.0261, + "step": 18448 + }, + { + "epoch": 0.21, + "learning_rate": 7.5349439970378606e-06, + "loss": 0.0019, + "step": 18450 + }, + { + "epoch": 0.21, + "learning_rate": 7.530315653059336e-06, + "loss": 0.0013, + "step": 18452 + }, + { + "epoch": 0.21, + "learning_rate": 7.525687309080811e-06, + "loss": 2.0685, + "step": 18454 + }, + { + "epoch": 0.21, + "learning_rate": 7.521058965102287e-06, + "loss": 3.724, + "step": 18456 + }, + { + "epoch": 0.21, + "learning_rate": 7.516430621123763e-06, + "loss": 0.8842, + "step": 18458 + }, + { + "epoch": 0.21, + "learning_rate": 7.5118022771452385e-06, + "loss": 0.0009, + "step": 18460 + }, + { + "epoch": 0.21, + "learning_rate": 7.507173933166712e-06, + "loss": 1.9587, + "step": 18462 + }, + { + "epoch": 0.21, + "learning_rate": 7.5025455891881885e-06, + "loss": 1.3654, + "step": 18464 + }, + { + "epoch": 0.21, + "learning_rate": 7.497917245209664e-06, + "loss": 2.0614, + "step": 18466 + }, + { + "epoch": 0.21, + "learning_rate": 7.493288901231139e-06, + "loss": 0.0033, + "step": 18468 + }, + { + "epoch": 0.21, + "learning_rate": 7.488660557252615e-06, + "loss": 0.013, + "step": 18470 + }, + { + "epoch": 0.21, + "learning_rate": 7.48403221327409e-06, + "loss": 2.2166, + "step": 18472 + }, + { + "epoch": 0.21, + "learning_rate": 7.4794038692955665e-06, + "loss": 2.2783, + "step": 18474 + }, + { + "epoch": 0.21, + "learning_rate": 7.474775525317042e-06, + "loss": 0.3097, + "step": 18476 + }, + { + "epoch": 0.21, + "learning_rate": 7.470147181338517e-06, + "loss": 0.158, + "step": 18478 + }, + { + "epoch": 0.21, + "learning_rate": 7.465518837359993e-06, + "loss": 0.0015, + "step": 18480 + }, + { + "epoch": 0.21, + "learning_rate": 7.460890493381468e-06, + "loss": 2.4144, + "step": 18482 + }, + { + "epoch": 0.21, + "learning_rate": 7.4562621494029445e-06, + "loss": 6.3433, + "step": 18484 + }, + { + "epoch": 0.21, + "learning_rate": 7.45163380542442e-06, + "loss": 6.8526, + "step": 18486 + }, + { + "epoch": 0.21, + "learning_rate": 7.447005461445895e-06, + "loss": 2.0841, + "step": 18488 + }, + { + "epoch": 0.21, + "learning_rate": 7.442377117467371e-06, + "loss": 0.9099, + "step": 18490 + }, + { + "epoch": 0.21, + "learning_rate": 7.437748773488846e-06, + "loss": 1.4458, + "step": 18492 + }, + { + "epoch": 0.21, + "learning_rate": 7.4331204295103224e-06, + "loss": 0.0007, + "step": 18494 + }, + { + "epoch": 0.21, + "learning_rate": 7.428492085531798e-06, + "loss": 1.7042, + "step": 18496 + }, + { + "epoch": 0.21, + "learning_rate": 7.423863741553272e-06, + "loss": 0.9089, + "step": 18498 + }, + { + "epoch": 0.21, + "learning_rate": 7.419235397574748e-06, + "loss": 2.7362, + "step": 18500 + }, + { + "epoch": 0.21, + "learning_rate": 7.414607053596223e-06, + "loss": 1.2885, + "step": 18502 + }, + { + "epoch": 0.21, + "learning_rate": 7.409978709617699e-06, + "loss": 0.0008, + "step": 18504 + }, + { + "epoch": 0.21, + "learning_rate": 7.405350365639174e-06, + "loss": 0.0435, + "step": 18506 + }, + { + "epoch": 0.21, + "learning_rate": 7.4007220216606496e-06, + "loss": 2.9914, + "step": 18508 + }, + { + "epoch": 0.21, + "learning_rate": 7.396093677682126e-06, + "loss": 1.4342, + "step": 18510 + }, + { + "epoch": 0.21, + "learning_rate": 7.391465333703601e-06, + "loss": 0.6099, + "step": 18512 + }, + { + "epoch": 0.21, + "learning_rate": 7.386836989725077e-06, + "loss": 0.0023, + "step": 18514 + }, + { + "epoch": 0.21, + "learning_rate": 7.382208645746552e-06, + "loss": 5.5307, + "step": 18516 + }, + { + "epoch": 0.21, + "learning_rate": 7.3775803017680275e-06, + "loss": 1.8235, + "step": 18518 + }, + { + "epoch": 0.21, + "learning_rate": 7.372951957789504e-06, + "loss": 0.7563, + "step": 18520 + }, + { + "epoch": 0.21, + "learning_rate": 7.368323613810979e-06, + "loss": 0.0007, + "step": 18522 + }, + { + "epoch": 0.21, + "learning_rate": 7.363695269832455e-06, + "loss": 2.7501, + "step": 18524 + }, + { + "epoch": 0.21, + "learning_rate": 7.35906692585393e-06, + "loss": 1.3935, + "step": 18526 + }, + { + "epoch": 0.21, + "learning_rate": 7.3544385818754055e-06, + "loss": 3.2219, + "step": 18528 + }, + { + "epoch": 0.21, + "learning_rate": 7.349810237896882e-06, + "loss": 2.9072, + "step": 18530 + }, + { + "epoch": 0.21, + "learning_rate": 7.345181893918357e-06, + "loss": 0.7848, + "step": 18532 + }, + { + "epoch": 0.21, + "learning_rate": 7.340553549939831e-06, + "loss": 2.0571, + "step": 18534 + }, + { + "epoch": 0.21, + "learning_rate": 7.335925205961306e-06, + "loss": 1.5454, + "step": 18536 + }, + { + "epoch": 0.21, + "learning_rate": 7.331296861982783e-06, + "loss": 1.5176, + "step": 18538 + }, + { + "epoch": 0.21, + "learning_rate": 7.326668518004258e-06, + "loss": 2.8585, + "step": 18540 + }, + { + "epoch": 0.21, + "learning_rate": 7.3220401740257335e-06, + "loss": 2.1632, + "step": 18542 + }, + { + "epoch": 0.21, + "learning_rate": 7.317411830047209e-06, + "loss": 3.3527, + "step": 18544 + }, + { + "epoch": 0.21, + "learning_rate": 7.312783486068684e-06, + "loss": 1.9906, + "step": 18546 + }, + { + "epoch": 0.21, + "learning_rate": 7.308155142090161e-06, + "loss": 0.2801, + "step": 18548 + }, + { + "epoch": 0.21, + "learning_rate": 7.303526798111636e-06, + "loss": 0.4718, + "step": 18550 + }, + { + "epoch": 0.21, + "learning_rate": 7.2988984541331115e-06, + "loss": 1.5365, + "step": 18552 + }, + { + "epoch": 0.21, + "learning_rate": 7.294270110154587e-06, + "loss": 4.0193, + "step": 18554 + }, + { + "epoch": 0.21, + "learning_rate": 7.289641766176062e-06, + "loss": 0.0081, + "step": 18556 + }, + { + "epoch": 0.21, + "learning_rate": 7.2850134221975386e-06, + "loss": 0.9081, + "step": 18558 + }, + { + "epoch": 0.21, + "learning_rate": 7.280385078219014e-06, + "loss": 2.3506, + "step": 18560 + }, + { + "epoch": 0.21, + "learning_rate": 7.275756734240489e-06, + "loss": 1.4049, + "step": 18562 + }, + { + "epoch": 0.21, + "learning_rate": 7.271128390261965e-06, + "loss": 0.0064, + "step": 18564 + }, + { + "epoch": 0.21, + "learning_rate": 7.26650004628344e-06, + "loss": 0.4414, + "step": 18566 + }, + { + "epoch": 0.21, + "learning_rate": 7.2618717023049165e-06, + "loss": 0.0008, + "step": 18568 + }, + { + "epoch": 0.21, + "learning_rate": 7.257243358326392e-06, + "loss": 3.2717, + "step": 18570 + }, + { + "epoch": 0.21, + "learning_rate": 7.252615014347866e-06, + "loss": 3.3214, + "step": 18572 + }, + { + "epoch": 0.21, + "learning_rate": 7.247986670369342e-06, + "loss": 1.9879, + "step": 18574 + }, + { + "epoch": 0.21, + "learning_rate": 7.243358326390817e-06, + "loss": 0.0429, + "step": 18576 + }, + { + "epoch": 0.21, + "learning_rate": 7.238729982412293e-06, + "loss": 1.3841, + "step": 18578 + }, + { + "epoch": 0.21, + "learning_rate": 7.234101638433768e-06, + "loss": 3.7469, + "step": 18580 + }, + { + "epoch": 0.21, + "learning_rate": 7.229473294455244e-06, + "loss": 3.26, + "step": 18582 + }, + { + "epoch": 0.21, + "learning_rate": 7.22484495047672e-06, + "loss": 0.0007, + "step": 18584 + }, + { + "epoch": 0.21, + "learning_rate": 7.220216606498195e-06, + "loss": 0.0692, + "step": 18586 + }, + { + "epoch": 0.21, + "learning_rate": 7.215588262519671e-06, + "loss": 0.8172, + "step": 18588 + }, + { + "epoch": 0.21, + "learning_rate": 7.210959918541146e-06, + "loss": 4.3344, + "step": 18590 + }, + { + "epoch": 0.21, + "learning_rate": 7.206331574562622e-06, + "loss": 0.2456, + "step": 18592 + }, + { + "epoch": 0.21, + "learning_rate": 7.201703230584098e-06, + "loss": 1.149, + "step": 18594 + }, + { + "epoch": 0.21, + "learning_rate": 7.197074886605573e-06, + "loss": 0.0015, + "step": 18596 + }, + { + "epoch": 0.21, + "learning_rate": 7.192446542627049e-06, + "loss": 3.3058, + "step": 18598 + }, + { + "epoch": 0.21, + "learning_rate": 7.187818198648524e-06, + "loss": 0.3084, + "step": 18600 + }, + { + "epoch": 0.21, + "learning_rate": 7.18318985467e-06, + "loss": 3.6076, + "step": 18602 + }, + { + "epoch": 0.21, + "learning_rate": 7.178561510691476e-06, + "loss": 0.5197, + "step": 18604 + }, + { + "epoch": 0.21, + "learning_rate": 7.173933166712951e-06, + "loss": 1.9619, + "step": 18606 + }, + { + "epoch": 0.21, + "learning_rate": 7.169304822734425e-06, + "loss": 1.9337, + "step": 18608 + }, + { + "epoch": 0.21, + "learning_rate": 7.164676478755901e-06, + "loss": 4.9472, + "step": 18610 + }, + { + "epoch": 0.21, + "learning_rate": 7.160048134777377e-06, + "loss": 3.9692, + "step": 18612 + }, + { + "epoch": 0.21, + "learning_rate": 7.155419790798852e-06, + "loss": 0.3928, + "step": 18614 + }, + { + "epoch": 0.21, + "learning_rate": 7.150791446820328e-06, + "loss": 0.0595, + "step": 18616 + }, + { + "epoch": 0.21, + "learning_rate": 7.146163102841803e-06, + "loss": 1.2091, + "step": 18618 + }, + { + "epoch": 0.21, + "learning_rate": 7.141534758863279e-06, + "loss": 0.0041, + "step": 18620 + }, + { + "epoch": 0.21, + "learning_rate": 7.136906414884755e-06, + "loss": 4.8461, + "step": 18622 + }, + { + "epoch": 0.21, + "learning_rate": 7.13227807090623e-06, + "loss": 0.1339, + "step": 18624 + }, + { + "epoch": 0.21, + "learning_rate": 7.1276497269277056e-06, + "loss": 6.9697, + "step": 18626 + }, + { + "epoch": 0.21, + "learning_rate": 7.123021382949181e-06, + "loss": 0.0042, + "step": 18628 + }, + { + "epoch": 0.21, + "learning_rate": 7.118393038970657e-06, + "loss": 0.4483, + "step": 18630 + }, + { + "epoch": 0.21, + "learning_rate": 7.113764694992133e-06, + "loss": 0.0177, + "step": 18632 + }, + { + "epoch": 0.21, + "learning_rate": 7.109136351013608e-06, + "loss": 0.0901, + "step": 18634 + }, + { + "epoch": 0.21, + "learning_rate": 7.1045080070350835e-06, + "loss": 0.7892, + "step": 18636 + }, + { + "epoch": 0.21, + "learning_rate": 7.099879663056559e-06, + "loss": 4.5744, + "step": 18638 + }, + { + "epoch": 0.21, + "learning_rate": 7.095251319078035e-06, + "loss": 2.1296, + "step": 18640 + }, + { + "epoch": 0.21, + "learning_rate": 7.090622975099511e-06, + "loss": 0.4795, + "step": 18642 + }, + { + "epoch": 0.21, + "learning_rate": 7.085994631120984e-06, + "loss": 3.8105, + "step": 18644 + }, + { + "epoch": 0.21, + "learning_rate": 7.08136628714246e-06, + "loss": 0.0017, + "step": 18646 + }, + { + "epoch": 0.21, + "learning_rate": 7.076737943163936e-06, + "loss": 1.3816, + "step": 18648 + }, + { + "epoch": 0.21, + "learning_rate": 7.0721095991854115e-06, + "loss": 7.4232, + "step": 18650 + }, + { + "epoch": 0.21, + "learning_rate": 7.067481255206887e-06, + "loss": 1.1092, + "step": 18652 + }, + { + "epoch": 0.21, + "learning_rate": 7.062852911228362e-06, + "loss": 0.7366, + "step": 18654 + }, + { + "epoch": 0.21, + "learning_rate": 7.058224567249838e-06, + "loss": 1.5642, + "step": 18656 + }, + { + "epoch": 0.21, + "learning_rate": 7.053596223271314e-06, + "loss": 0.0066, + "step": 18658 + }, + { + "epoch": 0.21, + "learning_rate": 7.0489678792927895e-06, + "loss": 0.0004, + "step": 18660 + }, + { + "epoch": 0.21, + "learning_rate": 7.044339535314265e-06, + "loss": 0.0005, + "step": 18662 + }, + { + "epoch": 0.21, + "learning_rate": 7.03971119133574e-06, + "loss": 1.8117, + "step": 18664 + }, + { + "epoch": 0.21, + "learning_rate": 7.035082847357216e-06, + "loss": 0.0208, + "step": 18666 + }, + { + "epoch": 0.22, + "learning_rate": 7.030454503378692e-06, + "loss": 0.3306, + "step": 18668 + }, + { + "epoch": 0.22, + "learning_rate": 7.0258261594001674e-06, + "loss": 0.5434, + "step": 18670 + }, + { + "epoch": 0.22, + "learning_rate": 7.021197815421643e-06, + "loss": 0.4323, + "step": 18672 + }, + { + "epoch": 0.22, + "learning_rate": 7.016569471443118e-06, + "loss": 0.0061, + "step": 18674 + }, + { + "epoch": 0.22, + "learning_rate": 7.011941127464594e-06, + "loss": 0.3397, + "step": 18676 + }, + { + "epoch": 0.22, + "learning_rate": 7.00731278348607e-06, + "loss": 0.524, + "step": 18678 + }, + { + "epoch": 0.22, + "learning_rate": 7.002684439507544e-06, + "loss": 5.4875, + "step": 18680 + }, + { + "epoch": 0.22, + "learning_rate": 6.998056095529019e-06, + "loss": 1.3183, + "step": 18682 + }, + { + "epoch": 0.22, + "learning_rate": 6.993427751550495e-06, + "loss": 0.1394, + "step": 18684 + }, + { + "epoch": 0.22, + "learning_rate": 6.988799407571971e-06, + "loss": 2.4119, + "step": 18686 + }, + { + "epoch": 0.22, + "learning_rate": 6.984171063593446e-06, + "loss": 1.3695, + "step": 18688 + }, + { + "epoch": 0.22, + "learning_rate": 6.979542719614922e-06, + "loss": 0.0846, + "step": 18690 + }, + { + "epoch": 0.22, + "learning_rate": 6.974914375636397e-06, + "loss": 3.8513, + "step": 18692 + }, + { + "epoch": 0.22, + "learning_rate": 6.970286031657873e-06, + "loss": 2.5898, + "step": 18694 + }, + { + "epoch": 0.22, + "learning_rate": 6.965657687679349e-06, + "loss": 0.0653, + "step": 18696 + }, + { + "epoch": 0.22, + "learning_rate": 6.961029343700824e-06, + "loss": 0.9791, + "step": 18698 + }, + { + "epoch": 0.22, + "learning_rate": 6.9564009997223e-06, + "loss": 0.0056, + "step": 18700 + }, + { + "epoch": 0.22, + "learning_rate": 6.951772655743775e-06, + "loss": 0.9113, + "step": 18702 + }, + { + "epoch": 0.22, + "learning_rate": 6.947144311765251e-06, + "loss": 0.1173, + "step": 18704 + }, + { + "epoch": 0.22, + "learning_rate": 6.942515967786727e-06, + "loss": 1.4396, + "step": 18706 + }, + { + "epoch": 0.22, + "learning_rate": 6.937887623808202e-06, + "loss": 1.5741, + "step": 18708 + }, + { + "epoch": 0.22, + "learning_rate": 6.933259279829678e-06, + "loss": 0.0006, + "step": 18710 + }, + { + "epoch": 0.22, + "learning_rate": 6.928630935851153e-06, + "loss": 1.6112, + "step": 18712 + }, + { + "epoch": 0.22, + "learning_rate": 6.924002591872629e-06, + "loss": 0.1142, + "step": 18714 + }, + { + "epoch": 0.22, + "learning_rate": 6.919374247894103e-06, + "loss": 1.2048, + "step": 18716 + }, + { + "epoch": 0.22, + "learning_rate": 6.9147459039155785e-06, + "loss": 0.2917, + "step": 18718 + }, + { + "epoch": 0.22, + "learning_rate": 6.910117559937055e-06, + "loss": 0.0033, + "step": 18720 + }, + { + "epoch": 0.22, + "learning_rate": 6.90548921595853e-06, + "loss": 0.3018, + "step": 18722 + }, + { + "epoch": 0.22, + "learning_rate": 6.900860871980006e-06, + "loss": 0.0035, + "step": 18724 + }, + { + "epoch": 0.22, + "learning_rate": 6.896232528001481e-06, + "loss": 2.7534, + "step": 18726 + }, + { + "epoch": 0.22, + "learning_rate": 6.8916041840229564e-06, + "loss": 3.4132, + "step": 18728 + }, + { + "epoch": 0.22, + "learning_rate": 6.886975840044433e-06, + "loss": 3.0728, + "step": 18730 + }, + { + "epoch": 0.22, + "learning_rate": 6.882347496065908e-06, + "loss": 6.0762, + "step": 18732 + }, + { + "epoch": 0.22, + "learning_rate": 6.8777191520873836e-06, + "loss": 3.4295, + "step": 18734 + }, + { + "epoch": 0.22, + "learning_rate": 6.873090808108859e-06, + "loss": 0.0007, + "step": 18736 + }, + { + "epoch": 0.22, + "learning_rate": 6.868462464130334e-06, + "loss": 2.202, + "step": 18738 + }, + { + "epoch": 0.22, + "learning_rate": 6.863834120151811e-06, + "loss": 1.2101, + "step": 18740 + }, + { + "epoch": 0.22, + "learning_rate": 6.859205776173286e-06, + "loss": 1.9056, + "step": 18742 + }, + { + "epoch": 0.22, + "learning_rate": 6.8545774321947615e-06, + "loss": 3.2887, + "step": 18744 + }, + { + "epoch": 0.22, + "learning_rate": 6.849949088216237e-06, + "loss": 0.0013, + "step": 18746 + }, + { + "epoch": 0.22, + "learning_rate": 6.845320744237712e-06, + "loss": 0.0007, + "step": 18748 + }, + { + "epoch": 0.22, + "learning_rate": 6.840692400259189e-06, + "loss": 1.0945, + "step": 18750 + }, + { + "epoch": 0.22, + "learning_rate": 6.836064056280662e-06, + "loss": 3.9319, + "step": 18752 + }, + { + "epoch": 0.22, + "learning_rate": 6.831435712302138e-06, + "loss": 0.388, + "step": 18754 + }, + { + "epoch": 0.22, + "learning_rate": 6.826807368323613e-06, + "loss": 2.751, + "step": 18756 + }, + { + "epoch": 0.22, + "learning_rate": 6.8221790243450895e-06, + "loss": 3.6404, + "step": 18758 + }, + { + "epoch": 0.22, + "learning_rate": 6.817550680366565e-06, + "loss": 3.1234, + "step": 18760 + }, + { + "epoch": 0.22, + "learning_rate": 6.81292233638804e-06, + "loss": 0.1499, + "step": 18762 + }, + { + "epoch": 0.22, + "learning_rate": 6.808293992409516e-06, + "loss": 2.6571, + "step": 18764 + }, + { + "epoch": 0.22, + "learning_rate": 6.803665648430991e-06, + "loss": 0.0037, + "step": 18766 + }, + { + "epoch": 0.22, + "learning_rate": 6.7990373044524675e-06, + "loss": 1.5199, + "step": 18768 + }, + { + "epoch": 0.22, + "learning_rate": 6.794408960473943e-06, + "loss": 1.3351, + "step": 18770 + }, + { + "epoch": 0.22, + "learning_rate": 6.789780616495418e-06, + "loss": 0.0045, + "step": 18772 + }, + { + "epoch": 0.22, + "learning_rate": 6.785152272516894e-06, + "loss": 5.2442, + "step": 18774 + }, + { + "epoch": 0.22, + "learning_rate": 6.780523928538369e-06, + "loss": 3.7629, + "step": 18776 + }, + { + "epoch": 0.22, + "learning_rate": 6.7758955845598454e-06, + "loss": 1.7444, + "step": 18778 + }, + { + "epoch": 0.22, + "learning_rate": 6.771267240581321e-06, + "loss": 1.83, + "step": 18780 + }, + { + "epoch": 0.22, + "learning_rate": 6.766638896602796e-06, + "loss": 1.7647, + "step": 18782 + }, + { + "epoch": 0.22, + "learning_rate": 6.762010552624272e-06, + "loss": 1.1358, + "step": 18784 + }, + { + "epoch": 0.22, + "learning_rate": 6.757382208645747e-06, + "loss": 1.987, + "step": 18786 + }, + { + "epoch": 0.22, + "learning_rate": 6.752753864667222e-06, + "loss": 4.1836, + "step": 18788 + }, + { + "epoch": 0.22, + "learning_rate": 6.748125520688697e-06, + "loss": 0.3735, + "step": 18790 + }, + { + "epoch": 0.22, + "learning_rate": 6.7434971767101726e-06, + "loss": 0.0028, + "step": 18792 + }, + { + "epoch": 0.22, + "learning_rate": 6.738868832731649e-06, + "loss": 5.9772, + "step": 18794 + }, + { + "epoch": 0.22, + "learning_rate": 6.734240488753124e-06, + "loss": 1.2038, + "step": 18796 + }, + { + "epoch": 0.22, + "learning_rate": 6.7296121447746e-06, + "loss": 1.7382, + "step": 18798 + }, + { + "epoch": 0.22, + "learning_rate": 6.724983800796075e-06, + "loss": 0.5711, + "step": 18800 + }, + { + "epoch": 0.22, + "learning_rate": 6.7203554568175505e-06, + "loss": 0.0007, + "step": 18802 + }, + { + "epoch": 0.22, + "learning_rate": 6.715727112839027e-06, + "loss": 0.001, + "step": 18804 + }, + { + "epoch": 0.22, + "learning_rate": 6.711098768860502e-06, + "loss": 0.0005, + "step": 18806 + }, + { + "epoch": 0.22, + "learning_rate": 6.706470424881978e-06, + "loss": 1.502, + "step": 18808 + }, + { + "epoch": 0.22, + "learning_rate": 6.701842080903453e-06, + "loss": 0.109, + "step": 18810 + }, + { + "epoch": 0.22, + "learning_rate": 6.6972137369249285e-06, + "loss": 0.458, + "step": 18812 + }, + { + "epoch": 0.22, + "learning_rate": 6.692585392946405e-06, + "loss": 2.0564, + "step": 18814 + }, + { + "epoch": 0.22, + "learning_rate": 6.68795704896788e-06, + "loss": 4.7744, + "step": 18816 + }, + { + "epoch": 0.22, + "learning_rate": 6.683328704989356e-06, + "loss": 7.8842, + "step": 18818 + }, + { + "epoch": 0.22, + "learning_rate": 6.678700361010831e-06, + "loss": 2.5351, + "step": 18820 + }, + { + "epoch": 0.22, + "learning_rate": 6.6740720170323065e-06, + "loss": 1.3294, + "step": 18822 + }, + { + "epoch": 0.22, + "learning_rate": 6.669443673053781e-06, + "loss": 0.7107, + "step": 18824 + }, + { + "epoch": 0.22, + "learning_rate": 6.6648153290752565e-06, + "loss": 1.5544, + "step": 18826 + }, + { + "epoch": 0.22, + "learning_rate": 6.660186985096732e-06, + "loss": 0.0011, + "step": 18828 + }, + { + "epoch": 0.22, + "learning_rate": 6.655558641118208e-06, + "loss": 0.004, + "step": 18830 + }, + { + "epoch": 0.22, + "learning_rate": 6.650930297139684e-06, + "loss": 0.7693, + "step": 18832 + }, + { + "epoch": 0.22, + "learning_rate": 6.646301953161159e-06, + "loss": 0.0158, + "step": 18834 + }, + { + "epoch": 0.22, + "learning_rate": 6.6416736091826344e-06, + "loss": 0.0036, + "step": 18836 + }, + { + "epoch": 0.22, + "learning_rate": 6.63704526520411e-06, + "loss": 3.7194, + "step": 18838 + }, + { + "epoch": 0.22, + "learning_rate": 6.632416921225586e-06, + "loss": 1.1555, + "step": 18840 + }, + { + "epoch": 0.22, + "learning_rate": 6.6277885772470616e-06, + "loss": 0.0229, + "step": 18842 + }, + { + "epoch": 0.22, + "learning_rate": 6.623160233268537e-06, + "loss": 4.3888, + "step": 18844 + }, + { + "epoch": 0.22, + "learning_rate": 6.618531889290012e-06, + "loss": 2.7807, + "step": 18846 + }, + { + "epoch": 0.22, + "learning_rate": 6.613903545311488e-06, + "loss": 0.3388, + "step": 18848 + }, + { + "epoch": 0.22, + "learning_rate": 6.609275201332963e-06, + "loss": 0.1767, + "step": 18850 + }, + { + "epoch": 0.22, + "learning_rate": 6.6046468573544395e-06, + "loss": 5.0098, + "step": 18852 + }, + { + "epoch": 0.22, + "learning_rate": 6.600018513375915e-06, + "loss": 4.879, + "step": 18854 + }, + { + "epoch": 0.22, + "learning_rate": 6.59539016939739e-06, + "loss": 2.4376, + "step": 18856 + }, + { + "epoch": 0.22, + "learning_rate": 6.590761825418866e-06, + "loss": 1.2702, + "step": 18858 + }, + { + "epoch": 0.22, + "learning_rate": 6.586133481440341e-06, + "loss": 0.2083, + "step": 18860 + }, + { + "epoch": 0.22, + "learning_rate": 6.581505137461816e-06, + "loss": 1.4088, + "step": 18862 + }, + { + "epoch": 0.22, + "learning_rate": 6.576876793483291e-06, + "loss": 0.2145, + "step": 18864 + }, + { + "epoch": 0.22, + "learning_rate": 6.572248449504767e-06, + "loss": 1.6796, + "step": 18866 + }, + { + "epoch": 0.22, + "learning_rate": 6.567620105526243e-06, + "loss": 0.7068, + "step": 18868 + }, + { + "epoch": 0.22, + "learning_rate": 6.562991761547718e-06, + "loss": 0.6942, + "step": 18870 + }, + { + "epoch": 0.22, + "learning_rate": 6.558363417569194e-06, + "loss": 8.3282, + "step": 18872 + }, + { + "epoch": 0.22, + "learning_rate": 6.553735073590669e-06, + "loss": 3.7797, + "step": 18874 + }, + { + "epoch": 0.22, + "learning_rate": 6.549106729612145e-06, + "loss": 0.4063, + "step": 18876 + }, + { + "epoch": 0.22, + "learning_rate": 6.544478385633621e-06, + "loss": 2.3526, + "step": 18878 + }, + { + "epoch": 0.22, + "learning_rate": 6.539850041655096e-06, + "loss": 3.184, + "step": 18880 + }, + { + "epoch": 0.22, + "learning_rate": 6.535221697676572e-06, + "loss": 1.9826, + "step": 18882 + }, + { + "epoch": 0.22, + "learning_rate": 6.530593353698047e-06, + "loss": 0.1491, + "step": 18884 + }, + { + "epoch": 0.22, + "learning_rate": 6.525965009719523e-06, + "loss": 0.3826, + "step": 18886 + }, + { + "epoch": 0.22, + "learning_rate": 6.521336665740999e-06, + "loss": 1.8339, + "step": 18888 + }, + { + "epoch": 0.22, + "learning_rate": 6.516708321762474e-06, + "loss": 0.0009, + "step": 18890 + }, + { + "epoch": 0.22, + "learning_rate": 6.51207997778395e-06, + "loss": 4.9544, + "step": 18892 + }, + { + "epoch": 0.22, + "learning_rate": 6.507451633805425e-06, + "loss": 0.1184, + "step": 18894 + }, + { + "epoch": 0.22, + "learning_rate": 6.5028232898269006e-06, + "loss": 2.992, + "step": 18896 + }, + { + "epoch": 0.22, + "learning_rate": 6.498194945848375e-06, + "loss": 4.9246, + "step": 18898 + }, + { + "epoch": 0.22, + "learning_rate": 6.4935666018698506e-06, + "loss": 0.4826, + "step": 18900 + }, + { + "epoch": 0.22, + "learning_rate": 6.488938257891326e-06, + "loss": 0.0019, + "step": 18902 + }, + { + "epoch": 0.22, + "learning_rate": 6.484309913912802e-06, + "loss": 7.2973, + "step": 18904 + }, + { + "epoch": 0.22, + "learning_rate": 6.479681569934278e-06, + "loss": 1.6394, + "step": 18906 + }, + { + "epoch": 0.22, + "learning_rate": 6.475053225955753e-06, + "loss": 2.891, + "step": 18908 + }, + { + "epoch": 0.22, + "learning_rate": 6.4704248819772285e-06, + "loss": 0.195, + "step": 18910 + }, + { + "epoch": 0.22, + "learning_rate": 6.465796537998704e-06, + "loss": 1.1159, + "step": 18912 + }, + { + "epoch": 0.22, + "learning_rate": 6.46116819402018e-06, + "loss": 1.8886, + "step": 18914 + }, + { + "epoch": 0.22, + "learning_rate": 6.456539850041656e-06, + "loss": 0.0477, + "step": 18916 + }, + { + "epoch": 0.22, + "learning_rate": 6.451911506063131e-06, + "loss": 0.6792, + "step": 18918 + }, + { + "epoch": 0.22, + "learning_rate": 6.4472831620846065e-06, + "loss": 2.2394, + "step": 18920 + }, + { + "epoch": 0.22, + "learning_rate": 6.442654818106082e-06, + "loss": 0.0011, + "step": 18922 + }, + { + "epoch": 0.22, + "learning_rate": 6.438026474127558e-06, + "loss": 3.6483, + "step": 18924 + }, + { + "epoch": 0.22, + "learning_rate": 6.433398130149034e-06, + "loss": 0.0014, + "step": 18926 + }, + { + "epoch": 0.22, + "learning_rate": 6.428769786170509e-06, + "loss": 2.8933, + "step": 18928 + }, + { + "epoch": 0.22, + "learning_rate": 6.4241414421919845e-06, + "loss": 3.9278, + "step": 18930 + }, + { + "epoch": 0.22, + "learning_rate": 6.41951309821346e-06, + "loss": 0.0282, + "step": 18932 + }, + { + "epoch": 0.22, + "learning_rate": 6.4148847542349345e-06, + "loss": 0.0015, + "step": 18934 + }, + { + "epoch": 0.22, + "learning_rate": 6.41025641025641e-06, + "loss": 0.0077, + "step": 18936 + }, + { + "epoch": 0.22, + "learning_rate": 6.405628066277885e-06, + "loss": 0.0031, + "step": 18938 + }, + { + "epoch": 0.22, + "learning_rate": 6.400999722299361e-06, + "loss": 2.6474, + "step": 18940 + }, + { + "epoch": 0.22, + "learning_rate": 6.396371378320837e-06, + "loss": 0.0033, + "step": 18942 + }, + { + "epoch": 0.22, + "learning_rate": 6.3917430343423125e-06, + "loss": 1.3528, + "step": 18944 + }, + { + "epoch": 0.22, + "learning_rate": 6.387114690363788e-06, + "loss": 1.7129, + "step": 18946 + }, + { + "epoch": 0.22, + "learning_rate": 6.382486346385263e-06, + "loss": 1.3138, + "step": 18948 + }, + { + "epoch": 0.22, + "learning_rate": 6.377858002406739e-06, + "loss": 1.4547, + "step": 18950 + }, + { + "epoch": 0.22, + "learning_rate": 6.373229658428215e-06, + "loss": 1.5437, + "step": 18952 + }, + { + "epoch": 0.22, + "learning_rate": 6.3686013144496904e-06, + "loss": 0.6125, + "step": 18954 + }, + { + "epoch": 0.22, + "learning_rate": 6.363972970471166e-06, + "loss": 0.7609, + "step": 18956 + }, + { + "epoch": 0.22, + "learning_rate": 6.359344626492641e-06, + "loss": 1.2146, + "step": 18958 + }, + { + "epoch": 0.22, + "learning_rate": 6.354716282514117e-06, + "loss": 2.8508, + "step": 18960 + }, + { + "epoch": 0.22, + "learning_rate": 6.350087938535593e-06, + "loss": 1.7008, + "step": 18962 + }, + { + "epoch": 0.22, + "learning_rate": 6.345459594557068e-06, + "loss": 0.3529, + "step": 18964 + }, + { + "epoch": 0.22, + "learning_rate": 6.340831250578544e-06, + "loss": 1.0015, + "step": 18966 + }, + { + "epoch": 0.22, + "learning_rate": 6.336202906600019e-06, + "loss": 0.0032, + "step": 18968 + }, + { + "epoch": 0.22, + "learning_rate": 6.331574562621494e-06, + "loss": 1.5365, + "step": 18970 + }, + { + "epoch": 0.22, + "learning_rate": 6.326946218642969e-06, + "loss": 0.6966, + "step": 18972 + }, + { + "epoch": 0.22, + "learning_rate": 6.322317874664445e-06, + "loss": 0.5463, + "step": 18974 + }, + { + "epoch": 0.22, + "learning_rate": 6.31768953068592e-06, + "loss": 2.3454, + "step": 18976 + }, + { + "epoch": 0.22, + "learning_rate": 6.313061186707396e-06, + "loss": 1.1663, + "step": 18978 + }, + { + "epoch": 0.22, + "learning_rate": 6.308432842728872e-06, + "loss": 0.1678, + "step": 18980 + }, + { + "epoch": 0.22, + "learning_rate": 6.303804498750347e-06, + "loss": 0.0009, + "step": 18982 + }, + { + "epoch": 0.22, + "learning_rate": 6.299176154771823e-06, + "loss": 1.4845, + "step": 18984 + }, + { + "epoch": 0.22, + "learning_rate": 6.294547810793298e-06, + "loss": 0.7664, + "step": 18986 + }, + { + "epoch": 0.22, + "learning_rate": 6.289919466814774e-06, + "loss": 1.2494, + "step": 18988 + }, + { + "epoch": 0.22, + "learning_rate": 6.28529112283625e-06, + "loss": 0.5851, + "step": 18990 + }, + { + "epoch": 0.22, + "learning_rate": 6.280662778857725e-06, + "loss": 0.0007, + "step": 18992 + }, + { + "epoch": 0.22, + "learning_rate": 6.276034434879201e-06, + "loss": 1.1431, + "step": 18994 + }, + { + "epoch": 0.22, + "learning_rate": 6.271406090900676e-06, + "loss": 1.4807, + "step": 18996 + }, + { + "epoch": 0.22, + "learning_rate": 6.266777746922152e-06, + "loss": 2.4738, + "step": 18998 + }, + { + "epoch": 0.22, + "learning_rate": 6.262149402943628e-06, + "loss": 2.6152, + "step": 19000 + }, + { + "epoch": 0.22, + "learning_rate": 6.257521058965103e-06, + "loss": 0.127, + "step": 19002 + }, + { + "epoch": 0.22, + "learning_rate": 6.252892714986579e-06, + "loss": 2.0493, + "step": 19004 + }, + { + "epoch": 0.22, + "learning_rate": 6.248264371008054e-06, + "loss": 0.0022, + "step": 19006 + }, + { + "epoch": 0.22, + "learning_rate": 6.2436360270295294e-06, + "loss": 2.2907, + "step": 19008 + }, + { + "epoch": 0.22, + "learning_rate": 6.239007683051005e-06, + "loss": 0.7032, + "step": 19010 + }, + { + "epoch": 0.22, + "learning_rate": 6.23437933907248e-06, + "loss": 0.0166, + "step": 19012 + }, + { + "epoch": 0.22, + "learning_rate": 6.229750995093956e-06, + "loss": 0.7407, + "step": 19014 + }, + { + "epoch": 0.22, + "learning_rate": 6.225122651115431e-06, + "loss": 1.8433, + "step": 19016 + }, + { + "epoch": 0.22, + "learning_rate": 6.2204943071369066e-06, + "loss": 4.0245, + "step": 19018 + }, + { + "epoch": 0.22, + "learning_rate": 6.215865963158382e-06, + "loss": 1.7886, + "step": 19020 + }, + { + "epoch": 0.22, + "learning_rate": 6.211237619179857e-06, + "loss": 3.7121, + "step": 19022 + }, + { + "epoch": 0.22, + "learning_rate": 6.206609275201334e-06, + "loss": 3.7429, + "step": 19024 + }, + { + "epoch": 0.22, + "learning_rate": 6.201980931222809e-06, + "loss": 4.7632, + "step": 19026 + }, + { + "epoch": 0.22, + "learning_rate": 6.1973525872442845e-06, + "loss": 0.0037, + "step": 19028 + }, + { + "epoch": 0.22, + "learning_rate": 6.19272424326576e-06, + "loss": 0.0035, + "step": 19030 + }, + { + "epoch": 0.22, + "learning_rate": 6.188095899287235e-06, + "loss": 2.5502, + "step": 19032 + }, + { + "epoch": 0.22, + "learning_rate": 6.183467555308711e-06, + "loss": 0.3566, + "step": 19034 + }, + { + "epoch": 0.22, + "learning_rate": 6.178839211330186e-06, + "loss": 0.6519, + "step": 19036 + }, + { + "epoch": 0.22, + "learning_rate": 6.174210867351662e-06, + "loss": 0.7912, + "step": 19038 + }, + { + "epoch": 0.22, + "learning_rate": 6.169582523373137e-06, + "loss": 7.0106, + "step": 19040 + }, + { + "epoch": 0.22, + "learning_rate": 6.164954179394613e-06, + "loss": 1.8813, + "step": 19042 + }, + { + "epoch": 0.22, + "learning_rate": 6.160325835416089e-06, + "loss": 3.9019, + "step": 19044 + }, + { + "epoch": 0.22, + "learning_rate": 6.155697491437564e-06, + "loss": 4.254, + "step": 19046 + }, + { + "epoch": 0.22, + "learning_rate": 6.15106914745904e-06, + "loss": 0.0005, + "step": 19048 + }, + { + "epoch": 0.22, + "learning_rate": 6.146440803480515e-06, + "loss": 0.6391, + "step": 19050 + }, + { + "epoch": 0.22, + "learning_rate": 6.1418124595019905e-06, + "loss": 1.0627, + "step": 19052 + }, + { + "epoch": 0.22, + "learning_rate": 6.137184115523466e-06, + "loss": 0.6827, + "step": 19054 + }, + { + "epoch": 0.22, + "learning_rate": 6.132555771544941e-06, + "loss": 1.2934, + "step": 19056 + }, + { + "epoch": 0.22, + "learning_rate": 6.127927427566417e-06, + "loss": 0.0024, + "step": 19058 + }, + { + "epoch": 0.22, + "learning_rate": 6.123299083587892e-06, + "loss": 1.5505, + "step": 19060 + }, + { + "epoch": 0.22, + "learning_rate": 6.1186707396093684e-06, + "loss": 0.5083, + "step": 19062 + }, + { + "epoch": 0.22, + "learning_rate": 6.114042395630844e-06, + "loss": 0.2192, + "step": 19064 + }, + { + "epoch": 0.22, + "learning_rate": 6.109414051652319e-06, + "loss": 0.0181, + "step": 19066 + }, + { + "epoch": 0.22, + "learning_rate": 6.104785707673795e-06, + "loss": 1.0363, + "step": 19068 + }, + { + "epoch": 0.22, + "learning_rate": 6.10015736369527e-06, + "loss": 1.0302, + "step": 19070 + }, + { + "epoch": 0.22, + "learning_rate": 6.0955290197167456e-06, + "loss": 0.0192, + "step": 19072 + }, + { + "epoch": 0.22, + "learning_rate": 6.090900675738221e-06, + "loss": 1.0003, + "step": 19074 + }, + { + "epoch": 0.22, + "learning_rate": 6.086272331759696e-06, + "loss": 0.5009, + "step": 19076 + }, + { + "epoch": 0.22, + "learning_rate": 6.081643987781172e-06, + "loss": 1.8941, + "step": 19078 + }, + { + "epoch": 0.22, + "learning_rate": 6.077015643802648e-06, + "loss": 0.0005, + "step": 19080 + }, + { + "epoch": 0.22, + "learning_rate": 6.0723872998241235e-06, + "loss": 0.2932, + "step": 19082 + }, + { + "epoch": 0.22, + "learning_rate": 6.067758955845599e-06, + "loss": 0.1687, + "step": 19084 + }, + { + "epoch": 0.22, + "learning_rate": 6.063130611867074e-06, + "loss": 3.025, + "step": 19086 + }, + { + "epoch": 0.22, + "learning_rate": 6.05850226788855e-06, + "loss": 1.6565, + "step": 19088 + }, + { + "epoch": 0.22, + "learning_rate": 6.053873923910025e-06, + "loss": 0.001, + "step": 19090 + }, + { + "epoch": 0.22, + "learning_rate": 6.049245579931501e-06, + "loss": 4.5432, + "step": 19092 + }, + { + "epoch": 0.22, + "learning_rate": 6.044617235952976e-06, + "loss": 0.5734, + "step": 19094 + }, + { + "epoch": 0.22, + "learning_rate": 6.0399888919744515e-06, + "loss": 3.3936, + "step": 19096 + }, + { + "epoch": 0.22, + "learning_rate": 6.035360547995928e-06, + "loss": 1.1206, + "step": 19098 + }, + { + "epoch": 0.22, + "learning_rate": 6.030732204017403e-06, + "loss": 2.2224, + "step": 19100 + }, + { + "epoch": 0.22, + "learning_rate": 6.026103860038879e-06, + "loss": 1.0426, + "step": 19102 + }, + { + "epoch": 0.22, + "learning_rate": 6.021475516060354e-06, + "loss": 4.089, + "step": 19104 + }, + { + "epoch": 0.22, + "learning_rate": 6.0168471720818295e-06, + "loss": 0.0153, + "step": 19106 + }, + { + "epoch": 0.22, + "learning_rate": 6.012218828103305e-06, + "loss": 3.1542, + "step": 19108 + }, + { + "epoch": 0.22, + "learning_rate": 6.00759048412478e-06, + "loss": 0.0003, + "step": 19110 + }, + { + "epoch": 0.22, + "learning_rate": 6.002962140146256e-06, + "loss": 0.0008, + "step": 19112 + }, + { + "epoch": 0.22, + "learning_rate": 5.998333796167731e-06, + "loss": 1.9317, + "step": 19114 + }, + { + "epoch": 0.22, + "learning_rate": 5.9937054521892074e-06, + "loss": 3.1706, + "step": 19116 + }, + { + "epoch": 0.22, + "learning_rate": 5.989077108210683e-06, + "loss": 2.2755, + "step": 19118 + }, + { + "epoch": 0.22, + "learning_rate": 5.984448764232158e-06, + "loss": 1.7302, + "step": 19120 + }, + { + "epoch": 0.22, + "learning_rate": 5.979820420253634e-06, + "loss": 2.4883, + "step": 19122 + }, + { + "epoch": 0.22, + "learning_rate": 5.975192076275109e-06, + "loss": 1.9437, + "step": 19124 + }, + { + "epoch": 0.22, + "learning_rate": 5.9705637322965846e-06, + "loss": 1.5467, + "step": 19126 + }, + { + "epoch": 0.22, + "learning_rate": 5.96593538831806e-06, + "loss": 0.9198, + "step": 19128 + }, + { + "epoch": 0.22, + "learning_rate": 5.961307044339535e-06, + "loss": 4.4292, + "step": 19130 + }, + { + "epoch": 0.22, + "learning_rate": 5.956678700361011e-06, + "loss": 3.7091, + "step": 19132 + }, + { + "epoch": 0.22, + "learning_rate": 5.952050356382487e-06, + "loss": 0.0082, + "step": 19134 + }, + { + "epoch": 0.22, + "learning_rate": 5.9474220124039625e-06, + "loss": 2.4708, + "step": 19136 + }, + { + "epoch": 0.22, + "learning_rate": 5.942793668425438e-06, + "loss": 1.3665, + "step": 19138 + }, + { + "epoch": 0.22, + "learning_rate": 5.938165324446913e-06, + "loss": 4.0494, + "step": 19140 + }, + { + "epoch": 0.22, + "learning_rate": 5.933536980468388e-06, + "loss": 0.1515, + "step": 19142 + }, + { + "epoch": 0.22, + "learning_rate": 5.928908636489864e-06, + "loss": 0.0077, + "step": 19144 + }, + { + "epoch": 0.22, + "learning_rate": 5.92428029251134e-06, + "loss": 2.1912, + "step": 19146 + }, + { + "epoch": 0.22, + "learning_rate": 5.919651948532815e-06, + "loss": 0.4103, + "step": 19148 + }, + { + "epoch": 0.22, + "learning_rate": 5.9150236045542905e-06, + "loss": 0.4467, + "step": 19150 + }, + { + "epoch": 0.22, + "learning_rate": 5.910395260575766e-06, + "loss": 1.2256, + "step": 19152 + }, + { + "epoch": 0.22, + "learning_rate": 5.905766916597242e-06, + "loss": 0.0327, + "step": 19154 + }, + { + "epoch": 0.22, + "learning_rate": 5.901138572618718e-06, + "loss": 3.3413, + "step": 19156 + }, + { + "epoch": 0.22, + "learning_rate": 5.896510228640193e-06, + "loss": 0.0011, + "step": 19158 + }, + { + "epoch": 0.22, + "learning_rate": 5.891881884661668e-06, + "loss": 1.0851, + "step": 19160 + }, + { + "epoch": 0.22, + "learning_rate": 5.887253540683144e-06, + "loss": 0.9687, + "step": 19162 + }, + { + "epoch": 0.22, + "learning_rate": 5.882625196704619e-06, + "loss": 1.4474, + "step": 19164 + }, + { + "epoch": 0.22, + "learning_rate": 5.877996852726095e-06, + "loss": 0.0063, + "step": 19166 + }, + { + "epoch": 0.22, + "learning_rate": 5.87336850874757e-06, + "loss": 1.552, + "step": 19168 + }, + { + "epoch": 0.22, + "learning_rate": 5.868740164769046e-06, + "loss": 0.2935, + "step": 19170 + }, + { + "epoch": 0.22, + "learning_rate": 5.864111820790522e-06, + "loss": 3.1405, + "step": 19172 + }, + { + "epoch": 0.22, + "learning_rate": 5.859483476811997e-06, + "loss": 1.8126, + "step": 19174 + }, + { + "epoch": 0.22, + "learning_rate": 5.854855132833473e-06, + "loss": 0.0015, + "step": 19176 + }, + { + "epoch": 0.22, + "learning_rate": 5.850226788854947e-06, + "loss": 3.2565, + "step": 19178 + }, + { + "epoch": 0.22, + "learning_rate": 5.8455984448764236e-06, + "loss": 0.342, + "step": 19180 + }, + { + "epoch": 0.22, + "learning_rate": 5.840970100897899e-06, + "loss": 0.0185, + "step": 19182 + }, + { + "epoch": 0.22, + "learning_rate": 5.836341756919374e-06, + "loss": 3.5792, + "step": 19184 + }, + { + "epoch": 0.22, + "learning_rate": 5.83171341294085e-06, + "loss": 5.2717, + "step": 19186 + }, + { + "epoch": 0.22, + "learning_rate": 5.827085068962325e-06, + "loss": 0.6778, + "step": 19188 + }, + { + "epoch": 0.22, + "learning_rate": 5.8224567249838015e-06, + "loss": 0.7874, + "step": 19190 + }, + { + "epoch": 0.22, + "learning_rate": 5.817828381005277e-06, + "loss": 0.0111, + "step": 19192 + }, + { + "epoch": 0.22, + "learning_rate": 5.813200037026752e-06, + "loss": 0.483, + "step": 19194 + }, + { + "epoch": 0.22, + "learning_rate": 5.808571693048227e-06, + "loss": 3.5929, + "step": 19196 + }, + { + "epoch": 0.22, + "learning_rate": 5.803943349069703e-06, + "loss": 0.0006, + "step": 19198 + }, + { + "epoch": 0.22, + "learning_rate": 5.799315005091179e-06, + "loss": 0.2276, + "step": 19200 + }, + { + "epoch": 0.22, + "learning_rate": 5.794686661112654e-06, + "loss": 0.0815, + "step": 19202 + }, + { + "epoch": 0.22, + "learning_rate": 5.7900583171341295e-06, + "loss": 0.4711, + "step": 19204 + }, + { + "epoch": 0.22, + "learning_rate": 5.785429973155605e-06, + "loss": 0.9405, + "step": 19206 + }, + { + "epoch": 0.22, + "learning_rate": 5.780801629177081e-06, + "loss": 4.6065, + "step": 19208 + }, + { + "epoch": 0.22, + "learning_rate": 5.776173285198557e-06, + "loss": 9.458, + "step": 19210 + }, + { + "epoch": 0.22, + "learning_rate": 5.771544941220032e-06, + "loss": 9.4888, + "step": 19212 + }, + { + "epoch": 0.22, + "learning_rate": 5.766916597241507e-06, + "loss": 1.5839, + "step": 19214 + }, + { + "epoch": 0.22, + "learning_rate": 5.762288253262983e-06, + "loss": 4.6053, + "step": 19216 + }, + { + "epoch": 0.22, + "learning_rate": 5.757659909284458e-06, + "loss": 0.2925, + "step": 19218 + }, + { + "epoch": 0.22, + "learning_rate": 5.753031565305934e-06, + "loss": 0.2319, + "step": 19220 + }, + { + "epoch": 0.22, + "learning_rate": 5.748403221327409e-06, + "loss": 0.0886, + "step": 19222 + }, + { + "epoch": 0.22, + "learning_rate": 5.743774877348885e-06, + "loss": 1.1154, + "step": 19224 + }, + { + "epoch": 0.22, + "learning_rate": 5.739146533370361e-06, + "loss": 1.3807, + "step": 19226 + }, + { + "epoch": 0.22, + "learning_rate": 5.734518189391836e-06, + "loss": 0.6781, + "step": 19228 + }, + { + "epoch": 0.22, + "learning_rate": 5.729889845413312e-06, + "loss": 1.3797, + "step": 19230 + }, + { + "epoch": 0.22, + "learning_rate": 5.725261501434786e-06, + "loss": 0.2657, + "step": 19232 + }, + { + "epoch": 0.22, + "learning_rate": 5.7206331574562626e-06, + "loss": 0.0068, + "step": 19234 + }, + { + "epoch": 0.22, + "learning_rate": 5.716004813477738e-06, + "loss": 0.4468, + "step": 19236 + }, + { + "epoch": 0.22, + "learning_rate": 5.711376469499213e-06, + "loss": 1.6642, + "step": 19238 + }, + { + "epoch": 0.22, + "learning_rate": 5.706748125520689e-06, + "loss": 0.0024, + "step": 19240 + }, + { + "epoch": 0.22, + "learning_rate": 5.702119781542164e-06, + "loss": 1.6819, + "step": 19242 + }, + { + "epoch": 0.22, + "learning_rate": 5.6974914375636405e-06, + "loss": 4.1821, + "step": 19244 + }, + { + "epoch": 0.22, + "learning_rate": 5.692863093585116e-06, + "loss": 2.0712, + "step": 19246 + }, + { + "epoch": 0.22, + "learning_rate": 5.688234749606591e-06, + "loss": 2.4685, + "step": 19248 + }, + { + "epoch": 0.22, + "learning_rate": 5.683606405628066e-06, + "loss": 0.0416, + "step": 19250 + }, + { + "epoch": 0.22, + "learning_rate": 5.678978061649541e-06, + "loss": 0.2607, + "step": 19252 + }, + { + "epoch": 0.22, + "learning_rate": 5.674349717671018e-06, + "loss": 1.4553, + "step": 19254 + }, + { + "epoch": 0.22, + "learning_rate": 5.669721373692493e-06, + "loss": 3.8406, + "step": 19256 + }, + { + "epoch": 0.22, + "learning_rate": 5.6650930297139685e-06, + "loss": 2.2556, + "step": 19258 + }, + { + "epoch": 0.22, + "learning_rate": 5.660464685735444e-06, + "loss": 4.7357, + "step": 19260 + }, + { + "epoch": 0.22, + "learning_rate": 5.655836341756919e-06, + "loss": 2.3174, + "step": 19262 + }, + { + "epoch": 0.22, + "learning_rate": 5.651207997778396e-06, + "loss": 0.0117, + "step": 19264 + }, + { + "epoch": 0.22, + "learning_rate": 5.646579653799871e-06, + "loss": 1.4404, + "step": 19266 + }, + { + "epoch": 0.22, + "learning_rate": 5.6419513098213465e-06, + "loss": 1.2218, + "step": 19268 + }, + { + "epoch": 0.22, + "learning_rate": 5.637322965842821e-06, + "loss": 0.0028, + "step": 19270 + }, + { + "epoch": 0.22, + "learning_rate": 5.632694621864297e-06, + "loss": 1.1307, + "step": 19272 + }, + { + "epoch": 0.22, + "learning_rate": 5.628066277885773e-06, + "loss": 1.0859, + "step": 19274 + }, + { + "epoch": 0.22, + "learning_rate": 5.623437933907248e-06, + "loss": 1.8234, + "step": 19276 + }, + { + "epoch": 0.22, + "learning_rate": 5.618809589928724e-06, + "loss": 0.0016, + "step": 19278 + }, + { + "epoch": 0.22, + "learning_rate": 5.614181245950199e-06, + "loss": 0.0516, + "step": 19280 + }, + { + "epoch": 0.22, + "learning_rate": 5.609552901971675e-06, + "loss": 1.628, + "step": 19282 + }, + { + "epoch": 0.22, + "learning_rate": 5.604924557993151e-06, + "loss": 4.9162, + "step": 19284 + }, + { + "epoch": 0.22, + "learning_rate": 5.600296214014626e-06, + "loss": 1.837, + "step": 19286 + }, + { + "epoch": 0.22, + "learning_rate": 5.595667870036101e-06, + "loss": 1.5987, + "step": 19288 + }, + { + "epoch": 0.22, + "learning_rate": 5.591039526057577e-06, + "loss": 1.7679, + "step": 19290 + }, + { + "epoch": 0.22, + "learning_rate": 5.5864111820790524e-06, + "loss": 0.0007, + "step": 19292 + }, + { + "epoch": 0.22, + "learning_rate": 5.581782838100528e-06, + "loss": 0.0141, + "step": 19294 + }, + { + "epoch": 0.22, + "learning_rate": 5.577154494122003e-06, + "loss": 0.8932, + "step": 19296 + }, + { + "epoch": 0.22, + "learning_rate": 5.572526150143479e-06, + "loss": 1.3604, + "step": 19298 + }, + { + "epoch": 0.22, + "learning_rate": 5.567897806164955e-06, + "loss": 0.8716, + "step": 19300 + }, + { + "epoch": 0.22, + "learning_rate": 5.56326946218643e-06, + "loss": 0.0008, + "step": 19302 + }, + { + "epoch": 0.22, + "learning_rate": 5.558641118207906e-06, + "loss": 1.8313, + "step": 19304 + }, + { + "epoch": 0.22, + "learning_rate": 5.55401277422938e-06, + "loss": 0.0031, + "step": 19306 + }, + { + "epoch": 0.22, + "learning_rate": 5.549384430250857e-06, + "loss": 1.8895, + "step": 19308 + }, + { + "epoch": 0.22, + "learning_rate": 5.544756086272332e-06, + "loss": 0.0029, + "step": 19310 + }, + { + "epoch": 0.22, + "learning_rate": 5.5401277422938075e-06, + "loss": 0.0004, + "step": 19312 + }, + { + "epoch": 0.22, + "learning_rate": 5.535499398315283e-06, + "loss": 0.1039, + "step": 19314 + }, + { + "epoch": 0.22, + "learning_rate": 5.530871054336758e-06, + "loss": 1.9743, + "step": 19316 + }, + { + "epoch": 0.22, + "learning_rate": 5.526242710358235e-06, + "loss": 1.329, + "step": 19318 + }, + { + "epoch": 0.22, + "learning_rate": 5.52161436637971e-06, + "loss": 0.0015, + "step": 19320 + }, + { + "epoch": 0.22, + "learning_rate": 5.5169860224011855e-06, + "loss": 1.1758, + "step": 19322 + }, + { + "epoch": 0.22, + "learning_rate": 5.51235767842266e-06, + "loss": 0.0099, + "step": 19324 + }, + { + "epoch": 0.22, + "learning_rate": 5.507729334444136e-06, + "loss": 1.1392, + "step": 19326 + }, + { + "epoch": 0.22, + "learning_rate": 5.503100990465612e-06, + "loss": 0.5247, + "step": 19328 + }, + { + "epoch": 0.22, + "learning_rate": 5.498472646487087e-06, + "loss": 1.6777, + "step": 19330 + }, + { + "epoch": 0.22, + "learning_rate": 5.493844302508563e-06, + "loss": 0.2181, + "step": 19332 + }, + { + "epoch": 0.22, + "learning_rate": 5.489215958530038e-06, + "loss": 0.4036, + "step": 19334 + }, + { + "epoch": 0.22, + "learning_rate": 5.484587614551514e-06, + "loss": 5.3025, + "step": 19336 + }, + { + "epoch": 0.22, + "learning_rate": 5.47995927057299e-06, + "loss": 3.5041, + "step": 19338 + }, + { + "epoch": 0.22, + "learning_rate": 5.475330926594465e-06, + "loss": 1.4783, + "step": 19340 + }, + { + "epoch": 0.22, + "learning_rate": 5.47070258261594e-06, + "loss": 5.237, + "step": 19342 + }, + { + "epoch": 0.22, + "learning_rate": 5.466074238637415e-06, + "loss": 1.0331, + "step": 19344 + }, + { + "epoch": 0.22, + "learning_rate": 5.4614458946588914e-06, + "loss": 1.8942, + "step": 19346 + }, + { + "epoch": 0.22, + "learning_rate": 5.456817550680367e-06, + "loss": 0.6359, + "step": 19348 + }, + { + "epoch": 0.22, + "learning_rate": 5.452189206701842e-06, + "loss": 0.8661, + "step": 19350 + }, + { + "epoch": 0.22, + "learning_rate": 5.447560862723318e-06, + "loss": 1.5262, + "step": 19352 + }, + { + "epoch": 0.22, + "learning_rate": 5.442932518744793e-06, + "loss": 0.0064, + "step": 19354 + }, + { + "epoch": 0.22, + "learning_rate": 5.438304174766269e-06, + "loss": 0.0146, + "step": 19356 + }, + { + "epoch": 0.22, + "learning_rate": 5.433675830787745e-06, + "loss": 2.0817, + "step": 19358 + }, + { + "epoch": 0.22, + "learning_rate": 5.429047486809219e-06, + "loss": 5.4409, + "step": 19360 + }, + { + "epoch": 0.22, + "learning_rate": 5.424419142830695e-06, + "loss": 3.9706, + "step": 19362 + }, + { + "epoch": 0.22, + "learning_rate": 5.419790798852171e-06, + "loss": 1.0827, + "step": 19364 + }, + { + "epoch": 0.22, + "learning_rate": 5.4151624548736465e-06, + "loss": 2.0929, + "step": 19366 + }, + { + "epoch": 0.22, + "learning_rate": 5.410534110895122e-06, + "loss": 1.2173, + "step": 19368 + }, + { + "epoch": 0.22, + "learning_rate": 5.405905766916597e-06, + "loss": 4.6172, + "step": 19370 + }, + { + "epoch": 0.22, + "learning_rate": 5.401277422938073e-06, + "loss": 0.0012, + "step": 19372 + }, + { + "epoch": 0.22, + "learning_rate": 5.396649078959549e-06, + "loss": 1.14, + "step": 19374 + }, + { + "epoch": 0.22, + "learning_rate": 5.3920207349810245e-06, + "loss": 5.6752, + "step": 19376 + }, + { + "epoch": 0.22, + "learning_rate": 5.387392391002499e-06, + "loss": 0.9607, + "step": 19378 + }, + { + "epoch": 0.22, + "learning_rate": 5.3827640470239745e-06, + "loss": 0.6249, + "step": 19380 + }, + { + "epoch": 0.22, + "learning_rate": 5.378135703045451e-06, + "loss": 0.0709, + "step": 19382 + }, + { + "epoch": 0.22, + "learning_rate": 5.373507359066926e-06, + "loss": 4.3825, + "step": 19384 + }, + { + "epoch": 0.22, + "learning_rate": 5.368879015088402e-06, + "loss": 0.1657, + "step": 19386 + }, + { + "epoch": 0.22, + "learning_rate": 5.364250671109877e-06, + "loss": 0.8832, + "step": 19388 + }, + { + "epoch": 0.22, + "learning_rate": 5.3596223271313525e-06, + "loss": 2.1735, + "step": 19390 + }, + { + "epoch": 0.22, + "learning_rate": 5.354993983152829e-06, + "loss": 1.4918, + "step": 19392 + }, + { + "epoch": 0.22, + "learning_rate": 5.350365639174304e-06, + "loss": 2.7286, + "step": 19394 + }, + { + "epoch": 0.22, + "learning_rate": 5.345737295195779e-06, + "loss": 0.716, + "step": 19396 + }, + { + "epoch": 0.22, + "learning_rate": 5.341108951217254e-06, + "loss": 0.0034, + "step": 19398 + }, + { + "epoch": 0.22, + "learning_rate": 5.3364806072387304e-06, + "loss": 0.0018, + "step": 19400 + }, + { + "epoch": 0.22, + "learning_rate": 5.331852263260206e-06, + "loss": 2.6572, + "step": 19402 + }, + { + "epoch": 0.22, + "learning_rate": 5.327223919281681e-06, + "loss": 0.302, + "step": 19404 + }, + { + "epoch": 0.22, + "learning_rate": 5.322595575303157e-06, + "loss": 0.5596, + "step": 19406 + }, + { + "epoch": 0.22, + "learning_rate": 5.317967231324632e-06, + "loss": 2.118, + "step": 19408 + }, + { + "epoch": 0.22, + "learning_rate": 5.313338887346108e-06, + "loss": 1.7039, + "step": 19410 + }, + { + "epoch": 0.22, + "learning_rate": 5.308710543367584e-06, + "loss": 0.259, + "step": 19412 + }, + { + "epoch": 0.22, + "learning_rate": 5.304082199389058e-06, + "loss": 0.359, + "step": 19414 + }, + { + "epoch": 0.22, + "learning_rate": 5.299453855410534e-06, + "loss": 2.8029, + "step": 19416 + }, + { + "epoch": 0.22, + "learning_rate": 5.29482551143201e-06, + "loss": 1.1135, + "step": 19418 + }, + { + "epoch": 0.22, + "learning_rate": 5.2901971674534855e-06, + "loss": 0.7646, + "step": 19420 + }, + { + "epoch": 0.22, + "learning_rate": 5.285568823474961e-06, + "loss": 0.2531, + "step": 19422 + }, + { + "epoch": 0.22, + "learning_rate": 5.280940479496436e-06, + "loss": 2.029, + "step": 19424 + }, + { + "epoch": 0.22, + "learning_rate": 5.276312135517912e-06, + "loss": 0.5372, + "step": 19426 + }, + { + "epoch": 0.22, + "learning_rate": 5.271683791539388e-06, + "loss": 2.4706, + "step": 19428 + }, + { + "epoch": 0.22, + "learning_rate": 5.2670554475608635e-06, + "loss": 6.8244, + "step": 19430 + }, + { + "epoch": 0.22, + "learning_rate": 5.262427103582338e-06, + "loss": 1.6946, + "step": 19432 + }, + { + "epoch": 0.22, + "learning_rate": 5.2577987596038135e-06, + "loss": 0.5327, + "step": 19434 + }, + { + "epoch": 0.22, + "learning_rate": 5.25317041562529e-06, + "loss": 6.9696, + "step": 19436 + }, + { + "epoch": 0.22, + "learning_rate": 5.248542071646765e-06, + "loss": 6.123, + "step": 19438 + }, + { + "epoch": 0.22, + "learning_rate": 5.243913727668241e-06, + "loss": 0.0011, + "step": 19440 + }, + { + "epoch": 0.22, + "learning_rate": 5.239285383689716e-06, + "loss": 0.0047, + "step": 19442 + }, + { + "epoch": 0.22, + "learning_rate": 5.2346570397111915e-06, + "loss": 1.0766, + "step": 19444 + }, + { + "epoch": 0.22, + "learning_rate": 5.230028695732668e-06, + "loss": 1.3952, + "step": 19446 + }, + { + "epoch": 0.22, + "learning_rate": 5.225400351754143e-06, + "loss": 0.5235, + "step": 19448 + }, + { + "epoch": 0.22, + "learning_rate": 5.220772007775618e-06, + "loss": 0.1963, + "step": 19450 + }, + { + "epoch": 0.22, + "learning_rate": 5.216143663797093e-06, + "loss": 3.9422, + "step": 19452 + }, + { + "epoch": 0.22, + "learning_rate": 5.211515319818569e-06, + "loss": 1.3211, + "step": 19454 + }, + { + "epoch": 0.22, + "learning_rate": 5.206886975840045e-06, + "loss": 3.315, + "step": 19456 + }, + { + "epoch": 0.22, + "learning_rate": 5.20225863186152e-06, + "loss": 3.8378, + "step": 19458 + }, + { + "epoch": 0.22, + "learning_rate": 5.197630287882996e-06, + "loss": 1.0021, + "step": 19460 + }, + { + "epoch": 0.22, + "learning_rate": 5.193001943904471e-06, + "loss": 1.9531, + "step": 19462 + }, + { + "epoch": 0.22, + "learning_rate": 5.1883735999259466e-06, + "loss": 3.4466, + "step": 19464 + }, + { + "epoch": 0.22, + "learning_rate": 5.183745255947423e-06, + "loss": 4.8198, + "step": 19466 + }, + { + "epoch": 0.22, + "learning_rate": 5.179116911968897e-06, + "loss": 0.7028, + "step": 19468 + }, + { + "epoch": 0.22, + "learning_rate": 5.174488567990373e-06, + "loss": 3.145, + "step": 19470 + }, + { + "epoch": 0.22, + "learning_rate": 5.169860224011848e-06, + "loss": 0.8533, + "step": 19472 + }, + { + "epoch": 0.22, + "learning_rate": 5.1652318800333245e-06, + "loss": 0.4271, + "step": 19474 + }, + { + "epoch": 0.22, + "learning_rate": 5.1606035360548e-06, + "loss": 1.2251, + "step": 19476 + }, + { + "epoch": 0.22, + "learning_rate": 5.155975192076275e-06, + "loss": 6.8359, + "step": 19478 + }, + { + "epoch": 0.22, + "learning_rate": 5.151346848097751e-06, + "loss": 0.941, + "step": 19480 + }, + { + "epoch": 0.22, + "learning_rate": 5.146718504119226e-06, + "loss": 1.8861, + "step": 19482 + }, + { + "epoch": 0.22, + "learning_rate": 5.1420901601407025e-06, + "loss": 0.9458, + "step": 19484 + }, + { + "epoch": 0.22, + "learning_rate": 5.137461816162177e-06, + "loss": 0.0973, + "step": 19486 + }, + { + "epoch": 0.22, + "learning_rate": 5.1328334721836525e-06, + "loss": 1.2241, + "step": 19488 + }, + { + "epoch": 0.22, + "learning_rate": 5.128205128205128e-06, + "loss": 0.9336, + "step": 19490 + }, + { + "epoch": 0.22, + "learning_rate": 5.123576784226604e-06, + "loss": 1.8695, + "step": 19492 + }, + { + "epoch": 0.22, + "learning_rate": 5.11894844024808e-06, + "loss": 0.0037, + "step": 19494 + }, + { + "epoch": 0.22, + "learning_rate": 5.114320096269555e-06, + "loss": 5.0138, + "step": 19496 + }, + { + "epoch": 0.22, + "learning_rate": 5.1096917522910305e-06, + "loss": 0.0034, + "step": 19498 + }, + { + "epoch": 0.22, + "learning_rate": 5.105063408312506e-06, + "loss": 1.4566, + "step": 19500 + } + ], + "max_steps": 21706, + "num_train_epochs": 1, + "total_flos": 762615419904000.0, + "trial_name": null, + "trial_params": null +}