{ "best_metric": null, "best_model_checkpoint": null, "epoch": 3.0, "eval_steps": 500, "global_step": 2523, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 0.0, "loss": 0.6576, "step": 2 }, { "epoch": 0.0, "learning_rate": 0.0, "loss": 0.6592, "step": 4 }, { "epoch": 0.01, "learning_rate": 0.0, "loss": 0.6936, "step": 6 }, { "epoch": 0.01, "learning_rate": 0.0, "loss": 0.6209, "step": 8 }, { "epoch": 0.01, "learning_rate": 0.0, "loss": 0.7266, "step": 10 }, { "epoch": 0.01, "learning_rate": 0.0, "loss": 0.583, "step": 12 }, { "epoch": 0.02, "learning_rate": 0.0, "loss": 0.5872, "step": 14 }, { "epoch": 0.02, "learning_rate": 0.0, "loss": 0.6971, "step": 16 }, { "epoch": 0.02, "learning_rate": 0.0, "loss": 0.6075, "step": 18 }, { "epoch": 0.02, "learning_rate": 1.3333333333333334e-06, "loss": 0.6744, "step": 20 }, { "epoch": 0.03, "learning_rate": 2.666666666666667e-06, "loss": 0.5986, "step": 22 }, { "epoch": 0.03, "learning_rate": 4.000000000000001e-06, "loss": 0.6547, "step": 24 }, { "epoch": 0.03, "learning_rate": 5.333333333333334e-06, "loss": 0.5527, "step": 26 }, { "epoch": 0.03, "learning_rate": 6.666666666666667e-06, "loss": 0.4789, "step": 28 }, { "epoch": 0.04, "learning_rate": 8.000000000000001e-06, "loss": 0.5411, "step": 30 }, { "epoch": 0.04, "learning_rate": 9.333333333333334e-06, "loss": 0.5627, "step": 32 }, { "epoch": 0.04, "learning_rate": 1.0666666666666667e-05, "loss": 0.5674, "step": 34 }, { "epoch": 0.04, "learning_rate": 1.2e-05, "loss": 0.5058, "step": 36 }, { "epoch": 0.05, "learning_rate": 1.3333333333333333e-05, "loss": 0.5078, "step": 38 }, { "epoch": 0.05, "learning_rate": 1.4666666666666666e-05, "loss": 0.5258, "step": 40 }, { "epoch": 0.05, "learning_rate": 1.6000000000000003e-05, "loss": 0.5611, "step": 42 }, { "epoch": 0.05, "learning_rate": 1.7333333333333336e-05, "loss": 0.4715, "step": 44 }, { "epoch": 0.05, "learning_rate": 1.866666666666667e-05, "loss": 0.7397, "step": 46 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.5526, "step": 48 }, { "epoch": 0.06, "learning_rate": 1.999996823967381e-05, "loss": 0.5387, "step": 50 }, { "epoch": 0.06, "learning_rate": 1.9999872958896982e-05, "loss": 0.5412, "step": 52 }, { "epoch": 0.06, "learning_rate": 1.9999714158274743e-05, "loss": 0.487, "step": 54 }, { "epoch": 0.07, "learning_rate": 1.9999491838815805e-05, "loss": 0.5345, "step": 56 }, { "epoch": 0.07, "learning_rate": 1.999920600193236e-05, "loss": 0.5062, "step": 58 }, { "epoch": 0.07, "learning_rate": 1.9998856649440058e-05, "loss": 0.5077, "step": 60 }, { "epoch": 0.07, "learning_rate": 1.999844378355801e-05, "loss": 0.5029, "step": 62 }, { "epoch": 0.08, "learning_rate": 1.999796740690877e-05, "loss": 0.5374, "step": 64 }, { "epoch": 0.08, "learning_rate": 1.9997427522518315e-05, "loss": 0.5321, "step": 66 }, { "epoch": 0.08, "learning_rate": 1.999682413381602e-05, "loss": 0.4695, "step": 68 }, { "epoch": 0.08, "learning_rate": 1.9996157244634647e-05, "loss": 0.4798, "step": 70 }, { "epoch": 0.09, "learning_rate": 1.9995799988672346e-05, "loss": 0.4682, "step": 72 }, { "epoch": 0.09, "learning_rate": 1.999503785684114e-05, "loss": 0.5138, "step": 74 }, { "epoch": 0.09, "learning_rate": 1.9994212235877407e-05, "loss": 0.5271, "step": 76 }, { "epoch": 0.09, "learning_rate": 1.999332313102555e-05, "loss": 0.5526, "step": 78 }, { "epoch": 0.1, "learning_rate": 1.999237054793322e-05, "loss": 0.5385, "step": 80 }, { "epoch": 0.1, "learning_rate": 1.9991354492651283e-05, "loss": 0.5259, "step": 82 }, { "epoch": 0.1, "learning_rate": 1.9990274971633787e-05, "loss": 0.5054, "step": 84 }, { "epoch": 0.1, "learning_rate": 1.9989131991737928e-05, "loss": 0.4698, "step": 86 }, { "epoch": 0.1, "learning_rate": 1.998792556022398e-05, "loss": 0.4909, "step": 88 }, { "epoch": 0.11, "learning_rate": 1.998665568475528e-05, "loss": 0.462, "step": 90 }, { "epoch": 0.11, "learning_rate": 1.998532237339816e-05, "loss": 0.4898, "step": 92 }, { "epoch": 0.11, "learning_rate": 1.9983925634621894e-05, "loss": 0.4771, "step": 94 }, { "epoch": 0.11, "learning_rate": 1.998246547729867e-05, "loss": 0.4774, "step": 96 }, { "epoch": 0.12, "learning_rate": 1.998094191070349e-05, "loss": 0.421, "step": 98 }, { "epoch": 0.12, "learning_rate": 1.997935494451416e-05, "loss": 0.4991, "step": 100 }, { "epoch": 0.12, "learning_rate": 1.9977704588811183e-05, "loss": 0.5349, "step": 102 }, { "epoch": 0.12, "learning_rate": 1.9975990854077733e-05, "loss": 0.4717, "step": 104 }, { "epoch": 0.13, "learning_rate": 1.9974213751199556e-05, "loss": 0.5024, "step": 106 }, { "epoch": 0.13, "learning_rate": 1.9972373291464933e-05, "loss": 0.5904, "step": 108 }, { "epoch": 0.13, "learning_rate": 1.9970469486564585e-05, "loss": 0.4817, "step": 110 }, { "epoch": 0.13, "learning_rate": 1.99685023485916e-05, "loss": 0.488, "step": 112 }, { "epoch": 0.14, "learning_rate": 1.9966471890041374e-05, "loss": 0.5262, "step": 114 }, { "epoch": 0.14, "learning_rate": 1.9964378123811502e-05, "loss": 0.4721, "step": 116 }, { "epoch": 0.14, "learning_rate": 1.9962221063201734e-05, "loss": 0.4594, "step": 118 }, { "epoch": 0.14, "learning_rate": 1.996000072191385e-05, "loss": 0.5068, "step": 120 }, { "epoch": 0.15, "learning_rate": 1.9957717114051608e-05, "loss": 0.5169, "step": 122 }, { "epoch": 0.15, "learning_rate": 1.9955370254120635e-05, "loss": 0.4781, "step": 124 }, { "epoch": 0.15, "learning_rate": 1.9952960157028335e-05, "loss": 0.4682, "step": 126 }, { "epoch": 0.15, "learning_rate": 1.9950486838083807e-05, "loss": 0.4754, "step": 128 }, { "epoch": 0.15, "learning_rate": 1.994795031299773e-05, "loss": 0.479, "step": 130 }, { "epoch": 0.16, "learning_rate": 1.9945350597882275e-05, "loss": 0.5381, "step": 132 }, { "epoch": 0.16, "learning_rate": 1.9942687709251006e-05, "loss": 0.5457, "step": 134 }, { "epoch": 0.16, "learning_rate": 1.993996166401877e-05, "loss": 0.4653, "step": 136 }, { "epoch": 0.16, "learning_rate": 1.9937172479501573e-05, "loss": 0.4686, "step": 138 }, { "epoch": 0.17, "learning_rate": 1.9934320173416502e-05, "loss": 0.5098, "step": 140 }, { "epoch": 0.17, "learning_rate": 1.9931404763881598e-05, "loss": 0.4823, "step": 142 }, { "epoch": 0.17, "learning_rate": 1.992842626941572e-05, "loss": 0.4999, "step": 144 }, { "epoch": 0.17, "learning_rate": 1.992538470893847e-05, "loss": 0.4868, "step": 146 }, { "epoch": 0.18, "learning_rate": 1.992228010177003e-05, "loss": 0.4668, "step": 148 }, { "epoch": 0.18, "learning_rate": 1.9919112467631074e-05, "loss": 0.4955, "step": 150 }, { "epoch": 0.18, "learning_rate": 1.991588182664262e-05, "loss": 0.4855, "step": 152 }, { "epoch": 0.18, "learning_rate": 1.99125881993259e-05, "loss": 0.4646, "step": 154 }, { "epoch": 0.19, "learning_rate": 1.9909231606602266e-05, "loss": 0.5182, "step": 156 }, { "epoch": 0.19, "learning_rate": 1.9905812069793002e-05, "loss": 0.4857, "step": 158 }, { "epoch": 0.19, "learning_rate": 1.990232961061924e-05, "loss": 0.5048, "step": 160 }, { "epoch": 0.19, "learning_rate": 1.989878425120177e-05, "loss": 0.4597, "step": 162 }, { "epoch": 0.2, "learning_rate": 1.9895176014060964e-05, "loss": 0.4387, "step": 164 }, { "epoch": 0.2, "learning_rate": 1.9891504922116572e-05, "loss": 0.4556, "step": 166 }, { "epoch": 0.2, "learning_rate": 1.9887770998687607e-05, "loss": 0.4937, "step": 168 }, { "epoch": 0.2, "learning_rate": 1.9883974267492202e-05, "loss": 0.5738, "step": 170 }, { "epoch": 0.2, "learning_rate": 1.9880114752647434e-05, "loss": 0.4769, "step": 172 }, { "epoch": 0.21, "learning_rate": 1.9876192478669197e-05, "loss": 0.5109, "step": 174 }, { "epoch": 0.21, "learning_rate": 1.987220747047203e-05, "loss": 0.4926, "step": 176 }, { "epoch": 0.21, "learning_rate": 1.9868159753368964e-05, "loss": 0.4726, "step": 178 }, { "epoch": 0.21, "learning_rate": 1.9864049353071365e-05, "loss": 0.4675, "step": 180 }, { "epoch": 0.22, "learning_rate": 1.985987629568876e-05, "loss": 0.4723, "step": 182 }, { "epoch": 0.22, "learning_rate": 1.9855640607728684e-05, "loss": 0.4215, "step": 184 }, { "epoch": 0.22, "learning_rate": 1.9851342316096503e-05, "loss": 0.505, "step": 186 }, { "epoch": 0.22, "learning_rate": 1.984698144809525e-05, "loss": 0.5278, "step": 188 }, { "epoch": 0.23, "learning_rate": 1.9842558031425434e-05, "loss": 0.4836, "step": 190 }, { "epoch": 0.23, "learning_rate": 1.983807209418489e-05, "loss": 0.445, "step": 192 }, { "epoch": 0.23, "learning_rate": 1.9833523664868587e-05, "loss": 0.4585, "step": 194 }, { "epoch": 0.23, "learning_rate": 1.982891277236845e-05, "loss": 0.4934, "step": 196 }, { "epoch": 0.24, "learning_rate": 1.982423944597315e-05, "loss": 0.4899, "step": 198 }, { "epoch": 0.24, "learning_rate": 1.981950371536798e-05, "loss": 0.5321, "step": 200 }, { "epoch": 0.24, "learning_rate": 1.9814705610634602e-05, "loss": 0.4883, "step": 202 }, { "epoch": 0.24, "learning_rate": 1.980984516225089e-05, "loss": 0.4805, "step": 204 }, { "epoch": 0.24, "learning_rate": 1.9804922401090732e-05, "loss": 0.4507, "step": 206 }, { "epoch": 0.25, "learning_rate": 1.9799937358423826e-05, "loss": 0.4872, "step": 208 }, { "epoch": 0.25, "learning_rate": 1.9794890065915486e-05, "loss": 0.5495, "step": 210 }, { "epoch": 0.25, "learning_rate": 1.9789780555626444e-05, "loss": 0.4936, "step": 212 }, { "epoch": 0.25, "learning_rate": 1.9784608860012652e-05, "loss": 0.4769, "step": 214 }, { "epoch": 0.26, "learning_rate": 1.9779375011925046e-05, "loss": 0.4714, "step": 216 }, { "epoch": 0.26, "learning_rate": 1.9774079044609373e-05, "loss": 0.5064, "step": 218 }, { "epoch": 0.26, "learning_rate": 1.976872099170597e-05, "loss": 0.4629, "step": 220 }, { "epoch": 0.26, "learning_rate": 1.976330088724953e-05, "loss": 0.4149, "step": 222 }, { "epoch": 0.27, "learning_rate": 1.9757818765668916e-05, "loss": 0.4565, "step": 224 }, { "epoch": 0.27, "learning_rate": 1.9752274661786916e-05, "loss": 0.4233, "step": 226 }, { "epoch": 0.27, "learning_rate": 1.9746668610820047e-05, "loss": 0.4557, "step": 228 }, { "epoch": 0.27, "learning_rate": 1.9741000648378303e-05, "loss": 0.5186, "step": 230 }, { "epoch": 0.28, "learning_rate": 1.9735270810464958e-05, "loss": 0.4425, "step": 232 }, { "epoch": 0.28, "learning_rate": 1.972947913347631e-05, "loss": 0.5274, "step": 234 }, { "epoch": 0.28, "learning_rate": 1.9723625654201472e-05, "loss": 0.4087, "step": 236 }, { "epoch": 0.28, "learning_rate": 1.971771040982213e-05, "loss": 0.4412, "step": 238 }, { "epoch": 0.29, "learning_rate": 1.9711733437912293e-05, "loss": 0.4998, "step": 240 }, { "epoch": 0.29, "learning_rate": 1.9705694776438084e-05, "loss": 0.5343, "step": 242 }, { "epoch": 0.29, "learning_rate": 1.9699594463757475e-05, "loss": 0.4365, "step": 244 }, { "epoch": 0.29, "learning_rate": 1.9693432538620046e-05, "loss": 0.4877, "step": 246 }, { "epoch": 0.29, "learning_rate": 1.9687209040166748e-05, "loss": 0.4361, "step": 248 }, { "epoch": 0.3, "learning_rate": 1.968092400792965e-05, "loss": 0.4555, "step": 250 }, { "epoch": 0.3, "learning_rate": 1.967457748183169e-05, "loss": 0.4353, "step": 252 }, { "epoch": 0.3, "learning_rate": 1.966816950218641e-05, "loss": 0.3476, "step": 254 }, { "epoch": 0.3, "learning_rate": 1.9661700109697718e-05, "loss": 0.4342, "step": 256 }, { "epoch": 0.31, "learning_rate": 1.9655169345459622e-05, "loss": 0.4603, "step": 258 }, { "epoch": 0.31, "learning_rate": 1.964857725095595e-05, "loss": 0.4514, "step": 260 }, { "epoch": 0.31, "learning_rate": 1.964192386806013e-05, "loss": 0.4889, "step": 262 }, { "epoch": 0.31, "learning_rate": 1.9635209239034872e-05, "loss": 0.5255, "step": 264 }, { "epoch": 0.32, "learning_rate": 1.962843340653195e-05, "loss": 0.5325, "step": 266 }, { "epoch": 0.32, "learning_rate": 1.9621596413591885e-05, "loss": 0.5163, "step": 268 }, { "epoch": 0.32, "learning_rate": 1.96146983036437e-05, "loss": 0.3708, "step": 270 }, { "epoch": 0.32, "learning_rate": 1.960773912050465e-05, "loss": 0.5378, "step": 272 }, { "epoch": 0.33, "learning_rate": 1.960071890837991e-05, "loss": 0.4389, "step": 274 }, { "epoch": 0.33, "learning_rate": 1.9593637711862335e-05, "loss": 0.4565, "step": 276 }, { "epoch": 0.33, "learning_rate": 1.9586495575932137e-05, "loss": 0.5446, "step": 278 }, { "epoch": 0.33, "learning_rate": 1.957929254595664e-05, "loss": 0.5079, "step": 280 }, { "epoch": 0.34, "learning_rate": 1.957202866768995e-05, "loss": 0.3997, "step": 282 }, { "epoch": 0.34, "learning_rate": 1.9564703987272703e-05, "loss": 0.4934, "step": 284 }, { "epoch": 0.34, "learning_rate": 1.9557318551231745e-05, "loss": 0.5076, "step": 286 }, { "epoch": 0.34, "learning_rate": 1.9549872406479843e-05, "loss": 0.4487, "step": 288 }, { "epoch": 0.34, "learning_rate": 1.9542365600315403e-05, "loss": 0.5515, "step": 290 }, { "epoch": 0.35, "learning_rate": 1.953479818042214e-05, "loss": 0.5262, "step": 292 }, { "epoch": 0.35, "learning_rate": 1.95271701948688e-05, "loss": 0.4953, "step": 294 }, { "epoch": 0.35, "learning_rate": 1.951948169210885e-05, "loss": 0.5127, "step": 296 }, { "epoch": 0.35, "learning_rate": 1.9511732720980156e-05, "loss": 0.4796, "step": 298 }, { "epoch": 0.36, "learning_rate": 1.950392333070469e-05, "loss": 0.5016, "step": 300 }, { "epoch": 0.36, "learning_rate": 1.9496053570888205e-05, "loss": 0.5114, "step": 302 }, { "epoch": 0.36, "learning_rate": 1.9488123491519935e-05, "loss": 0.4471, "step": 304 }, { "epoch": 0.36, "learning_rate": 1.9480133142972257e-05, "loss": 0.4427, "step": 306 }, { "epoch": 0.37, "learning_rate": 1.947208257600039e-05, "loss": 0.457, "step": 308 }, { "epoch": 0.37, "learning_rate": 1.9463971841742057e-05, "loss": 0.5193, "step": 310 }, { "epoch": 0.37, "learning_rate": 1.945580099171717e-05, "loss": 0.5337, "step": 312 }, { "epoch": 0.37, "learning_rate": 1.9447570077827503e-05, "loss": 0.4758, "step": 314 }, { "epoch": 0.38, "learning_rate": 1.9439279152356363e-05, "loss": 0.4223, "step": 316 }, { "epoch": 0.38, "learning_rate": 1.943092826796824e-05, "loss": 0.4909, "step": 318 }, { "epoch": 0.38, "learning_rate": 1.9422517477708506e-05, "loss": 0.4615, "step": 320 }, { "epoch": 0.38, "learning_rate": 1.9414046835003043e-05, "loss": 0.3863, "step": 322 }, { "epoch": 0.39, "learning_rate": 1.940551639365793e-05, "loss": 0.4676, "step": 324 }, { "epoch": 0.39, "learning_rate": 1.9396926207859085e-05, "loss": 0.488, "step": 326 }, { "epoch": 0.39, "learning_rate": 1.938827633217193e-05, "loss": 0.4888, "step": 328 }, { "epoch": 0.39, "learning_rate": 1.9379566821541034e-05, "loss": 0.4754, "step": 330 }, { "epoch": 0.39, "learning_rate": 1.9370797731289784e-05, "loss": 0.7974, "step": 332 }, { "epoch": 0.4, "learning_rate": 1.936196911712001e-05, "loss": 0.3944, "step": 334 }, { "epoch": 0.4, "learning_rate": 1.9353081035111644e-05, "loss": 0.4883, "step": 336 }, { "epoch": 0.4, "learning_rate": 1.9344133541722368e-05, "loss": 0.5429, "step": 338 }, { "epoch": 0.4, "learning_rate": 1.9335126693787237e-05, "loss": 0.4573, "step": 340 }, { "epoch": 0.41, "learning_rate": 1.9326060548518342e-05, "loss": 0.4276, "step": 342 }, { "epoch": 0.41, "learning_rate": 1.9316935163504424e-05, "loss": 0.5089, "step": 344 }, { "epoch": 0.41, "learning_rate": 1.930775059671053e-05, "loss": 0.4285, "step": 346 }, { "epoch": 0.41, "learning_rate": 1.9298506906477623e-05, "loss": 0.4438, "step": 348 }, { "epoch": 0.42, "learning_rate": 1.9289204151522227e-05, "loss": 0.4644, "step": 350 }, { "epoch": 0.42, "learning_rate": 1.927984239093605e-05, "loss": 0.3904, "step": 352 }, { "epoch": 0.42, "learning_rate": 1.9270421684185603e-05, "loss": 0.5486, "step": 354 }, { "epoch": 0.42, "learning_rate": 1.9260942091111836e-05, "loss": 0.5429, "step": 356 }, { "epoch": 0.43, "learning_rate": 1.9251403671929738e-05, "loss": 0.4597, "step": 358 }, { "epoch": 0.43, "learning_rate": 1.9241806487227967e-05, "loss": 0.4721, "step": 360 }, { "epoch": 0.43, "learning_rate": 1.923215059796847e-05, "loss": 0.4689, "step": 362 }, { "epoch": 0.43, "learning_rate": 1.922243606548609e-05, "loss": 0.4716, "step": 364 }, { "epoch": 0.44, "learning_rate": 1.9212662951488162e-05, "loss": 0.4993, "step": 366 }, { "epoch": 0.44, "learning_rate": 1.9202831318054153e-05, "loss": 0.4557, "step": 368 }, { "epoch": 0.44, "learning_rate": 1.9192941227635232e-05, "loss": 0.4701, "step": 370 }, { "epoch": 0.44, "learning_rate": 1.91829927430539e-05, "loss": 0.4032, "step": 372 }, { "epoch": 0.44, "learning_rate": 1.9172985927503584e-05, "loss": 0.4743, "step": 374 }, { "epoch": 0.45, "learning_rate": 1.9162920844548227e-05, "loss": 0.4491, "step": 376 }, { "epoch": 0.45, "learning_rate": 1.9152797558121894e-05, "loss": 0.429, "step": 378 }, { "epoch": 0.45, "learning_rate": 1.9142616132528356e-05, "loss": 0.4707, "step": 380 }, { "epoch": 0.45, "learning_rate": 1.91323766324407e-05, "loss": 0.4371, "step": 382 }, { "epoch": 0.46, "learning_rate": 1.912207912290089e-05, "loss": 0.5172, "step": 384 }, { "epoch": 0.46, "learning_rate": 1.9111723669319385e-05, "loss": 0.4482, "step": 386 }, { "epoch": 0.46, "learning_rate": 1.91013103374747e-05, "loss": 0.4701, "step": 388 }, { "epoch": 0.46, "learning_rate": 1.9090839193513e-05, "loss": 0.4737, "step": 390 }, { "epoch": 0.47, "learning_rate": 1.9080310303947668e-05, "loss": 0.4922, "step": 392 }, { "epoch": 0.47, "learning_rate": 1.9069723735658903e-05, "loss": 0.4081, "step": 394 }, { "epoch": 0.47, "learning_rate": 1.9059079555893277e-05, "loss": 0.486, "step": 396 }, { "epoch": 0.47, "learning_rate": 1.9048377832263314e-05, "loss": 0.4674, "step": 398 }, { "epoch": 0.48, "learning_rate": 1.903761863274706e-05, "loss": 0.4528, "step": 400 }, { "epoch": 0.48, "learning_rate": 1.902680202568765e-05, "loss": 0.4716, "step": 402 }, { "epoch": 0.48, "learning_rate": 1.9015928079792884e-05, "loss": 0.5213, "step": 404 }, { "epoch": 0.48, "learning_rate": 1.9004996864134767e-05, "loss": 0.4836, "step": 406 }, { "epoch": 0.49, "learning_rate": 1.8994008448149103e-05, "loss": 0.5513, "step": 408 }, { "epoch": 0.49, "learning_rate": 1.8982962901635022e-05, "loss": 0.4634, "step": 410 }, { "epoch": 0.49, "learning_rate": 1.8971860294754554e-05, "loss": 0.5111, "step": 412 }, { "epoch": 0.49, "learning_rate": 1.8960700698032194e-05, "loss": 0.4474, "step": 414 }, { "epoch": 0.49, "learning_rate": 1.894948418235441e-05, "loss": 0.4785, "step": 416 }, { "epoch": 0.5, "learning_rate": 1.8938210818969257e-05, "loss": 0.4977, "step": 418 }, { "epoch": 0.5, "learning_rate": 1.8926880679485865e-05, "loss": 0.4346, "step": 420 }, { "epoch": 0.5, "learning_rate": 1.8915493835874026e-05, "loss": 0.4139, "step": 422 }, { "epoch": 0.5, "learning_rate": 1.8904050360463708e-05, "loss": 0.4975, "step": 424 }, { "epoch": 0.51, "learning_rate": 1.8892550325944617e-05, "loss": 0.4766, "step": 426 }, { "epoch": 0.51, "learning_rate": 1.888099380536572e-05, "loss": 0.484, "step": 428 }, { "epoch": 0.51, "learning_rate": 1.886938087213479e-05, "loss": 0.4301, "step": 430 }, { "epoch": 0.51, "learning_rate": 1.885771160001794e-05, "loss": 0.4474, "step": 432 }, { "epoch": 0.52, "learning_rate": 1.8845986063139144e-05, "loss": 0.4445, "step": 434 }, { "epoch": 0.52, "learning_rate": 1.8834204335979777e-05, "loss": 0.4422, "step": 436 }, { "epoch": 0.52, "learning_rate": 1.8822366493378143e-05, "loss": 0.5337, "step": 438 }, { "epoch": 0.52, "learning_rate": 1.8810472610528987e-05, "loss": 0.4704, "step": 440 }, { "epoch": 0.53, "learning_rate": 1.8798522762983026e-05, "loss": 0.458, "step": 442 }, { "epoch": 0.53, "learning_rate": 1.8786517026646474e-05, "loss": 0.3866, "step": 444 }, { "epoch": 0.53, "learning_rate": 1.8774455477780557e-05, "loss": 0.4939, "step": 446 }, { "epoch": 0.53, "learning_rate": 1.8762338193001013e-05, "loss": 0.496, "step": 448 }, { "epoch": 0.54, "learning_rate": 1.8750165249277625e-05, "loss": 0.4171, "step": 450 }, { "epoch": 0.54, "learning_rate": 1.873793672393373e-05, "loss": 0.4582, "step": 452 }, { "epoch": 0.54, "learning_rate": 1.8725652694645714e-05, "loss": 0.4762, "step": 454 }, { "epoch": 0.54, "learning_rate": 1.871331323944254e-05, "loss": 0.3859, "step": 456 }, { "epoch": 0.54, "learning_rate": 1.8700918436705226e-05, "loss": 0.4165, "step": 458 }, { "epoch": 0.55, "learning_rate": 1.868846836516637e-05, "loss": 0.3933, "step": 460 }, { "epoch": 0.55, "learning_rate": 1.8675963103909636e-05, "loss": 0.4746, "step": 462 }, { "epoch": 0.55, "learning_rate": 1.866340273236926e-05, "loss": 0.4893, "step": 464 }, { "epoch": 0.55, "learning_rate": 1.8650787330329546e-05, "loss": 0.516, "step": 466 }, { "epoch": 0.56, "learning_rate": 1.8638116977924346e-05, "loss": 0.4391, "step": 468 }, { "epoch": 0.56, "learning_rate": 1.862539175563657e-05, "loss": 0.3998, "step": 470 }, { "epoch": 0.56, "learning_rate": 1.861261174429765e-05, "loss": 0.4525, "step": 472 }, { "epoch": 0.56, "learning_rate": 1.8599777025087068e-05, "loss": 0.4023, "step": 474 }, { "epoch": 0.57, "learning_rate": 1.858688767953178e-05, "loss": 0.397, "step": 476 }, { "epoch": 0.57, "learning_rate": 1.8573943789505762e-05, "loss": 0.4845, "step": 478 }, { "epoch": 0.57, "learning_rate": 1.8560945437229443e-05, "loss": 0.4518, "step": 480 }, { "epoch": 0.57, "learning_rate": 1.8547892705269207e-05, "loss": 0.5362, "step": 482 }, { "epoch": 0.58, "learning_rate": 1.8534785676536856e-05, "loss": 0.4601, "step": 484 }, { "epoch": 0.58, "learning_rate": 1.8521624434289094e-05, "loss": 0.4814, "step": 486 }, { "epoch": 0.58, "learning_rate": 1.850840906212699e-05, "loss": 0.4707, "step": 488 }, { "epoch": 0.58, "learning_rate": 1.849513964399545e-05, "loss": 0.4144, "step": 490 }, { "epoch": 0.59, "learning_rate": 1.8481816264182678e-05, "loss": 0.434, "step": 492 }, { "epoch": 0.59, "learning_rate": 1.8468439007319663e-05, "loss": 0.4782, "step": 494 }, { "epoch": 0.59, "learning_rate": 1.8455007958379604e-05, "loss": 0.3848, "step": 496 }, { "epoch": 0.59, "learning_rate": 1.8441523202677406e-05, "loss": 0.4541, "step": 498 }, { "epoch": 0.59, "learning_rate": 1.8427984825869114e-05, "loss": 0.4708, "step": 500 }, { "epoch": 0.6, "learning_rate": 1.8414392913951382e-05, "loss": 0.5103, "step": 502 }, { "epoch": 0.6, "learning_rate": 1.8400747553260915e-05, "loss": 0.4201, "step": 504 }, { "epoch": 0.6, "learning_rate": 1.8387048830473948e-05, "loss": 0.4586, "step": 506 }, { "epoch": 0.6, "learning_rate": 1.8373296832605647e-05, "loss": 0.4667, "step": 508 }, { "epoch": 0.61, "learning_rate": 1.8359491647009608e-05, "loss": 0.4846, "step": 510 }, { "epoch": 0.61, "learning_rate": 1.834563336137727e-05, "loss": 0.5255, "step": 512 }, { "epoch": 0.61, "learning_rate": 1.8331722063737365e-05, "loss": 0.482, "step": 514 }, { "epoch": 0.61, "learning_rate": 1.8317757842455363e-05, "loss": 0.4211, "step": 516 }, { "epoch": 0.62, "learning_rate": 1.830374078623291e-05, "loss": 0.4852, "step": 518 }, { "epoch": 0.62, "learning_rate": 1.8289670984107263e-05, "loss": 0.4299, "step": 520 }, { "epoch": 0.62, "learning_rate": 1.8275548525450722e-05, "loss": 0.5044, "step": 522 }, { "epoch": 0.62, "learning_rate": 1.8261373499970064e-05, "loss": 0.4072, "step": 524 }, { "epoch": 0.63, "learning_rate": 1.8247145997705977e-05, "loss": 0.4478, "step": 526 }, { "epoch": 0.63, "learning_rate": 1.823286610903248e-05, "loss": 0.4962, "step": 528 }, { "epoch": 0.63, "learning_rate": 1.8218533924656367e-05, "loss": 0.4658, "step": 530 }, { "epoch": 0.63, "learning_rate": 1.8204149535616596e-05, "loss": 0.4124, "step": 532 }, { "epoch": 0.63, "learning_rate": 1.8189713033283755e-05, "loss": 0.4149, "step": 534 }, { "epoch": 0.64, "learning_rate": 1.817522450935944e-05, "loss": 0.4327, "step": 536 }, { "epoch": 0.64, "learning_rate": 1.8160684055875704e-05, "loss": 0.4469, "step": 538 }, { "epoch": 0.64, "learning_rate": 1.8146091765194458e-05, "loss": 0.44, "step": 540 }, { "epoch": 0.64, "learning_rate": 1.8131447730006885e-05, "loss": 0.4911, "step": 542 }, { "epoch": 0.65, "learning_rate": 1.8116752043332848e-05, "loss": 0.4848, "step": 544 }, { "epoch": 0.65, "learning_rate": 1.810200479852031e-05, "loss": 0.4297, "step": 546 }, { "epoch": 0.65, "learning_rate": 1.8087206089244728e-05, "loss": 0.4205, "step": 548 }, { "epoch": 0.65, "learning_rate": 1.8072356009508473e-05, "loss": 0.3892, "step": 550 }, { "epoch": 0.66, "learning_rate": 1.805745465364022e-05, "loss": 0.4519, "step": 552 }, { "epoch": 0.66, "learning_rate": 1.8042502116294355e-05, "loss": 0.4376, "step": 554 }, { "epoch": 0.66, "learning_rate": 1.8027498492450367e-05, "loss": 0.4538, "step": 556 }, { "epoch": 0.66, "learning_rate": 1.8012443877412253e-05, "loss": 0.4672, "step": 558 }, { "epoch": 0.67, "learning_rate": 1.799733836680791e-05, "loss": 0.4034, "step": 560 }, { "epoch": 0.67, "learning_rate": 1.7982182056588536e-05, "loss": 0.4613, "step": 562 }, { "epoch": 0.67, "learning_rate": 1.796697504302799e-05, "loss": 0.4664, "step": 564 }, { "epoch": 0.67, "learning_rate": 1.795171742272222e-05, "loss": 0.4271, "step": 566 }, { "epoch": 0.68, "learning_rate": 1.7936409292588627e-05, "loss": 0.4741, "step": 568 }, { "epoch": 0.68, "learning_rate": 1.792105074986545e-05, "loss": 0.3175, "step": 570 }, { "epoch": 0.68, "learning_rate": 1.7905641892111152e-05, "loss": 0.4154, "step": 572 }, { "epoch": 0.68, "learning_rate": 1.7890182817203806e-05, "loss": 0.4558, "step": 574 }, { "epoch": 0.68, "learning_rate": 1.7874673623340463e-05, "loss": 0.465, "step": 576 }, { "epoch": 0.69, "learning_rate": 1.785911440903653e-05, "loss": 0.4688, "step": 578 }, { "epoch": 0.69, "learning_rate": 1.7843505273125164e-05, "loss": 0.4411, "step": 580 }, { "epoch": 0.69, "learning_rate": 1.7827846314756604e-05, "loss": 0.4286, "step": 582 }, { "epoch": 0.69, "learning_rate": 1.7812137633397577e-05, "loss": 0.4425, "step": 584 }, { "epoch": 0.7, "learning_rate": 1.7796379328830652e-05, "loss": 0.4126, "step": 586 }, { "epoch": 0.7, "learning_rate": 1.778057150115361e-05, "loss": 0.479, "step": 588 }, { "epoch": 0.7, "learning_rate": 1.77647142507788e-05, "loss": 0.3994, "step": 590 }, { "epoch": 0.7, "learning_rate": 1.7748807678432514e-05, "loss": 0.4574, "step": 592 }, { "epoch": 0.71, "learning_rate": 1.7732851885154336e-05, "loss": 0.3901, "step": 594 }, { "epoch": 0.71, "learning_rate": 1.7716846972296505e-05, "loss": 0.435, "step": 596 }, { "epoch": 0.71, "learning_rate": 1.7700793041523272e-05, "loss": 0.4337, "step": 598 }, { "epoch": 0.71, "learning_rate": 1.7684690194810256e-05, "loss": 0.4196, "step": 600 }, { "epoch": 0.72, "learning_rate": 1.7668538534443782e-05, "loss": 0.4508, "step": 602 }, { "epoch": 0.72, "learning_rate": 1.7652338163020257e-05, "loss": 0.4583, "step": 604 }, { "epoch": 0.72, "learning_rate": 1.76360891834455e-05, "loss": 0.4499, "step": 606 }, { "epoch": 0.72, "learning_rate": 1.7619791698934077e-05, "loss": 0.4263, "step": 608 }, { "epoch": 0.73, "learning_rate": 1.7603445813008685e-05, "loss": 0.3721, "step": 610 }, { "epoch": 0.73, "learning_rate": 1.7587051629499452e-05, "loss": 0.3788, "step": 612 }, { "epoch": 0.73, "learning_rate": 1.7570609252543302e-05, "loss": 0.4405, "step": 614 }, { "epoch": 0.73, "learning_rate": 1.755411878658329e-05, "loss": 0.4939, "step": 616 }, { "epoch": 0.73, "learning_rate": 1.7537580336367925e-05, "loss": 0.4188, "step": 618 }, { "epoch": 0.74, "learning_rate": 1.7520994006950526e-05, "loss": 0.4425, "step": 620 }, { "epoch": 0.74, "learning_rate": 1.7504359903688537e-05, "loss": 0.37, "step": 622 }, { "epoch": 0.74, "learning_rate": 1.748767813224287e-05, "loss": 0.4633, "step": 624 }, { "epoch": 0.74, "learning_rate": 1.747094879857722e-05, "loss": 0.3729, "step": 626 }, { "epoch": 0.75, "learning_rate": 1.7454172008957417e-05, "loss": 0.4312, "step": 628 }, { "epoch": 0.75, "learning_rate": 1.7437347869950713e-05, "loss": 0.4367, "step": 630 }, { "epoch": 0.75, "learning_rate": 1.7420476488425138e-05, "loss": 0.4091, "step": 632 }, { "epoch": 0.75, "learning_rate": 1.740355797154881e-05, "loss": 0.469, "step": 634 }, { "epoch": 0.76, "learning_rate": 1.7386592426789252e-05, "loss": 0.4872, "step": 636 }, { "epoch": 0.76, "learning_rate": 1.7369579961912712e-05, "loss": 0.4932, "step": 638 }, { "epoch": 0.76, "learning_rate": 1.7352520684983474e-05, "loss": 0.3848, "step": 640 }, { "epoch": 0.76, "learning_rate": 1.7335414704363178e-05, "loss": 0.3694, "step": 642 }, { "epoch": 0.77, "learning_rate": 1.7318262128710132e-05, "loss": 0.5099, "step": 644 }, { "epoch": 0.77, "learning_rate": 1.7301063066978617e-05, "loss": 0.4407, "step": 646 }, { "epoch": 0.77, "learning_rate": 1.728381762841819e-05, "loss": 0.4409, "step": 648 }, { "epoch": 0.77, "learning_rate": 1.7266525922573e-05, "loss": 0.4444, "step": 650 }, { "epoch": 0.78, "learning_rate": 1.72491880592811e-05, "loss": 0.4079, "step": 652 }, { "epoch": 0.78, "learning_rate": 1.7231804148673717e-05, "loss": 0.4502, "step": 654 }, { "epoch": 0.78, "learning_rate": 1.7214374301174594e-05, "loss": 0.49, "step": 656 }, { "epoch": 0.78, "learning_rate": 1.719689862749926e-05, "loss": 0.4778, "step": 658 }, { "epoch": 0.78, "learning_rate": 1.7179377238654325e-05, "loss": 0.3734, "step": 660 }, { "epoch": 0.79, "learning_rate": 1.716181024593681e-05, "loss": 0.4956, "step": 662 }, { "epoch": 0.79, "learning_rate": 1.714419776093338e-05, "loss": 0.3712, "step": 664 }, { "epoch": 0.79, "learning_rate": 1.7126539895519698e-05, "loss": 0.3779, "step": 666 }, { "epoch": 0.79, "learning_rate": 1.710883676185968e-05, "loss": 0.4457, "step": 668 }, { "epoch": 0.8, "learning_rate": 1.709108847240478e-05, "loss": 0.4161, "step": 670 }, { "epoch": 0.8, "learning_rate": 1.7073295139893296e-05, "loss": 0.4459, "step": 672 }, { "epoch": 0.8, "learning_rate": 1.705545687734963e-05, "loss": 0.4465, "step": 674 }, { "epoch": 0.8, "learning_rate": 1.7037573798083598e-05, "loss": 0.4284, "step": 676 }, { "epoch": 0.81, "learning_rate": 1.701964601568968e-05, "loss": 0.3806, "step": 678 }, { "epoch": 0.81, "learning_rate": 1.7001673644046322e-05, "loss": 0.4591, "step": 680 }, { "epoch": 0.81, "learning_rate": 1.6983656797315197e-05, "loss": 0.4809, "step": 682 }, { "epoch": 0.81, "learning_rate": 1.6965595589940496e-05, "loss": 0.3811, "step": 684 }, { "epoch": 0.82, "learning_rate": 1.6947490136648182e-05, "loss": 0.4223, "step": 686 }, { "epoch": 0.82, "learning_rate": 1.6929340552445283e-05, "loss": 0.4698, "step": 688 }, { "epoch": 0.82, "learning_rate": 1.6911146952619132e-05, "loss": 0.4059, "step": 690 }, { "epoch": 0.82, "learning_rate": 1.689290945273667e-05, "loss": 0.4451, "step": 692 }, { "epoch": 0.83, "learning_rate": 1.6874628168643683e-05, "loss": 0.3428, "step": 694 }, { "epoch": 0.83, "learning_rate": 1.685630321646408e-05, "loss": 0.4552, "step": 696 }, { "epoch": 0.83, "learning_rate": 1.683793471259915e-05, "loss": 0.6389, "step": 698 }, { "epoch": 0.83, "learning_rate": 1.681952277372683e-05, "loss": 0.4356, "step": 700 }, { "epoch": 0.83, "learning_rate": 1.680106751680096e-05, "loss": 0.4199, "step": 702 }, { "epoch": 0.84, "learning_rate": 1.6782569059050535e-05, "loss": 0.4369, "step": 704 }, { "epoch": 0.84, "learning_rate": 1.676402751797896e-05, "loss": 0.3924, "step": 706 }, { "epoch": 0.84, "learning_rate": 1.674544301136332e-05, "loss": 0.386, "step": 708 }, { "epoch": 0.84, "learning_rate": 1.672681565725361e-05, "loss": 0.464, "step": 710 }, { "epoch": 0.85, "learning_rate": 1.6708145573972005e-05, "loss": 0.4597, "step": 712 }, { "epoch": 0.85, "learning_rate": 1.6689432880112078e-05, "loss": 0.4164, "step": 714 }, { "epoch": 0.85, "learning_rate": 1.6670677694538096e-05, "loss": 0.3761, "step": 716 }, { "epoch": 0.85, "learning_rate": 1.6651880136384215e-05, "loss": 0.4499, "step": 718 }, { "epoch": 0.86, "learning_rate": 1.6633040325053746e-05, "loss": 0.438, "step": 720 }, { "epoch": 0.86, "learning_rate": 1.661415838021841e-05, "loss": 0.4526, "step": 722 }, { "epoch": 0.86, "learning_rate": 1.659523442181754e-05, "loss": 0.4427, "step": 724 }, { "epoch": 0.86, "learning_rate": 1.6576268570057363e-05, "loss": 0.5268, "step": 726 }, { "epoch": 0.87, "learning_rate": 1.655726094541021e-05, "loss": 0.4135, "step": 728 }, { "epoch": 0.87, "learning_rate": 1.653821166861374e-05, "loss": 0.42, "step": 730 }, { "epoch": 0.87, "learning_rate": 1.6519120860670215e-05, "loss": 0.449, "step": 732 }, { "epoch": 0.87, "learning_rate": 1.6499988642845686e-05, "loss": 0.4751, "step": 734 }, { "epoch": 0.88, "learning_rate": 1.6480815136669248e-05, "loss": 0.3826, "step": 736 }, { "epoch": 0.88, "learning_rate": 1.6461600463932266e-05, "loss": 0.4712, "step": 738 }, { "epoch": 0.88, "learning_rate": 1.6442344746687594e-05, "loss": 0.4128, "step": 740 }, { "epoch": 0.88, "learning_rate": 1.64230481072488e-05, "loss": 0.4679, "step": 742 }, { "epoch": 0.88, "learning_rate": 1.640371066818941e-05, "loss": 0.4768, "step": 744 }, { "epoch": 0.89, "learning_rate": 1.638433255234208e-05, "loss": 0.4785, "step": 746 }, { "epoch": 0.89, "learning_rate": 1.6364913882797875e-05, "loss": 0.4334, "step": 748 }, { "epoch": 0.89, "learning_rate": 1.6345454782905454e-05, "loss": 0.4015, "step": 750 }, { "epoch": 0.89, "learning_rate": 1.6325955376270286e-05, "loss": 0.439, "step": 752 }, { "epoch": 0.9, "learning_rate": 1.630641578675387e-05, "loss": 0.3951, "step": 754 }, { "epoch": 0.9, "learning_rate": 1.6296630962191733e-05, "loss": 0.5453, "step": 756 }, { "epoch": 0.9, "learning_rate": 1.62770313311519e-05, "loss": 0.3902, "step": 758 }, { "epoch": 0.9, "learning_rate": 1.625739182799955e-05, "loss": 0.3943, "step": 760 }, { "epoch": 0.91, "learning_rate": 1.6237712577486092e-05, "loss": 0.3312, "step": 762 }, { "epoch": 0.91, "learning_rate": 1.62179937046154e-05, "loss": 0.4366, "step": 764 }, { "epoch": 0.91, "learning_rate": 1.6198235334643045e-05, "loss": 0.3924, "step": 766 }, { "epoch": 0.91, "learning_rate": 1.6178437593075487e-05, "loss": 0.378, "step": 768 }, { "epoch": 0.92, "learning_rate": 1.6158600605669264e-05, "loss": 0.4624, "step": 770 }, { "epoch": 0.92, "learning_rate": 1.613872449843022e-05, "loss": 0.4411, "step": 772 }, { "epoch": 0.92, "learning_rate": 1.6118809397612678e-05, "loss": 0.4695, "step": 774 }, { "epoch": 0.92, "learning_rate": 1.6098855429718662e-05, "loss": 0.4348, "step": 776 }, { "epoch": 0.93, "learning_rate": 1.607886272149708e-05, "loss": 0.4048, "step": 778 }, { "epoch": 0.93, "learning_rate": 1.6058831399942917e-05, "loss": 0.3485, "step": 780 }, { "epoch": 0.93, "learning_rate": 1.6038761592296435e-05, "loss": 0.4146, "step": 782 }, { "epoch": 0.93, "learning_rate": 1.6018653426042357e-05, "loss": 0.4398, "step": 784 }, { "epoch": 0.93, "learning_rate": 1.5998507028909074e-05, "loss": 0.5815, "step": 786 }, { "epoch": 0.94, "learning_rate": 1.597832252886781e-05, "loss": 0.4502, "step": 788 }, { "epoch": 0.94, "learning_rate": 1.5958100054131828e-05, "loss": 0.4275, "step": 790 }, { "epoch": 0.94, "learning_rate": 1.5937839733155603e-05, "loss": 0.4269, "step": 792 }, { "epoch": 0.94, "learning_rate": 1.591754169463402e-05, "loss": 0.4211, "step": 794 }, { "epoch": 0.95, "learning_rate": 1.5897206067501544e-05, "loss": 0.4194, "step": 796 }, { "epoch": 0.95, "learning_rate": 1.5876832980931405e-05, "loss": 0.3833, "step": 798 }, { "epoch": 0.95, "learning_rate": 1.5856422564334772e-05, "loss": 0.4176, "step": 800 }, { "epoch": 0.95, "learning_rate": 1.5835974947359952e-05, "loss": 0.5327, "step": 802 }, { "epoch": 0.96, "learning_rate": 1.581549025989154e-05, "loss": 0.4776, "step": 804 }, { "epoch": 0.96, "learning_rate": 1.5794968632049598e-05, "loss": 0.3573, "step": 806 }, { "epoch": 0.96, "learning_rate": 1.5774410194188856e-05, "loss": 0.464, "step": 808 }, { "epoch": 0.96, "learning_rate": 1.5753815076897848e-05, "loss": 0.4549, "step": 810 }, { "epoch": 0.97, "learning_rate": 1.57331834109981e-05, "loss": 0.463, "step": 812 }, { "epoch": 0.97, "learning_rate": 1.5712515327543307e-05, "loss": 0.4438, "step": 814 }, { "epoch": 0.97, "learning_rate": 1.5691810957818475e-05, "loss": 0.4306, "step": 816 }, { "epoch": 0.97, "learning_rate": 1.5671070433339116e-05, "loss": 0.4135, "step": 818 }, { "epoch": 0.98, "learning_rate": 1.5650293885850393e-05, "loss": 0.4706, "step": 820 }, { "epoch": 0.98, "learning_rate": 1.5629481447326297e-05, "loss": 0.4427, "step": 822 }, { "epoch": 0.98, "learning_rate": 1.5608633249968783e-05, "loss": 0.4661, "step": 824 }, { "epoch": 0.98, "learning_rate": 1.558774942620697e-05, "loss": 0.3674, "step": 826 }, { "epoch": 0.98, "learning_rate": 1.5566830108696265e-05, "loss": 0.4204, "step": 828 }, { "epoch": 0.99, "learning_rate": 1.5545875430317546e-05, "loss": 0.4685, "step": 830 }, { "epoch": 0.99, "learning_rate": 1.5524885524176287e-05, "loss": 0.4583, "step": 832 }, { "epoch": 0.99, "learning_rate": 1.550386052360174e-05, "loss": 0.4306, "step": 834 }, { "epoch": 0.99, "learning_rate": 1.548280056214609e-05, "loss": 0.4203, "step": 836 }, { "epoch": 1.0, "learning_rate": 1.546170577358358e-05, "loss": 0.4664, "step": 838 }, { "epoch": 1.0, "learning_rate": 1.544057629190969e-05, "loss": 0.3553, "step": 840 }, { "epoch": 1.0, "learning_rate": 1.541941225134025e-05, "loss": 0.368, "step": 842 }, { "epoch": 1.0, "learning_rate": 1.5398213786310643e-05, "loss": 0.3176, "step": 844 }, { "epoch": 1.01, "learning_rate": 1.537698103147489e-05, "loss": 0.2801, "step": 846 }, { "epoch": 1.01, "learning_rate": 1.5355714121704846e-05, "loss": 0.2576, "step": 848 }, { "epoch": 1.01, "learning_rate": 1.53344131920893e-05, "loss": 0.2689, "step": 850 }, { "epoch": 1.01, "learning_rate": 1.531307837793315e-05, "loss": 0.3045, "step": 852 }, { "epoch": 1.02, "learning_rate": 1.529170981475653e-05, "loss": 0.2506, "step": 854 }, { "epoch": 1.02, "learning_rate": 1.5270307638293943e-05, "loss": 0.2546, "step": 856 }, { "epoch": 1.02, "learning_rate": 1.524887198449341e-05, "loss": 0.2853, "step": 858 }, { "epoch": 1.02, "learning_rate": 1.5227402989515607e-05, "loss": 0.2772, "step": 860 }, { "epoch": 1.02, "learning_rate": 1.5205900789732986e-05, "loss": 0.2763, "step": 862 }, { "epoch": 1.03, "learning_rate": 1.5184365521728928e-05, "loss": 0.2578, "step": 864 }, { "epoch": 1.03, "learning_rate": 1.5162797322296855e-05, "loss": 0.3121, "step": 866 }, { "epoch": 1.03, "learning_rate": 1.5141196328439377e-05, "loss": 0.3037, "step": 868 }, { "epoch": 1.03, "learning_rate": 1.5119562677367421e-05, "loss": 0.2877, "step": 870 }, { "epoch": 1.04, "learning_rate": 1.5097896506499349e-05, "loss": 0.2856, "step": 872 }, { "epoch": 1.04, "learning_rate": 1.5076197953460087e-05, "loss": 0.3417, "step": 874 }, { "epoch": 1.04, "learning_rate": 1.5054467156080262e-05, "loss": 0.285, "step": 876 }, { "epoch": 1.04, "learning_rate": 1.5032704252395315e-05, "loss": 0.3137, "step": 878 }, { "epoch": 1.05, "learning_rate": 1.5010909380644636e-05, "loss": 0.2204, "step": 880 }, { "epoch": 1.05, "learning_rate": 1.4989082679270668e-05, "loss": 0.2808, "step": 882 }, { "epoch": 1.05, "learning_rate": 1.496722428691804e-05, "loss": 0.2691, "step": 884 }, { "epoch": 1.05, "learning_rate": 1.4945334342432688e-05, "loss": 0.2638, "step": 886 }, { "epoch": 1.06, "learning_rate": 1.492341298486097e-05, "loss": 0.26, "step": 888 }, { "epoch": 1.06, "learning_rate": 1.490146035344878e-05, "loss": 0.2764, "step": 890 }, { "epoch": 1.06, "learning_rate": 1.4879476587640657e-05, "loss": 0.2558, "step": 892 }, { "epoch": 1.06, "learning_rate": 1.4868473072968645e-05, "loss": 0.5349, "step": 894 }, { "epoch": 1.07, "learning_rate": 1.4846442867457533e-05, "loss": 0.2937, "step": 896 }, { "epoch": 1.07, "learning_rate": 1.4824381877025154e-05, "loss": 0.2684, "step": 898 }, { "epoch": 1.07, "learning_rate": 1.4802290241804355e-05, "loss": 0.2491, "step": 900 }, { "epoch": 1.07, "learning_rate": 1.478016810212265e-05, "loss": 0.2634, "step": 902 }, { "epoch": 1.07, "learning_rate": 1.4758015598501308e-05, "loss": 0.2889, "step": 904 }, { "epoch": 1.08, "learning_rate": 1.473583287165448e-05, "loss": 0.2843, "step": 906 }, { "epoch": 1.08, "learning_rate": 1.4713620062488296e-05, "loss": 0.2705, "step": 908 }, { "epoch": 1.08, "learning_rate": 1.4691377312099965e-05, "loss": 0.2765, "step": 910 }, { "epoch": 1.08, "learning_rate": 1.4669104761776892e-05, "loss": 0.2595, "step": 912 }, { "epoch": 1.09, "learning_rate": 1.4646802552995767e-05, "loss": 0.2101, "step": 914 }, { "epoch": 1.09, "learning_rate": 1.4624470827421675e-05, "loss": 0.263, "step": 916 }, { "epoch": 1.09, "learning_rate": 1.4602109726907197e-05, "loss": 0.2592, "step": 918 }, { "epoch": 1.09, "learning_rate": 1.4579719393491496e-05, "loss": 0.2732, "step": 920 }, { "epoch": 1.1, "learning_rate": 1.455729996939944e-05, "loss": 0.3056, "step": 922 }, { "epoch": 1.1, "learning_rate": 1.4534851597040666e-05, "loss": 0.2886, "step": 924 }, { "epoch": 1.1, "learning_rate": 1.45123744190087e-05, "loss": 0.2493, "step": 926 }, { "epoch": 1.1, "learning_rate": 1.4489868578080046e-05, "loss": 0.271, "step": 928 }, { "epoch": 1.11, "learning_rate": 1.4467334217213274e-05, "loss": 0.2752, "step": 930 }, { "epoch": 1.11, "learning_rate": 1.4444771479548115e-05, "loss": 0.3108, "step": 932 }, { "epoch": 1.11, "learning_rate": 1.4422180508404544e-05, "loss": 0.2946, "step": 934 }, { "epoch": 1.11, "learning_rate": 1.439956144728189e-05, "loss": 0.2401, "step": 936 }, { "epoch": 1.12, "learning_rate": 1.4376914439857905e-05, "loss": 0.3501, "step": 938 }, { "epoch": 1.12, "learning_rate": 1.4354239629987857e-05, "loss": 0.2895, "step": 940 }, { "epoch": 1.12, "learning_rate": 1.4331537161703612e-05, "loss": 0.2632, "step": 942 }, { "epoch": 1.12, "learning_rate": 1.4308807179212736e-05, "loss": 0.261, "step": 944 }, { "epoch": 1.12, "learning_rate": 1.4286049826897559e-05, "loss": 0.3207, "step": 946 }, { "epoch": 1.13, "learning_rate": 1.4263265249314269e-05, "loss": 0.2592, "step": 948 }, { "epoch": 1.13, "learning_rate": 1.4240453591191984e-05, "loss": 0.2468, "step": 950 }, { "epoch": 1.13, "learning_rate": 1.4217614997431847e-05, "loss": 0.2483, "step": 952 }, { "epoch": 1.13, "learning_rate": 1.41947496131061e-05, "loss": 0.2657, "step": 954 }, { "epoch": 1.14, "learning_rate": 1.4171857583457154e-05, "loss": 0.2389, "step": 956 }, { "epoch": 1.14, "learning_rate": 1.4148939053896669e-05, "loss": 0.2404, "step": 958 }, { "epoch": 1.14, "learning_rate": 1.4125994170004644e-05, "loss": 0.2539, "step": 960 }, { "epoch": 1.14, "learning_rate": 1.4103023077528482e-05, "loss": 0.2721, "step": 962 }, { "epoch": 1.15, "learning_rate": 1.4080025922382056e-05, "loss": 0.3314, "step": 964 }, { "epoch": 1.15, "learning_rate": 1.4057002850644796e-05, "loss": 0.2668, "step": 966 }, { "epoch": 1.15, "learning_rate": 1.4033954008560758e-05, "loss": 0.2295, "step": 968 }, { "epoch": 1.15, "learning_rate": 1.401087954253769e-05, "loss": 0.284, "step": 970 }, { "epoch": 1.16, "learning_rate": 1.3987779599146105e-05, "loss": 0.2595, "step": 972 }, { "epoch": 1.16, "learning_rate": 1.396465432511835e-05, "loss": 0.2849, "step": 974 }, { "epoch": 1.16, "learning_rate": 1.3941503867347672e-05, "loss": 0.271, "step": 976 }, { "epoch": 1.16, "learning_rate": 1.3918328372887295e-05, "loss": 0.2943, "step": 978 }, { "epoch": 1.17, "learning_rate": 1.3895127988949471e-05, "loss": 0.2751, "step": 980 }, { "epoch": 1.17, "learning_rate": 1.3871902862904544e-05, "loss": 0.276, "step": 982 }, { "epoch": 1.17, "learning_rate": 1.3848653142280037e-05, "loss": 0.2251, "step": 984 }, { "epoch": 1.17, "learning_rate": 1.3825378974759696e-05, "loss": 0.2722, "step": 986 }, { "epoch": 1.17, "learning_rate": 1.3802080508182543e-05, "loss": 0.2927, "step": 988 }, { "epoch": 1.18, "learning_rate": 1.377875789054196e-05, "loss": 0.2473, "step": 990 }, { "epoch": 1.18, "learning_rate": 1.376708757136279e-05, "loss": 0.3166, "step": 992 }, { "epoch": 1.18, "learning_rate": 1.3743729004949972e-05, "loss": 0.3079, "step": 994 }, { "epoch": 1.18, "learning_rate": 1.3720346658126286e-05, "loss": 0.2695, "step": 996 }, { "epoch": 1.19, "learning_rate": 1.3696940679417918e-05, "loss": 0.3125, "step": 998 }, { "epoch": 1.19, "learning_rate": 1.3673511217501172e-05, "loss": 0.2874, "step": 1000 }, { "epoch": 1.19, "learning_rate": 1.3650058421201517e-05, "loss": 0.31, "step": 1002 }, { "epoch": 1.19, "learning_rate": 1.362658243949265e-05, "loss": 0.2795, "step": 1004 }, { "epoch": 1.2, "learning_rate": 1.3603083421495535e-05, "loss": 0.2693, "step": 1006 }, { "epoch": 1.2, "learning_rate": 1.3579561516477467e-05, "loss": 0.2659, "step": 1008 }, { "epoch": 1.2, "learning_rate": 1.355601687385112e-05, "loss": 0.2909, "step": 1010 }, { "epoch": 1.2, "learning_rate": 1.3532449643173604e-05, "loss": 0.262, "step": 1012 }, { "epoch": 1.21, "learning_rate": 1.3508859974145504e-05, "loss": 0.2538, "step": 1014 }, { "epoch": 1.21, "learning_rate": 1.3485248016609937e-05, "loss": 0.2674, "step": 1016 }, { "epoch": 1.21, "learning_rate": 1.3461613920551598e-05, "loss": 0.2863, "step": 1018 }, { "epoch": 1.21, "learning_rate": 1.3437957836095804e-05, "loss": 0.3213, "step": 1020 }, { "epoch": 1.22, "learning_rate": 1.3414279913507548e-05, "loss": 0.2932, "step": 1022 }, { "epoch": 1.22, "learning_rate": 1.3390580303190541e-05, "loss": 0.2604, "step": 1024 }, { "epoch": 1.22, "learning_rate": 1.3366859155686253e-05, "loss": 0.275, "step": 1026 }, { "epoch": 1.22, "learning_rate": 1.3343116621672959e-05, "loss": 0.2625, "step": 1028 }, { "epoch": 1.22, "learning_rate": 1.3319352851964787e-05, "loss": 0.2664, "step": 1030 }, { "epoch": 1.23, "learning_rate": 1.3295567997510747e-05, "loss": 0.2567, "step": 1032 }, { "epoch": 1.23, "learning_rate": 1.3271762209393793e-05, "loss": 0.2469, "step": 1034 }, { "epoch": 1.23, "learning_rate": 1.3247935638829838e-05, "loss": 0.2596, "step": 1036 }, { "epoch": 1.23, "learning_rate": 1.3224088437166818e-05, "loss": 0.3033, "step": 1038 }, { "epoch": 1.24, "learning_rate": 1.320022075588371e-05, "loss": 0.3582, "step": 1040 }, { "epoch": 1.24, "learning_rate": 1.3176332746589587e-05, "loss": 0.2339, "step": 1042 }, { "epoch": 1.24, "learning_rate": 1.3152424561022634e-05, "loss": 0.2622, "step": 1044 }, { "epoch": 1.24, "learning_rate": 1.3128496351049216e-05, "loss": 0.2388, "step": 1046 }, { "epoch": 1.25, "learning_rate": 1.3104548268662873e-05, "loss": 0.2322, "step": 1048 }, { "epoch": 1.25, "learning_rate": 1.3080580465983397e-05, "loss": 0.3108, "step": 1050 }, { "epoch": 1.25, "learning_rate": 1.3056593095255825e-05, "loss": 0.2339, "step": 1052 }, { "epoch": 1.25, "learning_rate": 1.3032586308849512e-05, "loss": 0.2731, "step": 1054 }, { "epoch": 1.26, "learning_rate": 1.3008560259257117e-05, "loss": 0.2677, "step": 1056 }, { "epoch": 1.26, "learning_rate": 1.2984515099093687e-05, "loss": 0.2907, "step": 1058 }, { "epoch": 1.26, "learning_rate": 1.2960450981095643e-05, "loss": 0.2836, "step": 1060 }, { "epoch": 1.26, "learning_rate": 1.2936368058119828e-05, "loss": 0.2621, "step": 1062 }, { "epoch": 1.27, "learning_rate": 1.2912266483142545e-05, "loss": 0.3009, "step": 1064 }, { "epoch": 1.27, "learning_rate": 1.2888146409258575e-05, "loss": 0.252, "step": 1066 }, { "epoch": 1.27, "learning_rate": 1.2864007989680194e-05, "loss": 0.3354, "step": 1068 }, { "epoch": 1.27, "learning_rate": 1.2839851377736216e-05, "loss": 0.2908, "step": 1070 }, { "epoch": 1.27, "learning_rate": 1.281567672687102e-05, "loss": 0.284, "step": 1072 }, { "epoch": 1.28, "learning_rate": 1.2791484190643571e-05, "loss": 0.2882, "step": 1074 }, { "epoch": 1.28, "learning_rate": 1.2767273922726427e-05, "loss": 0.3096, "step": 1076 }, { "epoch": 1.28, "learning_rate": 1.2743046076904795e-05, "loss": 0.2674, "step": 1078 }, { "epoch": 1.28, "learning_rate": 1.271880080707553e-05, "loss": 0.2207, "step": 1080 }, { "epoch": 1.29, "learning_rate": 1.2694538267246168e-05, "loss": 0.2605, "step": 1082 }, { "epoch": 1.29, "learning_rate": 1.2670258611533947e-05, "loss": 0.2598, "step": 1084 }, { "epoch": 1.29, "learning_rate": 1.2645961994164822e-05, "loss": 0.2856, "step": 1086 }, { "epoch": 1.29, "learning_rate": 1.2621648569472491e-05, "loss": 0.2436, "step": 1088 }, { "epoch": 1.3, "learning_rate": 1.2597318491897416e-05, "loss": 0.2606, "step": 1090 }, { "epoch": 1.3, "learning_rate": 1.257297191598584e-05, "loss": 0.2602, "step": 1092 }, { "epoch": 1.3, "learning_rate": 1.2548608996388792e-05, "loss": 0.2465, "step": 1094 }, { "epoch": 1.3, "learning_rate": 1.2524229887861132e-05, "loss": 0.2536, "step": 1096 }, { "epoch": 1.31, "learning_rate": 1.2499834745260553e-05, "loss": 0.2859, "step": 1098 }, { "epoch": 1.31, "learning_rate": 1.2475423723546584e-05, "loss": 0.2539, "step": 1100 }, { "epoch": 1.31, "learning_rate": 1.245099697777963e-05, "loss": 0.2299, "step": 1102 }, { "epoch": 1.31, "learning_rate": 1.2426554663119975e-05, "loss": 0.4215, "step": 1104 }, { "epoch": 1.32, "learning_rate": 1.2402096934826794e-05, "loss": 0.2575, "step": 1106 }, { "epoch": 1.32, "learning_rate": 1.237762394825718e-05, "loss": 0.3257, "step": 1108 }, { "epoch": 1.32, "learning_rate": 1.2353135858865128e-05, "loss": 0.2778, "step": 1110 }, { "epoch": 1.32, "learning_rate": 1.232863282220059e-05, "loss": 0.2592, "step": 1112 }, { "epoch": 1.32, "learning_rate": 1.230411499390845e-05, "loss": 0.2539, "step": 1114 }, { "epoch": 1.33, "learning_rate": 1.2279582529727552e-05, "loss": 0.2831, "step": 1116 }, { "epoch": 1.33, "learning_rate": 1.2255035585489705e-05, "loss": 0.2806, "step": 1118 }, { "epoch": 1.33, "learning_rate": 1.2230474317118708e-05, "loss": 0.2777, "step": 1120 }, { "epoch": 1.33, "learning_rate": 1.2205898880629336e-05, "loss": 0.3334, "step": 1122 }, { "epoch": 1.34, "learning_rate": 1.2181309432126366e-05, "loss": 0.302, "step": 1124 }, { "epoch": 1.34, "learning_rate": 1.2156706127803578e-05, "loss": 0.2659, "step": 1126 }, { "epoch": 1.34, "learning_rate": 1.2132089123942764e-05, "loss": 0.297, "step": 1128 }, { "epoch": 1.34, "learning_rate": 1.2107458576912743e-05, "loss": 0.3207, "step": 1130 }, { "epoch": 1.35, "learning_rate": 1.2082814643168357e-05, "loss": 0.2224, "step": 1132 }, { "epoch": 1.35, "learning_rate": 1.2058157479249475e-05, "loss": 0.295, "step": 1134 }, { "epoch": 1.35, "learning_rate": 1.2033487241780014e-05, "loss": 0.2238, "step": 1136 }, { "epoch": 1.35, "learning_rate": 1.2008804087466931e-05, "loss": 0.277, "step": 1138 }, { "epoch": 1.36, "learning_rate": 1.1984108173099238e-05, "loss": 0.2906, "step": 1140 }, { "epoch": 1.36, "learning_rate": 1.1959399655546989e-05, "loss": 0.2649, "step": 1142 }, { "epoch": 1.36, "learning_rate": 1.1934678691760296e-05, "loss": 0.3147, "step": 1144 }, { "epoch": 1.36, "learning_rate": 1.190994543876834e-05, "loss": 0.2761, "step": 1146 }, { "epoch": 1.37, "learning_rate": 1.188520005367836e-05, "loss": 0.2252, "step": 1148 }, { "epoch": 1.37, "learning_rate": 1.1860442693674648e-05, "loss": 0.2521, "step": 1150 }, { "epoch": 1.37, "learning_rate": 1.1835673516017571e-05, "loss": 0.2618, "step": 1152 }, { "epoch": 1.37, "learning_rate": 1.1810892678042565e-05, "loss": 0.2869, "step": 1154 }, { "epoch": 1.37, "learning_rate": 1.1786100337159132e-05, "loss": 0.2124, "step": 1156 }, { "epoch": 1.38, "learning_rate": 1.177369990233723e-05, "loss": 0.3826, "step": 1158 }, { "epoch": 1.38, "learning_rate": 1.1748890602393521e-05, "loss": 0.2805, "step": 1160 }, { "epoch": 1.38, "learning_rate": 1.172407019338261e-05, "loss": 0.2467, "step": 1162 }, { "epoch": 1.38, "learning_rate": 1.1699238832965358e-05, "loss": 0.2405, "step": 1164 }, { "epoch": 1.39, "learning_rate": 1.1674396678872186e-05, "loss": 0.3017, "step": 1166 }, { "epoch": 1.39, "learning_rate": 1.164954388890207e-05, "loss": 0.2584, "step": 1168 }, { "epoch": 1.39, "learning_rate": 1.162468062092156e-05, "loss": 0.3141, "step": 1170 }, { "epoch": 1.39, "learning_rate": 1.1599807032863756e-05, "loss": 0.3254, "step": 1172 }, { "epoch": 1.4, "learning_rate": 1.1574923282727314e-05, "loss": 0.2703, "step": 1174 }, { "epoch": 1.4, "learning_rate": 1.1550029528575428e-05, "loss": 0.2207, "step": 1176 }, { "epoch": 1.4, "learning_rate": 1.152512592853486e-05, "loss": 0.2634, "step": 1178 }, { "epoch": 1.4, "learning_rate": 1.1500212640794895e-05, "loss": 0.3368, "step": 1180 }, { "epoch": 1.41, "learning_rate": 1.1475289823606364e-05, "loss": 0.2535, "step": 1182 }, { "epoch": 1.41, "learning_rate": 1.1450357635280628e-05, "loss": 0.287, "step": 1184 }, { "epoch": 1.41, "learning_rate": 1.1425416234188578e-05, "loss": 0.3052, "step": 1186 }, { "epoch": 1.41, "learning_rate": 1.1400465778759611e-05, "loss": 0.2909, "step": 1188 }, { "epoch": 1.41, "learning_rate": 1.1375506427480658e-05, "loss": 0.2904, "step": 1190 }, { "epoch": 1.42, "learning_rate": 1.135053833889514e-05, "loss": 0.339, "step": 1192 }, { "epoch": 1.42, "learning_rate": 1.1325561671601987e-05, "loss": 0.292, "step": 1194 }, { "epoch": 1.42, "learning_rate": 1.1300576584254617e-05, "loss": 0.2424, "step": 1196 }, { "epoch": 1.42, "learning_rate": 1.127558323555994e-05, "loss": 0.267, "step": 1198 }, { "epoch": 1.43, "learning_rate": 1.125058178427733e-05, "loss": 0.2641, "step": 1200 }, { "epoch": 1.43, "learning_rate": 1.1225572389217643e-05, "loss": 0.269, "step": 1202 }, { "epoch": 1.43, "learning_rate": 1.1200555209242182e-05, "loss": 0.2903, "step": 1204 }, { "epoch": 1.43, "learning_rate": 1.1175530403261716e-05, "loss": 0.2622, "step": 1206 }, { "epoch": 1.44, "learning_rate": 1.1150498130235435e-05, "loss": 0.3199, "step": 1208 }, { "epoch": 1.44, "learning_rate": 1.1125458549169977e-05, "loss": 0.2469, "step": 1210 }, { "epoch": 1.44, "learning_rate": 1.1100411819118387e-05, "loss": 0.2781, "step": 1212 }, { "epoch": 1.44, "learning_rate": 1.1075358099179136e-05, "loss": 0.3293, "step": 1214 }, { "epoch": 1.45, "learning_rate": 1.1050297548495084e-05, "loss": 0.2065, "step": 1216 }, { "epoch": 1.45, "learning_rate": 1.1025230326252484e-05, "loss": 0.2548, "step": 1218 }, { "epoch": 1.45, "learning_rate": 1.1000156591679971e-05, "loss": 0.3063, "step": 1220 }, { "epoch": 1.45, "learning_rate": 1.0975076504047535e-05, "loss": 0.3099, "step": 1222 }, { "epoch": 1.46, "learning_rate": 1.0949990222665532e-05, "loss": 0.2805, "step": 1224 }, { "epoch": 1.46, "learning_rate": 1.0924897906883663e-05, "loss": 0.288, "step": 1226 }, { "epoch": 1.46, "learning_rate": 1.0899799716089949e-05, "loss": 0.3014, "step": 1228 }, { "epoch": 1.46, "learning_rate": 1.0874695809709737e-05, "loss": 0.2768, "step": 1230 }, { "epoch": 1.46, "learning_rate": 1.0849586347204677e-05, "loss": 0.2894, "step": 1232 }, { "epoch": 1.47, "learning_rate": 1.0824471488071714e-05, "loss": 0.2718, "step": 1234 }, { "epoch": 1.47, "learning_rate": 1.0799351391842074e-05, "loss": 0.2476, "step": 1236 }, { "epoch": 1.47, "learning_rate": 1.0774226218080244e-05, "loss": 0.2318, "step": 1238 }, { "epoch": 1.47, "learning_rate": 1.0749096126382965e-05, "loss": 0.2545, "step": 1240 }, { "epoch": 1.48, "learning_rate": 1.0723961276378225e-05, "loss": 0.2708, "step": 1242 }, { "epoch": 1.48, "learning_rate": 1.0698821827724225e-05, "loss": 0.3471, "step": 1244 }, { "epoch": 1.48, "learning_rate": 1.0673677940108386e-05, "loss": 0.2528, "step": 1246 }, { "epoch": 1.48, "learning_rate": 1.0648529773246324e-05, "loss": 0.2625, "step": 1248 }, { "epoch": 1.49, "learning_rate": 1.0623377486880831e-05, "loss": 0.2634, "step": 1250 }, { "epoch": 1.49, "learning_rate": 1.0598221240780874e-05, "loss": 0.2506, "step": 1252 }, { "epoch": 1.49, "learning_rate": 1.0573061194740568e-05, "loss": 0.2659, "step": 1254 }, { "epoch": 1.49, "learning_rate": 1.054789750857817e-05, "loss": 0.239, "step": 1256 }, { "epoch": 1.5, "learning_rate": 1.052273034213505e-05, "loss": 0.2465, "step": 1258 }, { "epoch": 1.5, "learning_rate": 1.0497559855274699e-05, "loss": 0.2512, "step": 1260 }, { "epoch": 1.5, "learning_rate": 1.0472386207881684e-05, "loss": 0.303, "step": 1262 }, { "epoch": 1.5, "learning_rate": 1.0447209559860658e-05, "loss": 0.2542, "step": 1264 }, { "epoch": 1.51, "learning_rate": 1.0422030071135336e-05, "loss": 0.2995, "step": 1266 }, { "epoch": 1.51, "learning_rate": 1.0396847901647469e-05, "loss": 0.2597, "step": 1268 }, { "epoch": 1.51, "learning_rate": 1.037166321135584e-05, "loss": 0.2773, "step": 1270 }, { "epoch": 1.51, "learning_rate": 1.0346476160235246e-05, "loss": 0.2771, "step": 1272 }, { "epoch": 1.51, "learning_rate": 1.0321286908275476e-05, "loss": 0.2906, "step": 1274 }, { "epoch": 1.52, "learning_rate": 1.0296095615480309e-05, "loss": 0.3002, "step": 1276 }, { "epoch": 1.52, "learning_rate": 1.0270902441866474e-05, "loss": 0.2267, "step": 1278 }, { "epoch": 1.52, "learning_rate": 1.0245707547462654e-05, "loss": 0.2545, "step": 1280 }, { "epoch": 1.52, "learning_rate": 1.0220511092308463e-05, "loss": 0.2412, "step": 1282 }, { "epoch": 1.53, "learning_rate": 1.0195313236453431e-05, "loss": 0.2567, "step": 1284 }, { "epoch": 1.53, "learning_rate": 1.0170114139955975e-05, "loss": 0.2589, "step": 1286 }, { "epoch": 1.53, "learning_rate": 1.0144913962882406e-05, "loss": 0.2834, "step": 1288 }, { "epoch": 1.53, "learning_rate": 1.0119712865305891e-05, "loss": 0.2504, "step": 1290 }, { "epoch": 1.54, "learning_rate": 1.0094511007305445e-05, "loss": 0.2788, "step": 1292 }, { "epoch": 1.54, "learning_rate": 1.0069308548964915e-05, "loss": 0.2664, "step": 1294 }, { "epoch": 1.54, "learning_rate": 1.0044105650371961e-05, "loss": 0.2695, "step": 1296 }, { "epoch": 1.54, "learning_rate": 1.0018902471617037e-05, "loss": 0.2309, "step": 1298 }, { "epoch": 1.55, "learning_rate": 9.993699172792381e-06, "loss": 0.2949, "step": 1300 }, { "epoch": 1.55, "learning_rate": 9.96849591399099e-06, "loss": 0.3248, "step": 1302 }, { "epoch": 1.55, "learning_rate": 9.943292855305611e-06, "loss": 0.2719, "step": 1304 }, { "epoch": 1.55, "learning_rate": 9.918090156827712e-06, "loss": 0.2417, "step": 1306 }, { "epoch": 1.56, "learning_rate": 9.892887978646483e-06, "loss": 0.2371, "step": 1308 }, { "epoch": 1.56, "learning_rate": 9.867686480847801e-06, "loss": 0.271, "step": 1310 }, { "epoch": 1.56, "learning_rate": 9.842485823513222e-06, "loss": 0.2791, "step": 1312 }, { "epoch": 1.56, "learning_rate": 9.817286166718971e-06, "loss": 0.2759, "step": 1314 }, { "epoch": 1.56, "learning_rate": 9.792087670534908e-06, "loss": 0.2867, "step": 1316 }, { "epoch": 1.57, "learning_rate": 9.766890495023522e-06, "loss": 0.2964, "step": 1318 }, { "epoch": 1.57, "learning_rate": 9.741694800238923e-06, "loss": 0.2466, "step": 1320 }, { "epoch": 1.57, "learning_rate": 9.716500746225802e-06, "loss": 0.2745, "step": 1322 }, { "epoch": 1.57, "learning_rate": 9.691308493018439e-06, "loss": 0.2429, "step": 1324 }, { "epoch": 1.58, "learning_rate": 9.666118200639667e-06, "loss": 0.2561, "step": 1326 }, { "epoch": 1.58, "learning_rate": 9.640930029099863e-06, "loss": 0.2462, "step": 1328 }, { "epoch": 1.58, "learning_rate": 9.615744138395941e-06, "loss": 0.2294, "step": 1330 }, { "epoch": 1.58, "learning_rate": 9.590560688510323e-06, "loss": 0.2462, "step": 1332 }, { "epoch": 1.59, "learning_rate": 9.565379839409916e-06, "loss": 0.2755, "step": 1334 }, { "epoch": 1.59, "learning_rate": 9.540201751045127e-06, "loss": 0.2623, "step": 1336 }, { "epoch": 1.59, "learning_rate": 9.515026583348811e-06, "loss": 0.3047, "step": 1338 }, { "epoch": 1.59, "learning_rate": 9.489854496235278e-06, "loss": 0.2489, "step": 1340 }, { "epoch": 1.6, "learning_rate": 9.464685649599266e-06, "loss": 0.23, "step": 1342 }, { "epoch": 1.6, "learning_rate": 9.439520203314927e-06, "loss": 0.2517, "step": 1344 }, { "epoch": 1.6, "learning_rate": 9.414358317234826e-06, "loss": 0.3041, "step": 1346 }, { "epoch": 1.6, "learning_rate": 9.3892001511889e-06, "loss": 0.2922, "step": 1348 }, { "epoch": 1.61, "learning_rate": 9.364045864983454e-06, "loss": 0.2451, "step": 1350 }, { "epoch": 1.61, "learning_rate": 9.338895618400168e-06, "loss": 0.2457, "step": 1352 }, { "epoch": 1.61, "learning_rate": 9.313749571195041e-06, "loss": 0.2488, "step": 1354 }, { "epoch": 1.61, "learning_rate": 9.28860788309741e-06, "loss": 0.2599, "step": 1356 }, { "epoch": 1.61, "learning_rate": 9.263470713808917e-06, "loss": 0.2192, "step": 1358 }, { "epoch": 1.62, "learning_rate": 9.238338223002496e-06, "loss": 0.2296, "step": 1360 }, { "epoch": 1.62, "learning_rate": 9.213210570321374e-06, "loss": 0.2907, "step": 1362 }, { "epoch": 1.62, "learning_rate": 9.188087915378037e-06, "loss": 0.2644, "step": 1364 }, { "epoch": 1.62, "learning_rate": 9.162970417753229e-06, "loss": 0.2307, "step": 1366 }, { "epoch": 1.63, "learning_rate": 9.137858236994932e-06, "loss": 0.2493, "step": 1368 }, { "epoch": 1.63, "learning_rate": 9.112751532617361e-06, "loss": 0.2546, "step": 1370 }, { "epoch": 1.63, "learning_rate": 9.087650464099937e-06, "loss": 0.2835, "step": 1372 }, { "epoch": 1.63, "learning_rate": 9.062555190886287e-06, "loss": 0.2701, "step": 1374 }, { "epoch": 1.64, "learning_rate": 9.037465872383219e-06, "loss": 0.2514, "step": 1376 }, { "epoch": 1.64, "learning_rate": 9.012382667959724e-06, "loss": 0.2423, "step": 1378 }, { "epoch": 1.64, "learning_rate": 8.987305736945955e-06, "loss": 0.2289, "step": 1380 }, { "epoch": 1.64, "learning_rate": 8.962235238632208e-06, "loss": 0.2504, "step": 1382 }, { "epoch": 1.65, "learning_rate": 8.937171332267927e-06, "loss": 0.2912, "step": 1384 }, { "epoch": 1.65, "learning_rate": 8.912114177060681e-06, "loss": 0.2368, "step": 1386 }, { "epoch": 1.65, "learning_rate": 8.887063932175156e-06, "loss": 0.2823, "step": 1388 }, { "epoch": 1.65, "learning_rate": 8.862020756732141e-06, "loss": 0.2289, "step": 1390 }, { "epoch": 1.66, "learning_rate": 8.836984809807514e-06, "loss": 0.2332, "step": 1392 }, { "epoch": 1.66, "learning_rate": 8.811956250431253e-06, "loss": 0.2627, "step": 1394 }, { "epoch": 1.66, "learning_rate": 8.786935237586394e-06, "loss": 0.2613, "step": 1396 }, { "epoch": 1.66, "learning_rate": 8.761921930208044e-06, "loss": 0.2353, "step": 1398 }, { "epoch": 1.66, "learning_rate": 8.73691648718236e-06, "loss": 0.2668, "step": 1400 }, { "epoch": 1.67, "learning_rate": 8.71191906734555e-06, "loss": 0.2292, "step": 1402 }, { "epoch": 1.67, "learning_rate": 8.686929829482862e-06, "loss": 0.2577, "step": 1404 }, { "epoch": 1.67, "learning_rate": 8.661948932327558e-06, "loss": 0.2177, "step": 1406 }, { "epoch": 1.67, "learning_rate": 8.636976534559926e-06, "loss": 0.2849, "step": 1408 }, { "epoch": 1.68, "learning_rate": 8.61201279480627e-06, "loss": 0.2575, "step": 1410 }, { "epoch": 1.68, "learning_rate": 8.587057871637891e-06, "loss": 0.267, "step": 1412 }, { "epoch": 1.68, "learning_rate": 8.562111923570091e-06, "loss": 0.2572, "step": 1414 }, { "epoch": 1.68, "learning_rate": 8.537175109061154e-06, "loss": 0.2546, "step": 1416 }, { "epoch": 1.69, "learning_rate": 8.512247586511354e-06, "loss": 0.3069, "step": 1418 }, { "epoch": 1.69, "learning_rate": 8.487329514261948e-06, "loss": 0.2915, "step": 1420 }, { "epoch": 1.69, "learning_rate": 8.46242105059415e-06, "loss": 0.3033, "step": 1422 }, { "epoch": 1.69, "learning_rate": 8.437522353728147e-06, "loss": 0.2389, "step": 1424 }, { "epoch": 1.7, "learning_rate": 8.412633581822086e-06, "loss": 0.4257, "step": 1426 }, { "epoch": 1.7, "learning_rate": 8.387754892971073e-06, "loss": 0.2206, "step": 1428 }, { "epoch": 1.7, "learning_rate": 8.36288644520616e-06, "loss": 0.2217, "step": 1430 }, { "epoch": 1.7, "learning_rate": 8.338028396493345e-06, "loss": 0.27, "step": 1432 }, { "epoch": 1.71, "learning_rate": 8.313180904732578e-06, "loss": 0.266, "step": 1434 }, { "epoch": 1.71, "learning_rate": 8.288344127756755e-06, "loss": 0.2922, "step": 1436 }, { "epoch": 1.71, "learning_rate": 8.263518223330698e-06, "loss": 0.2122, "step": 1438 }, { "epoch": 1.71, "learning_rate": 8.238703349150169e-06, "loss": 0.2822, "step": 1440 }, { "epoch": 1.71, "learning_rate": 8.213899662840871e-06, "loss": 0.2687, "step": 1442 }, { "epoch": 1.72, "learning_rate": 8.189107321957437e-06, "loss": 0.2783, "step": 1444 }, { "epoch": 1.72, "learning_rate": 8.164326483982434e-06, "loss": 0.2499, "step": 1446 }, { "epoch": 1.72, "learning_rate": 8.139557306325359e-06, "loss": 0.2408, "step": 1448 }, { "epoch": 1.72, "learning_rate": 8.114799946321647e-06, "loss": 0.2584, "step": 1450 }, { "epoch": 1.73, "learning_rate": 8.090054561231659e-06, "loss": 0.237, "step": 1452 }, { "epoch": 1.73, "learning_rate": 8.065321308239706e-06, "loss": 0.2553, "step": 1454 }, { "epoch": 1.73, "learning_rate": 8.040600344453013e-06, "loss": 0.2224, "step": 1456 }, { "epoch": 1.73, "learning_rate": 8.015891826900764e-06, "loss": 0.3115, "step": 1458 }, { "epoch": 1.74, "learning_rate": 7.99119591253307e-06, "loss": 0.223, "step": 1460 }, { "epoch": 1.74, "learning_rate": 7.966512758219991e-06, "loss": 0.2604, "step": 1462 }, { "epoch": 1.74, "learning_rate": 7.941842520750529e-06, "loss": 0.262, "step": 1464 }, { "epoch": 1.74, "learning_rate": 7.91718535683165e-06, "loss": 0.2583, "step": 1466 }, { "epoch": 1.75, "learning_rate": 7.892541423087258e-06, "loss": 0.2318, "step": 1468 }, { "epoch": 1.75, "learning_rate": 7.867910876057238e-06, "loss": 0.2489, "step": 1470 }, { "epoch": 1.75, "learning_rate": 7.843293872196425e-06, "loss": 0.2609, "step": 1472 }, { "epoch": 1.75, "learning_rate": 7.818690567873637e-06, "loss": 0.2592, "step": 1474 }, { "epoch": 1.76, "learning_rate": 7.794101119370668e-06, "loss": 0.269, "step": 1476 }, { "epoch": 1.76, "learning_rate": 7.769525682881295e-06, "loss": 0.2532, "step": 1478 }, { "epoch": 1.76, "learning_rate": 7.744964414510297e-06, "loss": 0.2223, "step": 1480 }, { "epoch": 1.76, "learning_rate": 7.720417470272455e-06, "loss": 0.2525, "step": 1482 }, { "epoch": 1.76, "learning_rate": 7.695885006091552e-06, "loss": 0.2701, "step": 1484 }, { "epoch": 1.77, "learning_rate": 7.67136717779941e-06, "loss": 0.3059, "step": 1486 }, { "epoch": 1.77, "learning_rate": 7.646864141134874e-06, "loss": 0.2591, "step": 1488 }, { "epoch": 1.77, "learning_rate": 7.622376051742824e-06, "loss": 0.2618, "step": 1490 }, { "epoch": 1.77, "learning_rate": 7.5979030651732065e-06, "loss": 0.2814, "step": 1492 }, { "epoch": 1.78, "learning_rate": 7.573445336880029e-06, "loss": 0.2389, "step": 1494 }, { "epoch": 1.78, "learning_rate": 7.549003022220374e-06, "loss": 0.2347, "step": 1496 }, { "epoch": 1.78, "learning_rate": 7.524576276453422e-06, "loss": 0.2642, "step": 1498 }, { "epoch": 1.78, "learning_rate": 7.500165254739453e-06, "loss": 0.2591, "step": 1500 }, { "epoch": 1.79, "learning_rate": 7.475770112138867e-06, "loss": 0.2591, "step": 1502 }, { "epoch": 1.79, "learning_rate": 7.4513910036112105e-06, "loss": 0.2878, "step": 1504 }, { "epoch": 1.79, "learning_rate": 7.427028084014163e-06, "loss": 0.305, "step": 1506 }, { "epoch": 1.79, "learning_rate": 7.402681508102585e-06, "loss": 0.263, "step": 1508 }, { "epoch": 1.8, "learning_rate": 7.378351430527511e-06, "loss": 0.2868, "step": 1510 }, { "epoch": 1.8, "learning_rate": 7.35403800583518e-06, "loss": 0.2299, "step": 1512 }, { "epoch": 1.8, "learning_rate": 7.329741388466056e-06, "loss": 0.2942, "step": 1514 }, { "epoch": 1.8, "learning_rate": 7.305461732753836e-06, "loss": 0.2993, "step": 1516 }, { "epoch": 1.8, "learning_rate": 7.281199192924473e-06, "loss": 0.263, "step": 1518 }, { "epoch": 1.81, "learning_rate": 7.256953923095209e-06, "loss": 0.2591, "step": 1520 }, { "epoch": 1.81, "learning_rate": 7.232726077273575e-06, "loss": 0.2612, "step": 1522 }, { "epoch": 1.81, "learning_rate": 7.208515809356434e-06, "loss": 0.2354, "step": 1524 }, { "epoch": 1.81, "learning_rate": 7.184323273128981e-06, "loss": 0.2504, "step": 1526 }, { "epoch": 1.82, "learning_rate": 7.160148622263786e-06, "loss": 0.2437, "step": 1528 }, { "epoch": 1.82, "learning_rate": 7.135992010319812e-06, "loss": 0.2154, "step": 1530 }, { "epoch": 1.82, "learning_rate": 7.123920516899151e-06, "loss": 0.2963, "step": 1532 }, { "epoch": 1.82, "learning_rate": 7.0997912510091335e-06, "loss": 0.2307, "step": 1534 }, { "epoch": 1.83, "learning_rate": 7.075680407434289e-06, "loss": 0.2826, "step": 1536 }, { "epoch": 1.83, "learning_rate": 7.051588139328276e-06, "loss": 0.2344, "step": 1538 }, { "epoch": 1.83, "learning_rate": 7.0275145997267544e-06, "loss": 0.3482, "step": 1540 }, { "epoch": 1.83, "learning_rate": 7.0034599415464135e-06, "loss": 0.2818, "step": 1542 }, { "epoch": 1.84, "learning_rate": 6.979424317584014e-06, "loss": 0.3444, "step": 1544 }, { "epoch": 1.84, "learning_rate": 6.955407880515404e-06, "loss": 0.2712, "step": 1546 }, { "epoch": 1.84, "learning_rate": 6.931410782894563e-06, "loss": 0.2794, "step": 1548 }, { "epoch": 1.84, "learning_rate": 6.907433177152618e-06, "loss": 0.2701, "step": 1550 }, { "epoch": 1.85, "learning_rate": 6.883475215596882e-06, "loss": 0.2943, "step": 1552 }, { "epoch": 1.85, "learning_rate": 6.859537050409895e-06, "loss": 0.2488, "step": 1554 }, { "epoch": 1.85, "learning_rate": 6.835618833648443e-06, "loss": 0.2451, "step": 1556 }, { "epoch": 1.85, "learning_rate": 6.8117207172425996e-06, "loss": 0.3123, "step": 1558 }, { "epoch": 1.85, "learning_rate": 6.787842852994757e-06, "loss": 0.2522, "step": 1560 }, { "epoch": 1.86, "learning_rate": 6.763985392578667e-06, "loss": 0.2766, "step": 1562 }, { "epoch": 1.86, "learning_rate": 6.740148487538476e-06, "loss": 0.2473, "step": 1564 }, { "epoch": 1.86, "learning_rate": 6.716332289287759e-06, "loss": 0.214, "step": 1566 }, { "epoch": 1.86, "learning_rate": 6.692536949108562e-06, "loss": 0.2742, "step": 1568 }, { "epoch": 1.87, "learning_rate": 6.6687626181504315e-06, "loss": 0.2348, "step": 1570 }, { "epoch": 1.87, "learning_rate": 6.64500944742948e-06, "loss": 0.2363, "step": 1572 }, { "epoch": 1.87, "learning_rate": 6.6212775878273925e-06, "loss": 0.2686, "step": 1574 }, { "epoch": 1.87, "learning_rate": 6.59756719009049e-06, "loss": 0.2852, "step": 1576 }, { "epoch": 1.88, "learning_rate": 6.5738784048287615e-06, "loss": 0.2272, "step": 1578 }, { "epoch": 1.88, "learning_rate": 6.550211382514922e-06, "loss": 0.2975, "step": 1580 }, { "epoch": 1.88, "learning_rate": 6.526566273483439e-06, "loss": 0.2563, "step": 1582 }, { "epoch": 1.88, "learning_rate": 6.502943227929586e-06, "loss": 0.2971, "step": 1584 }, { "epoch": 1.89, "learning_rate": 6.479342395908487e-06, "loss": 0.2601, "step": 1586 }, { "epoch": 1.89, "learning_rate": 6.455763927334177e-06, "loss": 0.258, "step": 1588 }, { "epoch": 1.89, "learning_rate": 6.432207971978619e-06, "loss": 0.2953, "step": 1590 }, { "epoch": 1.89, "learning_rate": 6.4086746794707795e-06, "loss": 0.2961, "step": 1592 }, { "epoch": 1.9, "learning_rate": 6.385164199295666e-06, "loss": 0.262, "step": 1594 }, { "epoch": 1.9, "learning_rate": 6.3616766807933875e-06, "loss": 0.2151, "step": 1596 }, { "epoch": 1.9, "learning_rate": 6.338212273158188e-06, "loss": 0.2525, "step": 1598 }, { "epoch": 1.9, "learning_rate": 6.314771125437517e-06, "loss": 0.2393, "step": 1600 }, { "epoch": 1.9, "learning_rate": 6.291353386531074e-06, "loss": 0.2758, "step": 1602 }, { "epoch": 1.91, "learning_rate": 6.2679592051898685e-06, "loss": 0.2312, "step": 1604 }, { "epoch": 1.91, "learning_rate": 6.244588730015264e-06, "loss": 0.2428, "step": 1606 }, { "epoch": 1.91, "learning_rate": 6.221242109458043e-06, "loss": 0.2551, "step": 1608 }, { "epoch": 1.91, "learning_rate": 6.197919491817459e-06, "loss": 0.2715, "step": 1610 }, { "epoch": 1.92, "learning_rate": 6.174621025240307e-06, "loss": 0.2359, "step": 1612 }, { "epoch": 1.92, "learning_rate": 6.151346857719964e-06, "loss": 0.2671, "step": 1614 }, { "epoch": 1.92, "learning_rate": 6.128097137095458e-06, "loss": 0.1967, "step": 1616 }, { "epoch": 1.92, "learning_rate": 6.104872011050534e-06, "loss": 0.2563, "step": 1618 }, { "epoch": 1.93, "learning_rate": 6.081671627112704e-06, "loss": 0.2613, "step": 1620 }, { "epoch": 1.93, "learning_rate": 6.0584961326523285e-06, "loss": 0.2962, "step": 1622 }, { "epoch": 1.93, "learning_rate": 6.0353456748816545e-06, "loss": 0.2087, "step": 1624 }, { "epoch": 1.93, "learning_rate": 6.012220400853899e-06, "loss": 0.2902, "step": 1626 }, { "epoch": 1.94, "learning_rate": 5.989120457462314e-06, "loss": 0.2565, "step": 1628 }, { "epoch": 1.94, "learning_rate": 5.9660459914392465e-06, "loss": 0.243, "step": 1630 }, { "epoch": 1.94, "learning_rate": 5.942997149355208e-06, "loss": 0.2895, "step": 1632 }, { "epoch": 1.94, "learning_rate": 5.9199740776179494e-06, "loss": 0.3008, "step": 1634 }, { "epoch": 1.95, "learning_rate": 5.89697692247152e-06, "loss": 0.2124, "step": 1636 }, { "epoch": 1.95, "learning_rate": 5.874005829995358e-06, "loss": 0.2094, "step": 1638 }, { "epoch": 1.95, "learning_rate": 5.851060946103334e-06, "loss": 0.2355, "step": 1640 }, { "epoch": 1.95, "learning_rate": 5.828142416542852e-06, "loss": 0.304, "step": 1642 }, { "epoch": 1.95, "learning_rate": 5.8052503868939005e-06, "loss": 0.213, "step": 1644 }, { "epoch": 1.96, "learning_rate": 5.782385002568153e-06, "loss": 0.2471, "step": 1646 }, { "epoch": 1.96, "learning_rate": 5.759546408808019e-06, "loss": 0.2432, "step": 1648 }, { "epoch": 1.96, "learning_rate": 5.736734750685737e-06, "loss": 0.2565, "step": 1650 }, { "epoch": 1.96, "learning_rate": 5.713950173102441e-06, "loss": 0.2502, "step": 1652 }, { "epoch": 1.97, "learning_rate": 5.691192820787266e-06, "loss": 0.2105, "step": 1654 }, { "epoch": 1.97, "learning_rate": 5.6684628382963905e-06, "loss": 0.2437, "step": 1656 }, { "epoch": 1.97, "learning_rate": 5.645760370012149e-06, "loss": 0.2149, "step": 1658 }, { "epoch": 1.97, "learning_rate": 5.623085560142099e-06, "loss": 0.29, "step": 1660 }, { "epoch": 1.98, "learning_rate": 5.60043855271811e-06, "loss": 0.2718, "step": 1662 }, { "epoch": 1.98, "learning_rate": 5.577819491595457e-06, "loss": 0.2147, "step": 1664 }, { "epoch": 1.98, "learning_rate": 5.555228520451891e-06, "loss": 0.1925, "step": 1666 }, { "epoch": 1.98, "learning_rate": 5.53266578278673e-06, "loss": 0.2404, "step": 1668 }, { "epoch": 1.99, "learning_rate": 5.510131421919955e-06, "loss": 0.2405, "step": 1670 }, { "epoch": 1.99, "learning_rate": 5.487625580991303e-06, "loss": 0.1999, "step": 1672 }, { "epoch": 1.99, "learning_rate": 5.465148402959339e-06, "loss": 0.2185, "step": 1674 }, { "epoch": 1.99, "learning_rate": 5.442700030600565e-06, "loss": 0.3006, "step": 1676 }, { "epoch": 2.0, "learning_rate": 5.420280606508503e-06, "loss": 0.2406, "step": 1678 }, { "epoch": 2.0, "learning_rate": 5.397890273092807e-06, "loss": 0.2317, "step": 1680 }, { "epoch": 2.0, "learning_rate": 5.375529172578329e-06, "loss": 0.2616, "step": 1682 }, { "epoch": 2.0, "learning_rate": 5.353197447004239e-06, "loss": 0.1408, "step": 1684 }, { "epoch": 2.0, "learning_rate": 5.33089523822311e-06, "loss": 0.1778, "step": 1686 }, { "epoch": 2.01, "learning_rate": 5.308622687900038e-06, "loss": 0.1207, "step": 1688 }, { "epoch": 2.01, "learning_rate": 5.286379937511707e-06, "loss": 0.1327, "step": 1690 }, { "epoch": 2.01, "learning_rate": 5.264167128345523e-06, "loss": 0.125, "step": 1692 }, { "epoch": 2.01, "learning_rate": 5.241984401498693e-06, "loss": 0.17, "step": 1694 }, { "epoch": 2.02, "learning_rate": 5.219831897877353e-06, "loss": 0.159, "step": 1696 }, { "epoch": 2.02, "learning_rate": 5.197709758195648e-06, "loss": 0.1156, "step": 1698 }, { "epoch": 2.02, "learning_rate": 5.175618122974851e-06, "loss": 0.1356, "step": 1700 }, { "epoch": 2.02, "learning_rate": 5.153557132542473e-06, "loss": 0.1645, "step": 1702 }, { "epoch": 2.03, "learning_rate": 5.131526927031356e-06, "loss": 0.1626, "step": 1704 }, { "epoch": 2.03, "learning_rate": 5.109527646378815e-06, "loss": 0.1278, "step": 1706 }, { "epoch": 2.03, "learning_rate": 5.087559430325708e-06, "loss": 0.136, "step": 1708 }, { "epoch": 2.03, "learning_rate": 5.0656224184155764e-06, "loss": 0.1141, "step": 1710 }, { "epoch": 2.04, "learning_rate": 5.043716749993757e-06, "loss": 0.1444, "step": 1712 }, { "epoch": 2.04, "learning_rate": 5.02184256420648e-06, "loss": 0.1151, "step": 1714 }, { "epoch": 2.04, "learning_rate": 5.000000000000003e-06, "loss": 0.1474, "step": 1716 }, { "epoch": 2.04, "learning_rate": 4.978189196119716e-06, "loss": 0.121, "step": 1718 }, { "epoch": 2.05, "learning_rate": 4.9564102911092646e-06, "loss": 0.1284, "step": 1720 }, { "epoch": 2.05, "learning_rate": 4.934663423309685e-06, "loss": 0.153, "step": 1722 }, { "epoch": 2.05, "learning_rate": 4.912948730858492e-06, "loss": 0.145, "step": 1724 }, { "epoch": 2.05, "learning_rate": 4.891266351688829e-06, "loss": 0.1167, "step": 1726 }, { "epoch": 2.05, "learning_rate": 4.869616423528588e-06, "loss": 0.1338, "step": 1728 }, { "epoch": 2.06, "learning_rate": 4.847999083899522e-06, "loss": 0.1208, "step": 1730 }, { "epoch": 2.06, "learning_rate": 4.826414470116382e-06, "loss": 0.1403, "step": 1732 }, { "epoch": 2.06, "learning_rate": 4.804862719286044e-06, "loss": 0.1463, "step": 1734 }, { "epoch": 2.06, "learning_rate": 4.783343968306631e-06, "loss": 0.1276, "step": 1736 }, { "epoch": 2.07, "learning_rate": 4.7618583538666605e-06, "loss": 0.1242, "step": 1738 }, { "epoch": 2.07, "learning_rate": 4.740406012444153e-06, "loss": 0.1402, "step": 1740 }, { "epoch": 2.07, "learning_rate": 4.718987080305778e-06, "loss": 0.3846, "step": 1742 }, { "epoch": 2.07, "learning_rate": 4.697601693505996e-06, "loss": 0.161, "step": 1744 }, { "epoch": 2.08, "learning_rate": 4.6762499878861764e-06, "loss": 0.1102, "step": 1746 }, { "epoch": 2.08, "learning_rate": 4.654932099073746e-06, "loss": 0.1343, "step": 1748 }, { "epoch": 2.08, "learning_rate": 4.633648162481326e-06, "loss": 0.146, "step": 1750 }, { "epoch": 2.08, "learning_rate": 4.612398313305867e-06, "loss": 0.1533, "step": 1752 }, { "epoch": 2.09, "learning_rate": 4.5911826865277975e-06, "loss": 0.1346, "step": 1754 }, { "epoch": 2.09, "learning_rate": 4.570001416910168e-06, "loss": 0.1579, "step": 1756 }, { "epoch": 2.09, "learning_rate": 4.548854638997778e-06, "loss": 0.1382, "step": 1758 }, { "epoch": 2.09, "learning_rate": 4.527742487116349e-06, "loss": 0.133, "step": 1760 }, { "epoch": 2.1, "learning_rate": 4.506665095371642e-06, "loss": 0.121, "step": 1762 }, { "epoch": 2.1, "learning_rate": 4.485622597648624e-06, "loss": 0.1225, "step": 1764 }, { "epoch": 2.1, "learning_rate": 4.464615127610615e-06, "loss": 0.1139, "step": 1766 }, { "epoch": 2.1, "learning_rate": 4.443642818698434e-06, "loss": 0.1329, "step": 1768 }, { "epoch": 2.1, "learning_rate": 4.4227058041295515e-06, "loss": 0.1131, "step": 1770 }, { "epoch": 2.11, "learning_rate": 4.401804216897258e-06, "loss": 0.1301, "step": 1772 }, { "epoch": 2.11, "learning_rate": 4.380938189769791e-06, "loss": 0.1443, "step": 1774 }, { "epoch": 2.11, "learning_rate": 4.3601078552895245e-06, "loss": 0.1306, "step": 1776 }, { "epoch": 2.11, "learning_rate": 4.339313345772098e-06, "loss": 0.1501, "step": 1778 }, { "epoch": 2.12, "learning_rate": 4.318554793305592e-06, "loss": 0.1076, "step": 1780 }, { "epoch": 2.12, "learning_rate": 4.297832329749687e-06, "loss": 0.1671, "step": 1782 }, { "epoch": 2.12, "learning_rate": 4.277146086734823e-06, "loss": 0.1364, "step": 1784 }, { "epoch": 2.12, "learning_rate": 4.2564961956613605e-06, "loss": 0.1347, "step": 1786 }, { "epoch": 2.13, "learning_rate": 4.235882787698763e-06, "loss": 0.1667, "step": 1788 }, { "epoch": 2.13, "learning_rate": 4.2153059937847355e-06, "loss": 0.1366, "step": 1790 }, { "epoch": 2.13, "learning_rate": 4.194765944624423e-06, "loss": 0.1142, "step": 1792 }, { "epoch": 2.13, "learning_rate": 4.174262770689552e-06, "loss": 0.1188, "step": 1794 }, { "epoch": 2.14, "learning_rate": 4.153796602217623e-06, "loss": 0.1068, "step": 1796 }, { "epoch": 2.14, "learning_rate": 4.133367569211074e-06, "loss": 0.1359, "step": 1798 }, { "epoch": 2.14, "learning_rate": 4.112975801436454e-06, "loss": 0.1584, "step": 1800 }, { "epoch": 2.14, "learning_rate": 4.092621428423601e-06, "loss": 0.1308, "step": 1802 }, { "epoch": 2.15, "learning_rate": 4.07230457946482e-06, "loss": 0.1324, "step": 1804 }, { "epoch": 2.15, "learning_rate": 4.052025383614061e-06, "loss": 0.1339, "step": 1806 }, { "epoch": 2.15, "learning_rate": 4.031783969686105e-06, "loss": 0.1178, "step": 1808 }, { "epoch": 2.15, "learning_rate": 4.011580466255729e-06, "loss": 0.127, "step": 1810 }, { "epoch": 2.15, "learning_rate": 3.991415001656906e-06, "loss": 0.1334, "step": 1812 }, { "epoch": 2.16, "learning_rate": 3.971287703981982e-06, "loss": 0.1201, "step": 1814 }, { "epoch": 2.16, "learning_rate": 3.9511987010808635e-06, "loss": 0.1435, "step": 1816 }, { "epoch": 2.16, "learning_rate": 3.931148120560211e-06, "loss": 0.1394, "step": 1818 }, { "epoch": 2.16, "learning_rate": 3.911136089782613e-06, "loss": 0.1229, "step": 1820 }, { "epoch": 2.17, "learning_rate": 3.8911627358658e-06, "loss": 0.123, "step": 1822 }, { "epoch": 2.17, "learning_rate": 3.871228185681822e-06, "loss": 0.1433, "step": 1824 }, { "epoch": 2.17, "learning_rate": 3.8513325658562395e-06, "loss": 0.1238, "step": 1826 }, { "epoch": 2.17, "learning_rate": 3.831476002767327e-06, "loss": 0.1456, "step": 1828 }, { "epoch": 2.18, "learning_rate": 3.811658622545268e-06, "loss": 0.1478, "step": 1830 }, { "epoch": 2.18, "learning_rate": 3.7918805510713553e-06, "loss": 0.1269, "step": 1832 }, { "epoch": 2.18, "learning_rate": 3.7721419139771886e-06, "loss": 0.1379, "step": 1834 }, { "epoch": 2.18, "learning_rate": 3.7524428366438757e-06, "loss": 0.1365, "step": 1836 }, { "epoch": 2.19, "learning_rate": 3.7327834442012433e-06, "loss": 0.1263, "step": 1838 }, { "epoch": 2.19, "learning_rate": 3.7131638615270404e-06, "loss": 0.1055, "step": 1840 }, { "epoch": 2.19, "learning_rate": 3.6935842132461307e-06, "loss": 0.1053, "step": 1842 }, { "epoch": 2.19, "learning_rate": 3.6740446237297177e-06, "loss": 0.1259, "step": 1844 }, { "epoch": 2.2, "learning_rate": 3.6545452170945496e-06, "loss": 0.1428, "step": 1846 }, { "epoch": 2.2, "learning_rate": 3.635086117202128e-06, "loss": 0.1375, "step": 1848 }, { "epoch": 2.2, "learning_rate": 3.6156674476579266e-06, "loss": 0.1469, "step": 1850 }, { "epoch": 2.2, "learning_rate": 3.5962893318105963e-06, "loss": 0.123, "step": 1852 }, { "epoch": 2.2, "learning_rate": 3.576951892751197e-06, "loss": 0.128, "step": 1854 }, { "epoch": 2.21, "learning_rate": 3.5576552533124074e-06, "loss": 0.1403, "step": 1856 }, { "epoch": 2.21, "learning_rate": 3.538399536067736e-06, "loss": 0.1392, "step": 1858 }, { "epoch": 2.21, "learning_rate": 3.5191848633307545e-06, "loss": 0.1259, "step": 1860 }, { "epoch": 2.21, "learning_rate": 3.5000113571543183e-06, "loss": 0.1252, "step": 1862 }, { "epoch": 2.22, "learning_rate": 3.480879139329789e-06, "loss": 0.1641, "step": 1864 }, { "epoch": 2.22, "learning_rate": 3.4617883313862633e-06, "loss": 0.1396, "step": 1866 }, { "epoch": 2.22, "learning_rate": 3.4427390545897955e-06, "loss": 0.1284, "step": 1868 }, { "epoch": 2.22, "learning_rate": 3.423731429942636e-06, "loss": 0.1255, "step": 1870 }, { "epoch": 2.23, "learning_rate": 3.4047655781824605e-06, "loss": 0.1381, "step": 1872 }, { "epoch": 2.23, "learning_rate": 3.3858416197815947e-06, "loss": 0.1587, "step": 1874 }, { "epoch": 2.23, "learning_rate": 3.3669596749462562e-06, "loss": 0.1148, "step": 1876 }, { "epoch": 2.23, "learning_rate": 3.3481198636157908e-06, "loss": 0.1187, "step": 1878 }, { "epoch": 2.24, "learning_rate": 3.3293223054619073e-06, "loss": 0.1328, "step": 1880 }, { "epoch": 2.24, "learning_rate": 3.3105671198879243e-06, "loss": 0.1166, "step": 1882 }, { "epoch": 2.24, "learning_rate": 3.2918544260279985e-06, "loss": 0.133, "step": 1884 }, { "epoch": 2.24, "learning_rate": 3.2731843427463894e-06, "loss": 0.127, "step": 1886 }, { "epoch": 2.24, "learning_rate": 3.254556988636678e-06, "loss": 0.1678, "step": 1888 }, { "epoch": 2.25, "learning_rate": 3.2359724820210394e-06, "loss": 0.1156, "step": 1890 }, { "epoch": 2.25, "learning_rate": 3.2174309409494675e-06, "loss": 0.1384, "step": 1892 }, { "epoch": 2.25, "learning_rate": 3.198932483199041e-06, "loss": 0.1324, "step": 1894 }, { "epoch": 2.25, "learning_rate": 3.180477226273172e-06, "loss": 0.1498, "step": 1896 }, { "epoch": 2.26, "learning_rate": 3.162065287400855e-06, "loss": 0.1482, "step": 1898 }, { "epoch": 2.26, "learning_rate": 3.1436967835359245e-06, "loss": 0.1001, "step": 1900 }, { "epoch": 2.26, "learning_rate": 3.1253718313563207e-06, "loss": 0.1328, "step": 1902 }, { "epoch": 2.26, "learning_rate": 3.1070905472633307e-06, "loss": 0.1343, "step": 1904 }, { "epoch": 2.27, "learning_rate": 3.0888530473808677e-06, "loss": 0.1721, "step": 1906 }, { "epoch": 2.27, "learning_rate": 3.070659447554719e-06, "loss": 0.1211, "step": 1908 }, { "epoch": 2.27, "learning_rate": 3.052509863351818e-06, "loss": 0.1267, "step": 1910 }, { "epoch": 2.27, "learning_rate": 3.0344044100595073e-06, "loss": 0.1257, "step": 1912 }, { "epoch": 2.28, "learning_rate": 3.016343202684807e-06, "loss": 0.1769, "step": 1914 }, { "epoch": 2.28, "learning_rate": 2.9983263559536813e-06, "loss": 0.1398, "step": 1916 }, { "epoch": 2.28, "learning_rate": 2.9803539843103226e-06, "loss": 0.0818, "step": 1918 }, { "epoch": 2.28, "learning_rate": 2.962426201916402e-06, "loss": 0.1552, "step": 1920 }, { "epoch": 2.29, "learning_rate": 2.9445431226503683e-06, "loss": 0.1296, "step": 1922 }, { "epoch": 2.29, "learning_rate": 2.926704860106706e-06, "loss": 0.1082, "step": 1924 }, { "epoch": 2.29, "learning_rate": 2.9089115275952217e-06, "loss": 0.143, "step": 1926 }, { "epoch": 2.29, "learning_rate": 2.891163238140323e-06, "loss": 0.1019, "step": 1928 }, { "epoch": 2.29, "learning_rate": 2.8734601044803056e-06, "loss": 0.1256, "step": 1930 }, { "epoch": 2.3, "learning_rate": 2.855802239066623e-06, "loss": 0.1159, "step": 1932 }, { "epoch": 2.3, "learning_rate": 2.8381897540631964e-06, "loss": 0.1187, "step": 1934 }, { "epoch": 2.3, "learning_rate": 2.820622761345676e-06, "loss": 0.1375, "step": 1936 }, { "epoch": 2.3, "learning_rate": 2.8031013725007415e-06, "loss": 0.1305, "step": 1938 }, { "epoch": 2.31, "learning_rate": 2.785625698825406e-06, "loss": 0.1443, "step": 1940 }, { "epoch": 2.31, "learning_rate": 2.768195851326285e-06, "loss": 0.1351, "step": 1942 }, { "epoch": 2.31, "learning_rate": 2.750811940718906e-06, "loss": 0.1378, "step": 1944 }, { "epoch": 2.31, "learning_rate": 2.733474077427004e-06, "loss": 0.0981, "step": 1946 }, { "epoch": 2.32, "learning_rate": 2.716182371581814e-06, "loss": 0.146, "step": 1948 }, { "epoch": 2.32, "learning_rate": 2.6989369330213865e-06, "loss": 0.1286, "step": 1950 }, { "epoch": 2.32, "learning_rate": 2.681737871289869e-06, "loss": 0.1551, "step": 1952 }, { "epoch": 2.32, "learning_rate": 2.6645852956368214e-06, "loss": 0.1166, "step": 1954 }, { "epoch": 2.33, "learning_rate": 2.647479315016528e-06, "loss": 0.1181, "step": 1956 }, { "epoch": 2.33, "learning_rate": 2.6304200380872913e-06, "loss": 0.1341, "step": 1958 }, { "epoch": 2.33, "learning_rate": 2.61340757321075e-06, "loss": 0.1196, "step": 1960 }, { "epoch": 2.33, "learning_rate": 2.596442028451194e-06, "loss": 0.1364, "step": 1962 }, { "epoch": 2.34, "learning_rate": 2.579523511574864e-06, "loss": 0.1209, "step": 1964 }, { "epoch": 2.34, "learning_rate": 2.56265213004929e-06, "loss": 0.1174, "step": 1966 }, { "epoch": 2.34, "learning_rate": 2.5458279910425865e-06, "loss": 0.1383, "step": 1968 }, { "epoch": 2.34, "learning_rate": 2.5290512014227774e-06, "loss": 0.1044, "step": 1970 }, { "epoch": 2.34, "learning_rate": 2.5123218677571313e-06, "loss": 0.1163, "step": 1972 }, { "epoch": 2.35, "learning_rate": 2.4956400963114647e-06, "loss": 0.137, "step": 1974 }, { "epoch": 2.35, "learning_rate": 2.479005993049478e-06, "loss": 0.1591, "step": 1976 }, { "epoch": 2.35, "learning_rate": 2.4624196636320795e-06, "loss": 0.137, "step": 1978 }, { "epoch": 2.35, "learning_rate": 2.445881213416713e-06, "loss": 0.1583, "step": 1980 }, { "epoch": 2.36, "learning_rate": 2.429390747456699e-06, "loss": 0.1252, "step": 1982 }, { "epoch": 2.36, "learning_rate": 2.412948370500551e-06, "loss": 0.1552, "step": 1984 }, { "epoch": 2.36, "learning_rate": 2.3965541869913188e-06, "loss": 0.1481, "step": 1986 }, { "epoch": 2.36, "learning_rate": 2.3802083010659238e-06, "loss": 0.1243, "step": 1988 }, { "epoch": 2.37, "learning_rate": 2.3639108165545057e-06, "loss": 0.1273, "step": 1990 }, { "epoch": 2.37, "learning_rate": 2.3476618369797457e-06, "loss": 0.1403, "step": 1992 }, { "epoch": 2.37, "learning_rate": 2.331461465556222e-06, "loss": 0.1391, "step": 1994 }, { "epoch": 2.37, "learning_rate": 2.315309805189748e-06, "loss": 0.1376, "step": 1996 }, { "epoch": 2.38, "learning_rate": 2.299206958476731e-06, "loss": 0.1253, "step": 1998 }, { "epoch": 2.38, "learning_rate": 2.2831530277034985e-06, "loss": 0.131, "step": 2000 }, { "epoch": 2.38, "learning_rate": 2.2671481148456685e-06, "loss": 0.1377, "step": 2002 }, { "epoch": 2.38, "learning_rate": 2.251192321567488e-06, "loss": 0.1077, "step": 2004 }, { "epoch": 2.39, "learning_rate": 2.235285749221201e-06, "loss": 0.1253, "step": 2006 }, { "epoch": 2.39, "learning_rate": 2.219428498846393e-06, "loss": 0.1271, "step": 2008 }, { "epoch": 2.39, "learning_rate": 2.2036206711693508e-06, "loss": 0.1449, "step": 2010 }, { "epoch": 2.39, "learning_rate": 2.1878623666024233e-06, "loss": 0.1024, "step": 2012 }, { "epoch": 2.39, "learning_rate": 2.1721536852433976e-06, "loss": 0.1141, "step": 2014 }, { "epoch": 2.4, "learning_rate": 2.1564947268748382e-06, "loss": 0.1023, "step": 2016 }, { "epoch": 2.4, "learning_rate": 2.1408855909634696e-06, "loss": 0.1113, "step": 2018 }, { "epoch": 2.4, "learning_rate": 2.125326376659539e-06, "loss": 0.1467, "step": 2020 }, { "epoch": 2.4, "learning_rate": 2.1098171827961965e-06, "loss": 0.1194, "step": 2022 }, { "epoch": 2.41, "learning_rate": 2.094358107888852e-06, "loss": 0.1676, "step": 2024 }, { "epoch": 2.41, "learning_rate": 2.0789492501345553e-06, "loss": 0.1367, "step": 2026 }, { "epoch": 2.41, "learning_rate": 2.0635907074113737e-06, "loss": 0.1231, "step": 2028 }, { "epoch": 2.41, "learning_rate": 2.0482825772777804e-06, "loss": 0.1626, "step": 2030 }, { "epoch": 2.42, "learning_rate": 2.0330249569720116e-06, "loss": 0.1117, "step": 2032 }, { "epoch": 2.42, "learning_rate": 2.0178179434114674e-06, "loss": 0.1303, "step": 2034 }, { "epoch": 2.42, "learning_rate": 2.00266163319209e-06, "loss": 0.1336, "step": 2036 }, { "epoch": 2.42, "learning_rate": 1.9875561225877482e-06, "loss": 0.1143, "step": 2038 }, { "epoch": 2.43, "learning_rate": 1.972501507549637e-06, "loss": 0.1449, "step": 2040 }, { "epoch": 2.43, "learning_rate": 1.957497883705649e-06, "loss": 0.1331, "step": 2042 }, { "epoch": 2.43, "learning_rate": 1.9425453463597798e-06, "loss": 0.0957, "step": 2044 }, { "epoch": 2.43, "learning_rate": 1.927643990491528e-06, "loss": 0.1182, "step": 2046 }, { "epoch": 2.44, "learning_rate": 1.912793910755275e-06, "loss": 0.1394, "step": 2048 }, { "epoch": 2.44, "learning_rate": 1.8979952014796954e-06, "loss": 0.1155, "step": 2050 }, { "epoch": 2.44, "learning_rate": 1.883247956667157e-06, "loss": 0.1681, "step": 2052 }, { "epoch": 2.44, "learning_rate": 1.8685522699931169e-06, "loss": 0.1517, "step": 2054 }, { "epoch": 2.44, "learning_rate": 1.8539082348055427e-06, "loss": 0.1491, "step": 2056 }, { "epoch": 2.45, "learning_rate": 1.839315944124298e-06, "loss": 0.1276, "step": 2058 }, { "epoch": 2.45, "learning_rate": 1.8247754906405624e-06, "loss": 0.1343, "step": 2060 }, { "epoch": 2.45, "learning_rate": 1.8102869667162494e-06, "loss": 0.1477, "step": 2062 }, { "epoch": 2.45, "learning_rate": 1.7958504643834062e-06, "loss": 0.1584, "step": 2064 }, { "epoch": 2.46, "learning_rate": 1.7814660753436386e-06, "loss": 0.1316, "step": 2066 }, { "epoch": 2.46, "learning_rate": 1.7671338909675218e-06, "loss": 0.1373, "step": 2068 }, { "epoch": 2.46, "learning_rate": 1.7528540022940288e-06, "loss": 0.131, "step": 2070 }, { "epoch": 2.46, "learning_rate": 1.7386265000299385e-06, "loss": 0.1206, "step": 2072 }, { "epoch": 2.47, "learning_rate": 1.7244514745492813e-06, "loss": 0.117, "step": 2074 }, { "epoch": 2.47, "learning_rate": 1.71032901589274e-06, "loss": 0.1368, "step": 2076 }, { "epoch": 2.47, "learning_rate": 1.6962592137670897e-06, "loss": 0.1176, "step": 2078 }, { "epoch": 2.47, "learning_rate": 1.6822421575446378e-06, "loss": 0.1501, "step": 2080 }, { "epoch": 2.48, "learning_rate": 1.6682779362626378e-06, "loss": 0.1326, "step": 2082 }, { "epoch": 2.48, "learning_rate": 1.6543666386227343e-06, "loss": 0.1357, "step": 2084 }, { "epoch": 2.48, "learning_rate": 1.6405083529903954e-06, "loss": 0.1039, "step": 2086 }, { "epoch": 2.48, "learning_rate": 1.6267031673943546e-06, "loss": 0.1407, "step": 2088 }, { "epoch": 2.49, "learning_rate": 1.6129511695260558e-06, "loss": 0.1312, "step": 2090 }, { "epoch": 2.49, "learning_rate": 1.5992524467390858e-06, "loss": 0.1198, "step": 2092 }, { "epoch": 2.49, "learning_rate": 1.5856070860486205e-06, "loss": 0.1091, "step": 2094 }, { "epoch": 2.49, "learning_rate": 1.5720151741308875e-06, "loss": 0.119, "step": 2096 }, { "epoch": 2.49, "learning_rate": 1.5584767973225967e-06, "loss": 0.1316, "step": 2098 }, { "epoch": 2.5, "learning_rate": 1.544992041620398e-06, "loss": 0.1108, "step": 2100 }, { "epoch": 2.5, "learning_rate": 1.531560992680341e-06, "loss": 0.1267, "step": 2102 }, { "epoch": 2.5, "learning_rate": 1.5181837358173223e-06, "loss": 0.1292, "step": 2104 }, { "epoch": 2.5, "learning_rate": 1.5048603560045549e-06, "loss": 0.124, "step": 2106 }, { "epoch": 2.51, "learning_rate": 1.4915909378730143e-06, "loss": 0.1466, "step": 2108 }, { "epoch": 2.51, "learning_rate": 1.4783755657109079e-06, "loss": 0.103, "step": 2110 }, { "epoch": 2.51, "learning_rate": 1.4652143234631465e-06, "loss": 0.1478, "step": 2112 }, { "epoch": 2.51, "learning_rate": 1.4521072947307957e-06, "loss": 0.1196, "step": 2114 }, { "epoch": 2.52, "learning_rate": 1.4390545627705588e-06, "loss": 0.1203, "step": 2116 }, { "epoch": 2.52, "learning_rate": 1.426056210494241e-06, "loss": 0.125, "step": 2118 }, { "epoch": 2.52, "learning_rate": 1.413112320468223e-06, "loss": 0.1612, "step": 2120 }, { "epoch": 2.52, "learning_rate": 1.400222974912936e-06, "loss": 0.1226, "step": 2122 }, { "epoch": 2.53, "learning_rate": 1.3873882557023488e-06, "loss": 0.1304, "step": 2124 }, { "epoch": 2.53, "learning_rate": 1.3746082443634311e-06, "loss": 0.1172, "step": 2126 }, { "epoch": 2.53, "learning_rate": 1.361883022075653e-06, "loss": 0.1441, "step": 2128 }, { "epoch": 2.53, "learning_rate": 1.3492126696704544e-06, "loss": 0.1232, "step": 2130 }, { "epoch": 2.54, "learning_rate": 1.3365972676307403e-06, "loss": 0.1127, "step": 2132 }, { "epoch": 2.54, "learning_rate": 1.3240368960903671e-06, "loss": 0.1298, "step": 2134 }, { "epoch": 2.54, "learning_rate": 1.3115316348336348e-06, "loss": 0.1358, "step": 2136 }, { "epoch": 2.54, "learning_rate": 1.2990815632947763e-06, "loss": 0.1689, "step": 2138 }, { "epoch": 2.54, "learning_rate": 1.2866867605574628e-06, "loss": 0.1101, "step": 2140 }, { "epoch": 2.55, "learning_rate": 1.2743473053542842e-06, "loss": 0.1308, "step": 2142 }, { "epoch": 2.55, "learning_rate": 1.262063276066272e-06, "loss": 0.1472, "step": 2144 }, { "epoch": 2.55, "learning_rate": 1.2498347507223763e-06, "loss": 0.1298, "step": 2146 }, { "epoch": 2.55, "learning_rate": 1.237661806998991e-06, "loss": 0.1323, "step": 2148 }, { "epoch": 2.56, "learning_rate": 1.2255445222194462e-06, "loss": 0.0947, "step": 2150 }, { "epoch": 2.56, "learning_rate": 1.2134829733535269e-06, "loss": 0.1199, "step": 2152 }, { "epoch": 2.56, "learning_rate": 1.2014772370169747e-06, "loss": 0.1284, "step": 2154 }, { "epoch": 2.56, "learning_rate": 1.1895273894710157e-06, "loss": 0.1323, "step": 2156 }, { "epoch": 2.57, "learning_rate": 1.177633506621857e-06, "loss": 0.1188, "step": 2158 }, { "epoch": 2.57, "learning_rate": 1.1657956640202217e-06, "loss": 0.1448, "step": 2160 }, { "epoch": 2.57, "learning_rate": 1.1540139368608572e-06, "loss": 0.1819, "step": 2162 }, { "epoch": 2.57, "learning_rate": 1.142288399982061e-06, "loss": 0.2198, "step": 2164 }, { "epoch": 2.58, "learning_rate": 1.1306191278652112e-06, "loss": 0.1341, "step": 2166 }, { "epoch": 2.58, "learning_rate": 1.1190061946342835e-06, "loss": 0.2365, "step": 2168 }, { "epoch": 2.58, "learning_rate": 1.1074496740553853e-06, "loss": 0.1264, "step": 2170 }, { "epoch": 2.58, "learning_rate": 1.0959496395362946e-06, "loss": 0.1328, "step": 2172 }, { "epoch": 2.59, "learning_rate": 1.0845061641259757e-06, "loss": 0.1076, "step": 2174 }, { "epoch": 2.59, "learning_rate": 1.0731193205141354e-06, "loss": 0.1372, "step": 2176 }, { "epoch": 2.59, "learning_rate": 1.0617891810307458e-06, "loss": 0.1536, "step": 2178 }, { "epoch": 2.59, "learning_rate": 1.050515817645591e-06, "loss": 0.1243, "step": 2180 }, { "epoch": 2.59, "learning_rate": 1.039299301967811e-06, "loss": 0.2138, "step": 2182 }, { "epoch": 2.6, "learning_rate": 1.0281397052454457e-06, "loss": 0.1357, "step": 2184 }, { "epoch": 2.6, "learning_rate": 1.0170370983649792e-06, "loss": 0.1623, "step": 2186 }, { "epoch": 2.6, "learning_rate": 1.005991551850899e-06, "loss": 0.1314, "step": 2188 }, { "epoch": 2.6, "learning_rate": 9.950031358652313e-07, "loss": 0.1163, "step": 2190 }, { "epoch": 2.61, "learning_rate": 9.84071920207118e-07, "loss": 0.1045, "step": 2192 }, { "epoch": 2.61, "learning_rate": 9.73197974312351e-07, "loss": 0.1449, "step": 2194 }, { "epoch": 2.61, "learning_rate": 9.623813672529437e-07, "loss": 0.1287, "step": 2196 }, { "epoch": 2.61, "learning_rate": 9.516221677366888e-07, "loss": 0.1193, "step": 2198 }, { "epoch": 2.62, "learning_rate": 9.409204441067254e-07, "loss": 0.1306, "step": 2200 }, { "epoch": 2.62, "learning_rate": 9.302762643411e-07, "loss": 0.1151, "step": 2202 }, { "epoch": 2.62, "learning_rate": 9.196896960523349e-07, "loss": 0.1287, "step": 2204 }, { "epoch": 2.62, "learning_rate": 9.091608064870028e-07, "loss": 0.099, "step": 2206 }, { "epoch": 2.63, "learning_rate": 8.986896625253006e-07, "loss": 0.1151, "step": 2208 }, { "epoch": 2.63, "learning_rate": 8.882763306806163e-07, "loss": 0.1466, "step": 2210 }, { "epoch": 2.63, "learning_rate": 8.779208770991121e-07, "loss": 0.1133, "step": 2212 }, { "epoch": 2.63, "learning_rate": 8.676233675593038e-07, "loss": 0.157, "step": 2214 }, { "epoch": 2.63, "learning_rate": 8.573838674716461e-07, "loss": 0.1184, "step": 2216 }, { "epoch": 2.64, "learning_rate": 8.472024418781099e-07, "loss": 0.125, "step": 2218 }, { "epoch": 2.64, "learning_rate": 8.370791554517743e-07, "loss": 0.146, "step": 2220 }, { "epoch": 2.64, "learning_rate": 8.270140724964159e-07, "loss": 0.0981, "step": 2222 }, { "epoch": 2.64, "learning_rate": 8.170072569460996e-07, "loss": 0.1288, "step": 2224 }, { "epoch": 2.65, "learning_rate": 8.070587723647705e-07, "loss": 0.1714, "step": 2226 }, { "epoch": 2.65, "learning_rate": 7.971686819458502e-07, "loss": 0.1147, "step": 2228 }, { "epoch": 2.65, "learning_rate": 7.873370485118381e-07, "loss": 0.1307, "step": 2230 }, { "epoch": 2.65, "learning_rate": 7.77563934513913e-07, "loss": 0.1278, "step": 2232 }, { "epoch": 2.66, "learning_rate": 7.678494020315308e-07, "loss": 0.118, "step": 2234 }, { "epoch": 2.66, "learning_rate": 7.581935127720352e-07, "loss": 0.1289, "step": 2236 }, { "epoch": 2.66, "learning_rate": 7.485963280702646e-07, "loss": 0.1139, "step": 2238 }, { "epoch": 2.66, "learning_rate": 7.390579088881655e-07, "loss": 0.1164, "step": 2240 }, { "epoch": 2.67, "learning_rate": 7.295783158143976e-07, "loss": 0.0974, "step": 2242 }, { "epoch": 2.67, "learning_rate": 7.201576090639529e-07, "loss": 0.1444, "step": 2244 }, { "epoch": 2.67, "learning_rate": 7.107958484777755e-07, "loss": 0.1599, "step": 2246 }, { "epoch": 2.67, "learning_rate": 7.014930935223807e-07, "loss": 0.1482, "step": 2248 }, { "epoch": 2.68, "learning_rate": 6.922494032894744e-07, "loss": 0.1095, "step": 2250 }, { "epoch": 2.68, "learning_rate": 6.830648364955772e-07, "loss": 0.1398, "step": 2252 }, { "epoch": 2.68, "learning_rate": 6.739394514816622e-07, "loss": 0.1333, "step": 2254 }, { "epoch": 2.68, "learning_rate": 6.648733062127643e-07, "loss": 0.1209, "step": 2256 }, { "epoch": 2.68, "learning_rate": 6.558664582776341e-07, "loss": 0.1019, "step": 2258 }, { "epoch": 2.69, "learning_rate": 6.469189648883567e-07, "loss": 0.1099, "step": 2260 }, { "epoch": 2.69, "learning_rate": 6.380308828799919e-07, "loss": 0.1176, "step": 2262 }, { "epoch": 2.69, "learning_rate": 6.292022687102184e-07, "loss": 0.1138, "step": 2264 }, { "epoch": 2.69, "learning_rate": 6.204331784589679e-07, "loss": 0.1311, "step": 2266 }, { "epoch": 2.7, "learning_rate": 6.117236678280736e-07, "loss": 0.1296, "step": 2268 }, { "epoch": 2.7, "learning_rate": 6.030737921409169e-07, "loss": 0.1377, "step": 2270 }, { "epoch": 2.7, "learning_rate": 5.9448360634207e-07, "loss": 0.1579, "step": 2272 }, { "epoch": 2.7, "learning_rate": 5.859531649969563e-07, "loss": 0.1133, "step": 2274 }, { "epoch": 2.71, "learning_rate": 5.774825222914948e-07, "loss": 0.1324, "step": 2276 }, { "epoch": 2.71, "learning_rate": 5.690717320317595e-07, "loss": 0.1227, "step": 2278 }, { "epoch": 2.71, "learning_rate": 5.60720847643641e-07, "loss": 0.116, "step": 2280 }, { "epoch": 2.71, "learning_rate": 5.524299221724993e-07, "loss": 0.1574, "step": 2282 }, { "epoch": 2.72, "learning_rate": 5.44199008282833e-07, "loss": 0.1149, "step": 2284 }, { "epoch": 2.72, "learning_rate": 5.360281582579474e-07, "loss": 0.0964, "step": 2286 }, { "epoch": 2.72, "learning_rate": 5.279174239996132e-07, "loss": 0.1096, "step": 2288 }, { "epoch": 2.72, "learning_rate": 5.198668570277443e-07, "loss": 0.1395, "step": 2290 }, { "epoch": 2.73, "learning_rate": 5.11876508480067e-07, "loss": 0.1232, "step": 2292 }, { "epoch": 2.73, "learning_rate": 5.039464291117968e-07, "loss": 0.1222, "step": 2294 }, { "epoch": 2.73, "learning_rate": 4.960766692953145e-07, "loss": 0.16, "step": 2296 }, { "epoch": 2.73, "learning_rate": 4.882672790198473e-07, "loss": 0.1558, "step": 2298 }, { "epoch": 2.73, "learning_rate": 4.805183078911524e-07, "loss": 0.1193, "step": 2300 }, { "epoch": 2.74, "learning_rate": 4.728298051312008e-07, "loss": 0.1342, "step": 2302 }, { "epoch": 2.74, "learning_rate": 4.652018195778629e-07, "loss": 0.1598, "step": 2304 }, { "epoch": 2.74, "learning_rate": 4.576343996845989e-07, "loss": 0.1324, "step": 2306 }, { "epoch": 2.74, "learning_rate": 4.5012759352015766e-07, "loss": 0.0991, "step": 2308 }, { "epoch": 2.75, "learning_rate": 4.4268144876825846e-07, "loss": 0.1399, "step": 2310 }, { "epoch": 2.75, "learning_rate": 4.352960127272987e-07, "loss": 0.1098, "step": 2312 }, { "epoch": 2.75, "learning_rate": 4.2797133231005207e-07, "loss": 0.1343, "step": 2314 }, { "epoch": 2.75, "learning_rate": 4.207074540433631e-07, "loss": 0.1038, "step": 2316 }, { "epoch": 2.76, "learning_rate": 4.1350442406786317e-07, "loss": 0.1445, "step": 2318 }, { "epoch": 2.76, "learning_rate": 4.063622881376683e-07, "loss": 0.1484, "step": 2320 }, { "epoch": 2.76, "learning_rate": 3.9928109162008953e-07, "loss": 0.1116, "step": 2322 }, { "epoch": 2.76, "learning_rate": 3.922608794953531e-07, "loss": 0.1271, "step": 2324 }, { "epoch": 2.77, "learning_rate": 3.8530169635630055e-07, "loss": 0.1471, "step": 2326 }, { "epoch": 2.77, "learning_rate": 3.7840358640812036e-07, "loss": 0.1074, "step": 2328 }, { "epoch": 2.77, "learning_rate": 3.715665934680546e-07, "loss": 0.1571, "step": 2330 }, { "epoch": 2.77, "learning_rate": 3.64790760965128e-07, "loss": 0.0928, "step": 2332 }, { "epoch": 2.78, "learning_rate": 3.580761319398729e-07, "loss": 0.1362, "step": 2334 }, { "epoch": 2.78, "learning_rate": 3.514227490440503e-07, "loss": 0.1168, "step": 2336 }, { "epoch": 2.78, "learning_rate": 3.4483065454038123e-07, "loss": 0.1497, "step": 2338 }, { "epoch": 2.78, "learning_rate": 3.3829989030228163e-07, "loss": 0.1244, "step": 2340 }, { "epoch": 2.78, "learning_rate": 3.3183049781359e-07, "loss": 0.1058, "step": 2342 }, { "epoch": 2.79, "learning_rate": 3.2542251816831237e-07, "loss": 0.1158, "step": 2344 }, { "epoch": 2.79, "learning_rate": 3.190759920703512e-07, "loss": 0.1208, "step": 2346 }, { "epoch": 2.79, "learning_rate": 3.127909598332535e-07, "loss": 0.1214, "step": 2348 }, { "epoch": 2.79, "learning_rate": 3.065674613799574e-07, "loss": 0.1258, "step": 2350 }, { "epoch": 2.8, "learning_rate": 3.0040553624252844e-07, "loss": 0.136, "step": 2352 }, { "epoch": 2.8, "learning_rate": 2.9430522356191814e-07, "loss": 0.1553, "step": 2354 }, { "epoch": 2.8, "learning_rate": 2.88266562087709e-07, "loss": 0.164, "step": 2356 }, { "epoch": 2.8, "learning_rate": 2.822895901778744e-07, "loss": 0.1372, "step": 2358 }, { "epoch": 2.81, "learning_rate": 2.7637434579853016e-07, "loss": 0.1196, "step": 2360 }, { "epoch": 2.81, "learning_rate": 2.7052086652369356e-07, "loss": 0.1351, "step": 2362 }, { "epoch": 2.81, "learning_rate": 2.6472918953504566e-07, "loss": 0.1407, "step": 2364 }, { "epoch": 2.81, "learning_rate": 2.589993516216993e-07, "loss": 0.1181, "step": 2366 }, { "epoch": 2.82, "learning_rate": 2.5333138917995714e-07, "loss": 0.1484, "step": 2368 }, { "epoch": 2.82, "learning_rate": 2.477253382130862e-07, "loss": 0.125, "step": 2370 }, { "epoch": 2.82, "learning_rate": 2.4218123433108696e-07, "loss": 0.1309, "step": 2372 }, { "epoch": 2.82, "learning_rate": 2.366991127504714e-07, "loss": 0.1325, "step": 2374 }, { "epoch": 2.83, "learning_rate": 2.3127900829403305e-07, "loss": 0.1297, "step": 2376 }, { "epoch": 2.83, "learning_rate": 2.259209553906272e-07, "loss": 0.1242, "step": 2378 }, { "epoch": 2.83, "learning_rate": 2.2062498807495669e-07, "loss": 0.155, "step": 2380 }, { "epoch": 2.83, "learning_rate": 2.1539113998735094e-07, "loss": 0.1266, "step": 2382 }, { "epoch": 2.83, "learning_rate": 2.10219444373555e-07, "loss": 0.1459, "step": 2384 }, { "epoch": 2.84, "learning_rate": 2.051099340845164e-07, "loss": 0.1544, "step": 2386 }, { "epoch": 2.84, "learning_rate": 2.000626415761786e-07, "loss": 0.1347, "step": 2388 }, { "epoch": 2.84, "learning_rate": 1.9507759890927125e-07, "loss": 0.146, "step": 2390 }, { "epoch": 2.84, "learning_rate": 1.9015483774911249e-07, "loss": 0.1342, "step": 2392 }, { "epoch": 2.85, "learning_rate": 1.8529438936540022e-07, "loss": 0.1166, "step": 2394 }, { "epoch": 2.85, "learning_rate": 1.8049628463202128e-07, "loss": 0.1313, "step": 2396 }, { "epoch": 2.85, "learning_rate": 1.7576055402685034e-07, "loss": 0.1251, "step": 2398 }, { "epoch": 2.85, "learning_rate": 1.710872276315556e-07, "loss": 0.1077, "step": 2400 }, { "epoch": 2.86, "learning_rate": 1.664763351314125e-07, "loss": 0.1095, "step": 2402 }, { "epoch": 2.86, "learning_rate": 1.619279058151102e-07, "loss": 0.0939, "step": 2404 }, { "epoch": 2.86, "learning_rate": 1.5744196857456874e-07, "loss": 0.1118, "step": 2406 }, { "epoch": 2.86, "learning_rate": 1.5301855190475445e-07, "loss": 0.1477, "step": 2408 }, { "epoch": 2.87, "learning_rate": 1.4865768390349812e-07, "loss": 0.1073, "step": 2410 }, { "epoch": 2.87, "learning_rate": 1.4435939227131712e-07, "loss": 0.1194, "step": 2412 }, { "epoch": 2.87, "learning_rate": 1.4012370431124133e-07, "loss": 0.1222, "step": 2414 }, { "epoch": 2.87, "learning_rate": 1.3595064692863757e-07, "loss": 0.1367, "step": 2416 }, { "epoch": 2.88, "learning_rate": 1.3184024663103755e-07, "loss": 0.1182, "step": 2418 }, { "epoch": 2.88, "learning_rate": 1.277925295279725e-07, "loss": 0.1297, "step": 2420 }, { "epoch": 2.88, "learning_rate": 1.2380752133080433e-07, "loss": 0.1067, "step": 2422 }, { "epoch": 2.88, "learning_rate": 1.198852473525669e-07, "loss": 0.1483, "step": 2424 }, { "epoch": 2.88, "learning_rate": 1.1602573250779958e-07, "loss": 0.1534, "step": 2426 }, { "epoch": 2.89, "learning_rate": 1.1222900131239279e-07, "loss": 0.1177, "step": 2428 }, { "epoch": 2.89, "learning_rate": 1.0849507788343038e-07, "loss": 0.2059, "step": 2430 }, { "epoch": 2.89, "learning_rate": 1.0482398593903764e-07, "loss": 0.1369, "step": 2432 }, { "epoch": 2.89, "learning_rate": 1.0121574879823015e-07, "loss": 0.1178, "step": 2434 }, { "epoch": 2.9, "learning_rate": 9.767038938076511e-08, "loss": 0.1323, "step": 2436 }, { "epoch": 2.9, "learning_rate": 9.418793020699813e-08, "loss": 0.1272, "step": 2438 }, { "epoch": 2.9, "learning_rate": 9.076839339773547e-08, "loss": 0.112, "step": 2440 }, { "epoch": 2.9, "learning_rate": 8.741180067409982e-08, "loss": 0.1444, "step": 2442 }, { "epoch": 2.91, "learning_rate": 8.411817335738482e-08, "loss": 0.1269, "step": 2444 }, { "epoch": 2.91, "learning_rate": 8.088753236892843e-08, "loss": 0.1206, "step": 2446 }, { "epoch": 2.91, "learning_rate": 7.771989822997206e-08, "loss": 0.1349, "step": 2448 }, { "epoch": 2.91, "learning_rate": 7.461529106153387e-08, "loss": 0.1141, "step": 2450 }, { "epoch": 2.92, "learning_rate": 7.15737305842823e-08, "loss": 0.1265, "step": 2452 }, { "epoch": 2.92, "learning_rate": 6.859523611840612e-08, "loss": 0.12, "step": 2454 }, { "epoch": 2.92, "learning_rate": 6.567982658349792e-08, "loss": 0.1161, "step": 2456 }, { "epoch": 2.92, "learning_rate": 6.282752049842855e-08, "loss": 0.1389, "step": 2458 }, { "epoch": 2.93, "learning_rate": 6.003833598123287e-08, "loss": 0.1444, "step": 2460 }, { "epoch": 2.93, "learning_rate": 5.731229074899203e-08, "loss": 0.11, "step": 2462 }, { "epoch": 2.93, "learning_rate": 5.464940211772574e-08, "loss": 0.1272, "step": 2464 }, { "epoch": 2.93, "learning_rate": 5.204968700227242e-08, "loss": 0.1016, "step": 2466 }, { "epoch": 2.93, "learning_rate": 4.951316191619593e-08, "loss": 0.1521, "step": 2468 }, { "epoch": 2.94, "learning_rate": 4.703984297166564e-08, "loss": 0.142, "step": 2470 }, { "epoch": 2.94, "learning_rate": 4.4629745879367634e-08, "loss": 0.1034, "step": 2472 }, { "epoch": 2.94, "learning_rate": 4.228288594839369e-08, "loss": 0.118, "step": 2474 }, { "epoch": 2.94, "learning_rate": 3.999927808615245e-08, "loss": 0.1166, "step": 2476 }, { "epoch": 2.95, "learning_rate": 3.777893679827061e-08, "loss": 0.1518, "step": 2478 }, { "epoch": 2.95, "learning_rate": 3.562187618849855e-08, "loss": 0.1538, "step": 2480 }, { "epoch": 2.95, "learning_rate": 3.352810995862932e-08, "loss": 0.1053, "step": 2482 }, { "epoch": 2.95, "learning_rate": 3.1497651408399774e-08, "loss": 0.1095, "step": 2484 }, { "epoch": 2.96, "learning_rate": 2.9530513435416243e-08, "loss": 0.1574, "step": 2486 }, { "epoch": 2.96, "learning_rate": 2.762670853506677e-08, "loss": 0.1147, "step": 2488 }, { "epoch": 2.96, "learning_rate": 2.578624880044567e-08, "loss": 0.143, "step": 2490 }, { "epoch": 2.96, "learning_rate": 2.4009145922271327e-08, "loss": 0.0848, "step": 2492 }, { "epoch": 2.97, "learning_rate": 2.2295411188819616e-08, "loss": 0.1291, "step": 2494 }, { "epoch": 2.97, "learning_rate": 2.0645055485842837e-08, "loss": 0.132, "step": 2496 }, { "epoch": 2.97, "learning_rate": 1.9058089296509762e-08, "loss": 0.106, "step": 2498 }, { "epoch": 2.97, "learning_rate": 1.753452270133238e-08, "loss": 0.119, "step": 2500 }, { "epoch": 2.98, "learning_rate": 1.6074365378105915e-08, "loss": 0.1213, "step": 2502 }, { "epoch": 2.98, "learning_rate": 1.4677626601843353e-08, "loss": 0.1392, "step": 2504 }, { "epoch": 2.98, "learning_rate": 1.3344315244722128e-08, "loss": 0.1121, "step": 2506 }, { "epoch": 2.98, "learning_rate": 1.2074439776021962e-08, "loss": 0.1163, "step": 2508 }, { "epoch": 2.98, "learning_rate": 1.0868008262076013e-08, "loss": 0.149, "step": 2510 }, { "epoch": 2.99, "learning_rate": 9.725028366214251e-09, "loss": 0.1174, "step": 2512 }, { "epoch": 2.99, "learning_rate": 8.64550734872016e-09, "loss": 0.1226, "step": 2514 }, { "epoch": 2.99, "learning_rate": 7.629452066783006e-09, "loss": 0.1374, "step": 2516 }, { "epoch": 2.99, "learning_rate": 6.6768689744500796e-09, "loss": 0.112, "step": 2518 }, { "epoch": 3.0, "learning_rate": 5.787764122592299e-09, "loss": 0.1154, "step": 2520 }, { "epoch": 3.0, "learning_rate": 4.9621431588620096e-09, "loss": 0.1275, "step": 2522 }, { "epoch": 3.0, "step": 2523, "total_flos": 2232048114991104.0, "train_loss": 0.2896275484415448, "train_runtime": 144327.3604, "train_samples_per_second": 0.559, "train_steps_per_second": 0.017 } ], "logging_steps": 2, "max_steps": 2523, "num_train_epochs": 3, "save_steps": 1000, "total_flos": 2232048114991104.0, "trial_name": null, "trial_params": null }