{ "best_metric": null, "best_model_checkpoint": null, "epoch": 2.99958796868562, "global_step": 7280, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 1.0000000000000001e-07, "loss": 11.954, "step": 2 }, { "epoch": 0.0, "learning_rate": 2.0000000000000002e-07, "loss": 11.7574, "step": 4 }, { "epoch": 0.0, "learning_rate": 4.0000000000000003e-07, "loss": 11.7662, "step": 6 }, { "epoch": 0.0, "learning_rate": 6.000000000000001e-07, "loss": 11.6897, "step": 8 }, { "epoch": 0.0, "learning_rate": 8.000000000000001e-07, "loss": 11.7292, "step": 10 }, { "epoch": 0.0, "learning_rate": 1.0000000000000002e-06, "loss": 11.7205, "step": 12 }, { "epoch": 0.01, "learning_rate": 1.2000000000000002e-06, "loss": 11.4608, "step": 14 }, { "epoch": 0.01, "learning_rate": 1.4000000000000001e-06, "loss": 11.282, "step": 16 }, { "epoch": 0.01, "learning_rate": 1.6000000000000001e-06, "loss": 11.0321, "step": 18 }, { "epoch": 0.01, "learning_rate": 1.8e-06, "loss": 10.9982, "step": 20 }, { "epoch": 0.01, "learning_rate": 2.0000000000000003e-06, "loss": 10.7355, "step": 22 }, { "epoch": 0.01, "learning_rate": 2.2e-06, "loss": 10.4186, "step": 24 }, { "epoch": 0.01, "learning_rate": 2.4000000000000003e-06, "loss": 10.2501, "step": 26 }, { "epoch": 0.01, "learning_rate": 2.6e-06, "loss": 10.1457, "step": 28 }, { "epoch": 0.01, "learning_rate": 2.8000000000000003e-06, "loss": 9.7192, "step": 30 }, { "epoch": 0.01, "learning_rate": 3e-06, "loss": 9.4468, "step": 32 }, { "epoch": 0.01, "learning_rate": 3.2000000000000003e-06, "loss": 9.3485, "step": 34 }, { "epoch": 0.01, "learning_rate": 3.4000000000000005e-06, "loss": 9.1247, "step": 36 }, { "epoch": 0.02, "learning_rate": 3.6e-06, "loss": 8.8148, "step": 38 }, { "epoch": 0.02, "learning_rate": 3.8e-06, "loss": 8.5495, "step": 40 }, { "epoch": 0.02, "learning_rate": 4.000000000000001e-06, "loss": 8.4185, "step": 42 }, { "epoch": 0.02, "learning_rate": 4.2000000000000004e-06, "loss": 8.2214, "step": 44 }, { "epoch": 0.02, "learning_rate": 4.4e-06, "loss": 8.0612, "step": 46 }, { "epoch": 0.02, "learning_rate": 4.6e-06, "loss": 7.8571, "step": 48 }, { "epoch": 0.02, "learning_rate": 4.800000000000001e-06, "loss": 7.6998, "step": 50 }, { "epoch": 0.02, "learning_rate": 5e-06, "loss": 7.6994, "step": 52 }, { "epoch": 0.02, "learning_rate": 5.2e-06, "loss": 7.4386, "step": 54 }, { "epoch": 0.02, "learning_rate": 5.4e-06, "loss": 7.4094, "step": 56 }, { "epoch": 0.02, "learning_rate": 5.600000000000001e-06, "loss": 7.3206, "step": 58 }, { "epoch": 0.02, "learning_rate": 5.8e-06, "loss": 7.2811, "step": 60 }, { "epoch": 0.03, "learning_rate": 6e-06, "loss": 7.2123, "step": 62 }, { "epoch": 0.03, "learning_rate": 6.2e-06, "loss": 7.1106, "step": 64 }, { "epoch": 0.03, "learning_rate": 6.4000000000000006e-06, "loss": 7.182, "step": 66 }, { "epoch": 0.03, "learning_rate": 6.6e-06, "loss": 7.0434, "step": 68 }, { "epoch": 0.03, "learning_rate": 6.800000000000001e-06, "loss": 7.0735, "step": 70 }, { "epoch": 0.03, "learning_rate": 7.000000000000001e-06, "loss": 6.9208, "step": 72 }, { "epoch": 0.03, "learning_rate": 7.2e-06, "loss": 6.9198, "step": 74 }, { "epoch": 0.03, "learning_rate": 7.4e-06, "loss": 6.8729, "step": 76 }, { "epoch": 0.03, "learning_rate": 7.6e-06, "loss": 6.7778, "step": 78 }, { "epoch": 0.03, "learning_rate": 7.8e-06, "loss": 6.7507, "step": 80 }, { "epoch": 0.03, "learning_rate": 8.000000000000001e-06, "loss": 6.6864, "step": 82 }, { "epoch": 0.03, "learning_rate": 8.200000000000001e-06, "loss": 6.7855, "step": 84 }, { "epoch": 0.04, "learning_rate": 8.400000000000001e-06, "loss": 6.6503, "step": 86 }, { "epoch": 0.04, "learning_rate": 8.599999999999999e-06, "loss": 6.6798, "step": 88 }, { "epoch": 0.04, "learning_rate": 8.8e-06, "loss": 6.5844, "step": 90 }, { "epoch": 0.04, "learning_rate": 9e-06, "loss": 6.6041, "step": 92 }, { "epoch": 0.04, "learning_rate": 9.2e-06, "loss": 6.539, "step": 94 }, { "epoch": 0.04, "learning_rate": 9.4e-06, "loss": 6.5601, "step": 96 }, { "epoch": 0.04, "learning_rate": 9.600000000000001e-06, "loss": 6.5664, "step": 98 }, { "epoch": 0.04, "learning_rate": 9.800000000000001e-06, "loss": 6.5179, "step": 100 }, { "epoch": 0.04, "learning_rate": 1e-05, "loss": 6.5246, "step": 102 }, { "epoch": 0.04, "learning_rate": 1.02e-05, "loss": 6.5961, "step": 104 }, { "epoch": 0.04, "learning_rate": 1.04e-05, "loss": 6.435, "step": 106 }, { "epoch": 0.04, "learning_rate": 1.06e-05, "loss": 6.4144, "step": 108 }, { "epoch": 0.05, "learning_rate": 1.08e-05, "loss": 6.426, "step": 110 }, { "epoch": 0.05, "learning_rate": 1.1000000000000001e-05, "loss": 6.4593, "step": 112 }, { "epoch": 0.05, "learning_rate": 1.1200000000000001e-05, "loss": 6.44, "step": 114 }, { "epoch": 0.05, "learning_rate": 1.1400000000000001e-05, "loss": 6.2686, "step": 116 }, { "epoch": 0.05, "learning_rate": 1.16e-05, "loss": 6.482, "step": 118 }, { "epoch": 0.05, "learning_rate": 1.18e-05, "loss": 6.3319, "step": 120 }, { "epoch": 0.05, "learning_rate": 1.2e-05, "loss": 6.325, "step": 122 }, { "epoch": 0.05, "learning_rate": 1.22e-05, "loss": 6.3218, "step": 124 }, { "epoch": 0.05, "learning_rate": 1.24e-05, "loss": 6.3082, "step": 126 }, { "epoch": 0.05, "learning_rate": 1.2600000000000001e-05, "loss": 6.3179, "step": 128 }, { "epoch": 0.05, "learning_rate": 1.2800000000000001e-05, "loss": 6.3464, "step": 130 }, { "epoch": 0.05, "learning_rate": 1.3000000000000001e-05, "loss": 6.3182, "step": 132 }, { "epoch": 0.06, "learning_rate": 1.32e-05, "loss": 6.2068, "step": 134 }, { "epoch": 0.06, "learning_rate": 1.3400000000000002e-05, "loss": 6.0798, "step": 136 }, { "epoch": 0.06, "learning_rate": 1.3600000000000002e-05, "loss": 6.2293, "step": 138 }, { "epoch": 0.06, "learning_rate": 1.3800000000000002e-05, "loss": 6.1612, "step": 140 }, { "epoch": 0.06, "learning_rate": 1.4000000000000001e-05, "loss": 6.1521, "step": 142 }, { "epoch": 0.06, "learning_rate": 1.42e-05, "loss": 6.168, "step": 144 }, { "epoch": 0.06, "learning_rate": 1.44e-05, "loss": 6.0825, "step": 146 }, { "epoch": 0.06, "learning_rate": 1.4599999999999999e-05, "loss": 6.0993, "step": 148 }, { "epoch": 0.06, "learning_rate": 1.48e-05, "loss": 6.1154, "step": 150 }, { "epoch": 0.06, "learning_rate": 1.5e-05, "loss": 6.0066, "step": 152 }, { "epoch": 0.06, "learning_rate": 1.52e-05, "loss": 5.9245, "step": 154 }, { "epoch": 0.06, "learning_rate": 1.54e-05, "loss": 5.9375, "step": 156 }, { "epoch": 0.07, "learning_rate": 1.56e-05, "loss": 5.9601, "step": 158 }, { "epoch": 0.07, "learning_rate": 1.58e-05, "loss": 5.9027, "step": 160 }, { "epoch": 0.07, "learning_rate": 1.6000000000000003e-05, "loss": 5.8572, "step": 162 }, { "epoch": 0.07, "learning_rate": 1.62e-05, "loss": 5.7952, "step": 164 }, { "epoch": 0.07, "learning_rate": 1.6400000000000002e-05, "loss": 5.8793, "step": 166 }, { "epoch": 0.07, "learning_rate": 1.66e-05, "loss": 5.7643, "step": 168 }, { "epoch": 0.07, "learning_rate": 1.6800000000000002e-05, "loss": 5.7912, "step": 170 }, { "epoch": 0.07, "learning_rate": 1.7000000000000003e-05, "loss": 5.7259, "step": 172 }, { "epoch": 0.07, "learning_rate": 1.7199999999999998e-05, "loss": 5.7517, "step": 174 }, { "epoch": 0.07, "learning_rate": 1.74e-05, "loss": 5.7468, "step": 176 }, { "epoch": 0.07, "learning_rate": 1.76e-05, "loss": 5.6949, "step": 178 }, { "epoch": 0.07, "learning_rate": 1.78e-05, "loss": 5.7367, "step": 180 }, { "epoch": 0.07, "learning_rate": 1.8e-05, "loss": 5.6554, "step": 182 }, { "epoch": 0.08, "learning_rate": 1.8200000000000002e-05, "loss": 5.6252, "step": 184 }, { "epoch": 0.08, "learning_rate": 1.84e-05, "loss": 5.6907, "step": 186 }, { "epoch": 0.08, "learning_rate": 1.86e-05, "loss": 5.5224, "step": 188 }, { "epoch": 0.08, "learning_rate": 1.88e-05, "loss": 5.6275, "step": 190 }, { "epoch": 0.08, "learning_rate": 1.9e-05, "loss": 5.4918, "step": 192 }, { "epoch": 0.08, "learning_rate": 1.9200000000000003e-05, "loss": 5.4904, "step": 194 }, { "epoch": 0.08, "learning_rate": 1.94e-05, "loss": 5.5485, "step": 196 }, { "epoch": 0.08, "learning_rate": 1.9600000000000002e-05, "loss": 5.4543, "step": 198 }, { "epoch": 0.08, "learning_rate": 1.9800000000000004e-05, "loss": 5.4329, "step": 200 }, { "epoch": 0.08, "learning_rate": 2e-05, "loss": 5.6126, "step": 202 }, { "epoch": 0.08, "learning_rate": 2.0200000000000003e-05, "loss": 5.3985, "step": 204 }, { "epoch": 0.08, "learning_rate": 2.04e-05, "loss": 5.3746, "step": 206 }, { "epoch": 0.09, "learning_rate": 2.06e-05, "loss": 5.4144, "step": 208 }, { "epoch": 0.09, "learning_rate": 2.08e-05, "loss": 5.3626, "step": 210 }, { "epoch": 0.09, "learning_rate": 2.1e-05, "loss": 5.2793, "step": 212 }, { "epoch": 0.09, "learning_rate": 2.12e-05, "loss": 5.3459, "step": 214 }, { "epoch": 0.09, "learning_rate": 2.1400000000000002e-05, "loss": 5.4056, "step": 216 }, { "epoch": 0.09, "learning_rate": 2.16e-05, "loss": 5.2179, "step": 218 }, { "epoch": 0.09, "learning_rate": 2.18e-05, "loss": 5.2413, "step": 220 }, { "epoch": 0.09, "learning_rate": 2.2000000000000003e-05, "loss": 5.1814, "step": 222 }, { "epoch": 0.09, "learning_rate": 2.22e-05, "loss": 4.9489, "step": 224 }, { "epoch": 0.09, "learning_rate": 2.2400000000000002e-05, "loss": 4.975, "step": 226 }, { "epoch": 0.09, "learning_rate": 2.26e-05, "loss": 4.9943, "step": 228 }, { "epoch": 0.09, "learning_rate": 2.2800000000000002e-05, "loss": 4.7421, "step": 230 }, { "epoch": 0.1, "learning_rate": 2.3000000000000003e-05, "loss": 4.4864, "step": 232 }, { "epoch": 0.1, "learning_rate": 2.32e-05, "loss": 4.4499, "step": 234 }, { "epoch": 0.1, "learning_rate": 2.3400000000000003e-05, "loss": 4.1762, "step": 236 }, { "epoch": 0.1, "learning_rate": 2.36e-05, "loss": 3.9751, "step": 238 }, { "epoch": 0.1, "learning_rate": 2.37e-05, "loss": 3.9673, "step": 240 }, { "epoch": 0.1, "learning_rate": 2.39e-05, "loss": 3.413, "step": 242 }, { "epoch": 0.1, "learning_rate": 2.41e-05, "loss": 2.7623, "step": 244 }, { "epoch": 0.1, "learning_rate": 2.43e-05, "loss": 2.5487, "step": 246 }, { "epoch": 0.1, "learning_rate": 2.45e-05, "loss": 1.8896, "step": 248 }, { "epoch": 0.1, "learning_rate": 2.47e-05, "loss": 2.0058, "step": 250 }, { "epoch": 0.1, "learning_rate": 2.4900000000000002e-05, "loss": 1.6642, "step": 252 }, { "epoch": 0.1, "learning_rate": 2.51e-05, "loss": 1.3299, "step": 254 }, { "epoch": 0.11, "learning_rate": 2.5300000000000002e-05, "loss": 1.2625, "step": 256 }, { "epoch": 0.11, "learning_rate": 2.5500000000000003e-05, "loss": 1.2002, "step": 258 }, { "epoch": 0.11, "learning_rate": 2.57e-05, "loss": 1.1715, "step": 260 }, { "epoch": 0.11, "learning_rate": 2.5900000000000003e-05, "loss": 1.1752, "step": 262 }, { "epoch": 0.11, "learning_rate": 2.61e-05, "loss": 1.062, "step": 264 }, { "epoch": 0.11, "learning_rate": 2.6300000000000002e-05, "loss": 1.0381, "step": 266 }, { "epoch": 0.11, "learning_rate": 2.6500000000000004e-05, "loss": 0.9979, "step": 268 }, { "epoch": 0.11, "learning_rate": 2.6700000000000002e-05, "loss": 1.0363, "step": 270 }, { "epoch": 0.11, "learning_rate": 2.6900000000000003e-05, "loss": 0.9941, "step": 272 }, { "epoch": 0.11, "learning_rate": 2.7100000000000005e-05, "loss": 0.9799, "step": 274 }, { "epoch": 0.11, "learning_rate": 2.7300000000000003e-05, "loss": 0.9831, "step": 276 }, { "epoch": 0.11, "learning_rate": 2.7500000000000004e-05, "loss": 0.9996, "step": 278 }, { "epoch": 0.12, "learning_rate": 2.7700000000000002e-05, "loss": 0.9783, "step": 280 }, { "epoch": 0.12, "learning_rate": 2.7900000000000004e-05, "loss": 0.945, "step": 282 }, { "epoch": 0.12, "learning_rate": 2.8100000000000005e-05, "loss": 0.9243, "step": 284 }, { "epoch": 0.12, "learning_rate": 2.83e-05, "loss": 0.9685, "step": 286 }, { "epoch": 0.12, "learning_rate": 2.8499999999999998e-05, "loss": 0.9208, "step": 288 }, { "epoch": 0.12, "learning_rate": 2.87e-05, "loss": 0.9502, "step": 290 }, { "epoch": 0.12, "learning_rate": 2.8899999999999998e-05, "loss": 0.975, "step": 292 }, { "epoch": 0.12, "learning_rate": 2.91e-05, "loss": 0.9952, "step": 294 }, { "epoch": 0.12, "learning_rate": 2.93e-05, "loss": 0.9405, "step": 296 }, { "epoch": 0.12, "learning_rate": 2.95e-05, "loss": 0.8896, "step": 298 }, { "epoch": 0.12, "learning_rate": 2.97e-05, "loss": 1.0118, "step": 300 }, { "epoch": 0.12, "learning_rate": 2.9900000000000002e-05, "loss": 0.9056, "step": 302 }, { "epoch": 0.13, "learning_rate": 3.01e-05, "loss": 0.9318, "step": 304 }, { "epoch": 0.13, "learning_rate": 3.03e-05, "loss": 0.8904, "step": 306 }, { "epoch": 0.13, "learning_rate": 3.05e-05, "loss": 0.9237, "step": 308 }, { "epoch": 0.13, "learning_rate": 3.07e-05, "loss": 0.8975, "step": 310 }, { "epoch": 0.13, "learning_rate": 3.09e-05, "loss": 0.9402, "step": 312 }, { "epoch": 0.13, "learning_rate": 3.1100000000000004e-05, "loss": 0.9393, "step": 314 }, { "epoch": 0.13, "learning_rate": 3.13e-05, "loss": 0.8767, "step": 316 }, { "epoch": 0.13, "learning_rate": 3.15e-05, "loss": 0.8804, "step": 318 }, { "epoch": 0.13, "learning_rate": 3.1700000000000005e-05, "loss": 0.8926, "step": 320 }, { "epoch": 0.13, "learning_rate": 3.19e-05, "loss": 0.9633, "step": 322 }, { "epoch": 0.13, "learning_rate": 3.21e-05, "loss": 0.8566, "step": 324 }, { "epoch": 0.13, "learning_rate": 3.2300000000000006e-05, "loss": 0.8893, "step": 326 }, { "epoch": 0.14, "learning_rate": 3.2500000000000004e-05, "loss": 0.8954, "step": 328 }, { "epoch": 0.14, "learning_rate": 3.27e-05, "loss": 0.943, "step": 330 }, { "epoch": 0.14, "learning_rate": 3.29e-05, "loss": 0.9046, "step": 332 }, { "epoch": 0.14, "learning_rate": 3.3100000000000005e-05, "loss": 0.8691, "step": 334 }, { "epoch": 0.14, "learning_rate": 3.33e-05, "loss": 0.8766, "step": 336 }, { "epoch": 0.14, "learning_rate": 3.35e-05, "loss": 0.8979, "step": 338 }, { "epoch": 0.14, "learning_rate": 3.3700000000000006e-05, "loss": 0.9405, "step": 340 }, { "epoch": 0.14, "learning_rate": 3.3900000000000004e-05, "loss": 0.937, "step": 342 }, { "epoch": 0.14, "learning_rate": 3.41e-05, "loss": 0.9517, "step": 344 }, { "epoch": 0.14, "learning_rate": 3.430000000000001e-05, "loss": 0.8925, "step": 346 }, { "epoch": 0.14, "learning_rate": 3.45e-05, "loss": 0.9017, "step": 348 }, { "epoch": 0.14, "learning_rate": 3.4699999999999996e-05, "loss": 0.9481, "step": 350 }, { "epoch": 0.15, "learning_rate": 3.49e-05, "loss": 0.9065, "step": 352 }, { "epoch": 0.15, "learning_rate": 3.51e-05, "loss": 0.8895, "step": 354 }, { "epoch": 0.15, "learning_rate": 3.53e-05, "loss": 0.9089, "step": 356 }, { "epoch": 0.15, "learning_rate": 3.55e-05, "loss": 0.9032, "step": 358 }, { "epoch": 0.15, "learning_rate": 3.57e-05, "loss": 0.8839, "step": 360 }, { "epoch": 0.15, "learning_rate": 3.59e-05, "loss": 0.9038, "step": 362 }, { "epoch": 0.15, "learning_rate": 3.61e-05, "loss": 0.8902, "step": 364 }, { "epoch": 0.15, "learning_rate": 3.63e-05, "loss": 0.909, "step": 366 }, { "epoch": 0.15, "learning_rate": 3.65e-05, "loss": 0.9007, "step": 368 }, { "epoch": 0.15, "learning_rate": 3.6700000000000004e-05, "loss": 0.8603, "step": 370 }, { "epoch": 0.15, "learning_rate": 3.69e-05, "loss": 0.9076, "step": 372 }, { "epoch": 0.15, "learning_rate": 3.71e-05, "loss": 0.9497, "step": 374 }, { "epoch": 0.15, "learning_rate": 3.73e-05, "loss": 0.861, "step": 376 }, { "epoch": 0.16, "learning_rate": 3.7500000000000003e-05, "loss": 0.8768, "step": 378 }, { "epoch": 0.16, "learning_rate": 3.77e-05, "loss": 0.8686, "step": 380 }, { "epoch": 0.16, "learning_rate": 3.79e-05, "loss": 0.8976, "step": 382 }, { "epoch": 0.16, "learning_rate": 3.8100000000000005e-05, "loss": 0.9487, "step": 384 }, { "epoch": 0.16, "learning_rate": 3.83e-05, "loss": 0.8876, "step": 386 }, { "epoch": 0.16, "learning_rate": 3.85e-05, "loss": 0.8372, "step": 388 }, { "epoch": 0.16, "learning_rate": 3.8700000000000006e-05, "loss": 0.8659, "step": 390 }, { "epoch": 0.16, "learning_rate": 3.8900000000000004e-05, "loss": 0.8629, "step": 392 }, { "epoch": 0.16, "learning_rate": 3.91e-05, "loss": 0.8987, "step": 394 }, { "epoch": 0.16, "learning_rate": 3.9300000000000007e-05, "loss": 0.8044, "step": 396 }, { "epoch": 0.16, "learning_rate": 3.9500000000000005e-05, "loss": 0.8774, "step": 398 }, { "epoch": 0.16, "learning_rate": 3.97e-05, "loss": 0.8448, "step": 400 }, { "epoch": 0.17, "learning_rate": 3.99e-05, "loss": 0.8648, "step": 402 }, { "epoch": 0.17, "learning_rate": 4.0100000000000006e-05, "loss": 0.9247, "step": 404 }, { "epoch": 0.17, "learning_rate": 4.0300000000000004e-05, "loss": 0.9516, "step": 406 }, { "epoch": 0.17, "learning_rate": 4.05e-05, "loss": 0.9199, "step": 408 }, { "epoch": 0.17, "learning_rate": 4.07e-05, "loss": 0.8626, "step": 410 }, { "epoch": 0.17, "learning_rate": 4.09e-05, "loss": 0.8827, "step": 412 }, { "epoch": 0.17, "learning_rate": 4.11e-05, "loss": 0.8758, "step": 414 }, { "epoch": 0.17, "learning_rate": 4.13e-05, "loss": 0.8837, "step": 416 }, { "epoch": 0.17, "learning_rate": 4.15e-05, "loss": 0.8269, "step": 418 }, { "epoch": 0.17, "learning_rate": 4.17e-05, "loss": 0.8785, "step": 420 }, { "epoch": 0.17, "learning_rate": 4.19e-05, "loss": 0.8848, "step": 422 }, { "epoch": 0.17, "learning_rate": 4.21e-05, "loss": 0.8468, "step": 424 }, { "epoch": 0.18, "learning_rate": 4.23e-05, "loss": 0.8769, "step": 426 }, { "epoch": 0.18, "learning_rate": 4.25e-05, "loss": 0.8842, "step": 428 }, { "epoch": 0.18, "learning_rate": 4.27e-05, "loss": 0.8689, "step": 430 }, { "epoch": 0.18, "learning_rate": 4.29e-05, "loss": 0.8532, "step": 432 }, { "epoch": 0.18, "learning_rate": 4.3100000000000004e-05, "loss": 0.8785, "step": 434 }, { "epoch": 0.18, "learning_rate": 4.33e-05, "loss": 0.8459, "step": 436 }, { "epoch": 0.18, "learning_rate": 4.35e-05, "loss": 0.9716, "step": 438 }, { "epoch": 0.18, "learning_rate": 4.3700000000000005e-05, "loss": 0.8705, "step": 440 }, { "epoch": 0.18, "learning_rate": 4.39e-05, "loss": 0.8851, "step": 442 }, { "epoch": 0.18, "learning_rate": 4.41e-05, "loss": 0.8803, "step": 444 }, { "epoch": 0.18, "learning_rate": 4.43e-05, "loss": 0.8405, "step": 446 }, { "epoch": 0.18, "learning_rate": 4.4500000000000004e-05, "loss": 0.8476, "step": 448 }, { "epoch": 0.19, "learning_rate": 4.47e-05, "loss": 0.9052, "step": 450 }, { "epoch": 0.19, "learning_rate": 4.49e-05, "loss": 0.8506, "step": 452 }, { "epoch": 0.19, "learning_rate": 4.5100000000000005e-05, "loss": 0.8367, "step": 454 }, { "epoch": 0.19, "learning_rate": 4.53e-05, "loss": 0.8569, "step": 456 }, { "epoch": 0.19, "learning_rate": 4.55e-05, "loss": 0.8898, "step": 458 }, { "epoch": 0.19, "learning_rate": 4.5700000000000006e-05, "loss": 0.9018, "step": 460 }, { "epoch": 0.19, "learning_rate": 4.5900000000000004e-05, "loss": 0.8873, "step": 462 }, { "epoch": 0.19, "learning_rate": 4.61e-05, "loss": 0.8827, "step": 464 }, { "epoch": 0.19, "learning_rate": 4.630000000000001e-05, "loss": 0.8778, "step": 466 }, { "epoch": 0.19, "learning_rate": 4.6500000000000005e-05, "loss": 0.8425, "step": 468 }, { "epoch": 0.19, "learning_rate": 4.6700000000000003e-05, "loss": 0.8578, "step": 470 }, { "epoch": 0.19, "learning_rate": 4.69e-05, "loss": 0.9361, "step": 472 }, { "epoch": 0.2, "learning_rate": 4.71e-05, "loss": 0.8809, "step": 474 }, { "epoch": 0.2, "learning_rate": 4.73e-05, "loss": 0.8798, "step": 476 }, { "epoch": 0.2, "learning_rate": 4.75e-05, "loss": 0.8578, "step": 478 }, { "epoch": 0.2, "learning_rate": 4.77e-05, "loss": 0.8839, "step": 480 }, { "epoch": 0.2, "learning_rate": 4.79e-05, "loss": 0.8434, "step": 482 }, { "epoch": 0.2, "learning_rate": 4.8100000000000004e-05, "loss": 0.8752, "step": 484 }, { "epoch": 0.2, "learning_rate": 4.83e-05, "loss": 0.9135, "step": 486 }, { "epoch": 0.2, "learning_rate": 4.85e-05, "loss": 0.8467, "step": 488 }, { "epoch": 0.2, "learning_rate": 4.87e-05, "loss": 0.8543, "step": 490 }, { "epoch": 0.2, "learning_rate": 4.89e-05, "loss": 0.8767, "step": 492 }, { "epoch": 0.2, "learning_rate": 4.91e-05, "loss": 0.9209, "step": 494 }, { "epoch": 0.2, "learning_rate": 4.93e-05, "loss": 0.8609, "step": 496 }, { "epoch": 0.21, "learning_rate": 4.9500000000000004e-05, "loss": 0.8317, "step": 498 }, { "epoch": 0.21, "learning_rate": 4.97e-05, "loss": 0.8808, "step": 500 }, { "epoch": 0.21, "learning_rate": 4.99e-05, "loss": 0.8848, "step": 502 }, { "epoch": 0.21, "learning_rate": 4.999262645627489e-05, "loss": 0.8544, "step": 504 }, { "epoch": 0.21, "learning_rate": 4.997787936882466e-05, "loss": 0.8409, "step": 506 }, { "epoch": 0.21, "learning_rate": 4.996313228137443e-05, "loss": 0.8588, "step": 508 }, { "epoch": 0.21, "learning_rate": 4.99483851939242e-05, "loss": 0.8377, "step": 510 }, { "epoch": 0.21, "learning_rate": 4.9933638106473976e-05, "loss": 0.9157, "step": 512 }, { "epoch": 0.21, "learning_rate": 4.9918891019023746e-05, "loss": 0.8672, "step": 514 }, { "epoch": 0.21, "learning_rate": 4.9904143931573517e-05, "loss": 0.8357, "step": 516 }, { "epoch": 0.21, "learning_rate": 4.988939684412329e-05, "loss": 0.8829, "step": 518 }, { "epoch": 0.21, "learning_rate": 4.987464975667306e-05, "loss": 0.9034, "step": 520 }, { "epoch": 0.22, "learning_rate": 4.985990266922283e-05, "loss": 0.8809, "step": 522 }, { "epoch": 0.22, "learning_rate": 4.98451555817726e-05, "loss": 0.899, "step": 524 }, { "epoch": 0.22, "learning_rate": 4.9830408494322375e-05, "loss": 0.8546, "step": 526 }, { "epoch": 0.22, "learning_rate": 4.9815661406872146e-05, "loss": 0.8629, "step": 528 }, { "epoch": 0.22, "learning_rate": 4.9800914319421916e-05, "loss": 0.8685, "step": 530 }, { "epoch": 0.22, "learning_rate": 4.9786167231971686e-05, "loss": 0.8077, "step": 532 }, { "epoch": 0.22, "learning_rate": 4.977142014452146e-05, "loss": 0.878, "step": 534 }, { "epoch": 0.22, "learning_rate": 4.975667305707123e-05, "loss": 0.8549, "step": 536 }, { "epoch": 0.22, "learning_rate": 4.9741925969621e-05, "loss": 0.8689, "step": 538 }, { "epoch": 0.22, "learning_rate": 4.9727178882170774e-05, "loss": 0.8742, "step": 540 }, { "epoch": 0.22, "learning_rate": 4.9712431794720545e-05, "loss": 0.849, "step": 542 }, { "epoch": 0.22, "learning_rate": 4.9697684707270315e-05, "loss": 0.8723, "step": 544 }, { "epoch": 0.22, "learning_rate": 4.9682937619820086e-05, "loss": 0.8409, "step": 546 }, { "epoch": 0.23, "learning_rate": 4.9668190532369856e-05, "loss": 0.8383, "step": 548 }, { "epoch": 0.23, "learning_rate": 4.9653443444919626e-05, "loss": 0.8507, "step": 550 }, { "epoch": 0.23, "learning_rate": 4.96386963574694e-05, "loss": 0.8375, "step": 552 }, { "epoch": 0.23, "learning_rate": 4.9623949270019174e-05, "loss": 0.8783, "step": 554 }, { "epoch": 0.23, "learning_rate": 4.9609202182568944e-05, "loss": 0.8638, "step": 556 }, { "epoch": 0.23, "learning_rate": 4.9594455095118715e-05, "loss": 0.8609, "step": 558 }, { "epoch": 0.23, "learning_rate": 4.957970800766849e-05, "loss": 0.8278, "step": 560 }, { "epoch": 0.23, "learning_rate": 4.956496092021826e-05, "loss": 0.8456, "step": 562 }, { "epoch": 0.23, "learning_rate": 4.955021383276803e-05, "loss": 0.8514, "step": 564 }, { "epoch": 0.23, "learning_rate": 4.95354667453178e-05, "loss": 0.8789, "step": 566 }, { "epoch": 0.23, "learning_rate": 4.952071965786757e-05, "loss": 0.8477, "step": 568 }, { "epoch": 0.23, "learning_rate": 4.950597257041735e-05, "loss": 0.8417, "step": 570 }, { "epoch": 0.24, "learning_rate": 4.949122548296712e-05, "loss": 0.8501, "step": 572 }, { "epoch": 0.24, "learning_rate": 4.947647839551689e-05, "loss": 0.8384, "step": 574 }, { "epoch": 0.24, "learning_rate": 4.946173130806666e-05, "loss": 0.8437, "step": 576 }, { "epoch": 0.24, "learning_rate": 4.944698422061643e-05, "loss": 0.8619, "step": 578 }, { "epoch": 0.24, "learning_rate": 4.94322371331662e-05, "loss": 0.8724, "step": 580 }, { "epoch": 0.24, "learning_rate": 4.941749004571597e-05, "loss": 0.7811, "step": 582 }, { "epoch": 0.24, "learning_rate": 4.940274295826575e-05, "loss": 0.8787, "step": 584 }, { "epoch": 0.24, "learning_rate": 4.938799587081552e-05, "loss": 0.8281, "step": 586 }, { "epoch": 0.24, "learning_rate": 4.937324878336529e-05, "loss": 0.7954, "step": 588 }, { "epoch": 0.24, "learning_rate": 4.935850169591506e-05, "loss": 0.8124, "step": 590 }, { "epoch": 0.24, "learning_rate": 4.934375460846483e-05, "loss": 0.8741, "step": 592 }, { "epoch": 0.24, "learning_rate": 4.93290075210146e-05, "loss": 0.8196, "step": 594 }, { "epoch": 0.25, "learning_rate": 4.931426043356437e-05, "loss": 0.8318, "step": 596 }, { "epoch": 0.25, "learning_rate": 4.929951334611415e-05, "loss": 0.9004, "step": 598 }, { "epoch": 0.25, "learning_rate": 4.928476625866392e-05, "loss": 0.8497, "step": 600 }, { "epoch": 0.25, "learning_rate": 4.927001917121369e-05, "loss": 0.8404, "step": 602 }, { "epoch": 0.25, "learning_rate": 4.925527208376346e-05, "loss": 0.8338, "step": 604 }, { "epoch": 0.25, "learning_rate": 4.924052499631323e-05, "loss": 0.8533, "step": 606 }, { "epoch": 0.25, "learning_rate": 4.9225777908863e-05, "loss": 0.8912, "step": 608 }, { "epoch": 0.25, "learning_rate": 4.921103082141277e-05, "loss": 0.8374, "step": 610 }, { "epoch": 0.25, "learning_rate": 4.919628373396254e-05, "loss": 0.8796, "step": 612 }, { "epoch": 0.25, "learning_rate": 4.918153664651232e-05, "loss": 0.8042, "step": 614 }, { "epoch": 0.25, "learning_rate": 4.916678955906209e-05, "loss": 0.7941, "step": 616 }, { "epoch": 0.25, "learning_rate": 4.915204247161186e-05, "loss": 0.933, "step": 618 }, { "epoch": 0.26, "learning_rate": 4.913729538416163e-05, "loss": 0.8431, "step": 620 }, { "epoch": 0.26, "learning_rate": 4.91225482967114e-05, "loss": 0.8813, "step": 622 }, { "epoch": 0.26, "learning_rate": 4.910780120926117e-05, "loss": 0.8526, "step": 624 }, { "epoch": 0.26, "learning_rate": 4.909305412181094e-05, "loss": 0.8631, "step": 626 }, { "epoch": 0.26, "learning_rate": 4.907830703436072e-05, "loss": 0.8055, "step": 628 }, { "epoch": 0.26, "learning_rate": 4.906355994691049e-05, "loss": 0.8616, "step": 630 }, { "epoch": 0.26, "learning_rate": 4.904881285946026e-05, "loss": 0.9097, "step": 632 }, { "epoch": 0.26, "learning_rate": 4.903406577201003e-05, "loss": 0.8544, "step": 634 }, { "epoch": 0.26, "learning_rate": 4.90193186845598e-05, "loss": 0.8254, "step": 636 }, { "epoch": 0.26, "learning_rate": 4.900457159710957e-05, "loss": 0.8253, "step": 638 }, { "epoch": 0.26, "learning_rate": 4.898982450965934e-05, "loss": 0.7993, "step": 640 }, { "epoch": 0.26, "learning_rate": 4.897507742220912e-05, "loss": 0.879, "step": 642 }, { "epoch": 0.27, "learning_rate": 4.896033033475889e-05, "loss": 0.8417, "step": 644 }, { "epoch": 0.27, "learning_rate": 4.894558324730866e-05, "loss": 0.8124, "step": 646 }, { "epoch": 0.27, "learning_rate": 4.893083615985843e-05, "loss": 0.8684, "step": 648 }, { "epoch": 0.27, "learning_rate": 4.89160890724082e-05, "loss": 0.8399, "step": 650 }, { "epoch": 0.27, "learning_rate": 4.890134198495797e-05, "loss": 0.8246, "step": 652 }, { "epoch": 0.27, "learning_rate": 4.888659489750774e-05, "loss": 0.8645, "step": 654 }, { "epoch": 0.27, "learning_rate": 4.887184781005752e-05, "loss": 0.8278, "step": 656 }, { "epoch": 0.27, "learning_rate": 4.885710072260729e-05, "loss": 0.9073, "step": 658 }, { "epoch": 0.27, "learning_rate": 4.884235363515706e-05, "loss": 0.8387, "step": 660 }, { "epoch": 0.27, "learning_rate": 4.882760654770683e-05, "loss": 0.8482, "step": 662 }, { "epoch": 0.27, "learning_rate": 4.88128594602566e-05, "loss": 0.8576, "step": 664 }, { "epoch": 0.27, "learning_rate": 4.879811237280637e-05, "loss": 0.8412, "step": 666 }, { "epoch": 0.28, "learning_rate": 4.878336528535614e-05, "loss": 0.8005, "step": 668 }, { "epoch": 0.28, "learning_rate": 4.8768618197905916e-05, "loss": 0.8123, "step": 670 }, { "epoch": 0.28, "learning_rate": 4.875387111045569e-05, "loss": 0.8396, "step": 672 }, { "epoch": 0.28, "learning_rate": 4.8739124023005464e-05, "loss": 0.8386, "step": 674 }, { "epoch": 0.28, "learning_rate": 4.8724376935555234e-05, "loss": 0.9202, "step": 676 }, { "epoch": 0.28, "learning_rate": 4.8709629848105004e-05, "loss": 0.7992, "step": 678 }, { "epoch": 0.28, "learning_rate": 4.8694882760654775e-05, "loss": 0.876, "step": 680 }, { "epoch": 0.28, "learning_rate": 4.8680135673204545e-05, "loss": 0.8453, "step": 682 }, { "epoch": 0.28, "learning_rate": 4.8665388585754315e-05, "loss": 0.8252, "step": 684 }, { "epoch": 0.28, "learning_rate": 4.865064149830409e-05, "loss": 0.8319, "step": 686 }, { "epoch": 0.28, "learning_rate": 4.863589441085386e-05, "loss": 0.8373, "step": 688 }, { "epoch": 0.28, "learning_rate": 4.862114732340363e-05, "loss": 0.8236, "step": 690 }, { "epoch": 0.29, "learning_rate": 4.8606400235953404e-05, "loss": 0.7889, "step": 692 }, { "epoch": 0.29, "learning_rate": 4.8591653148503174e-05, "loss": 0.8248, "step": 694 }, { "epoch": 0.29, "learning_rate": 4.8576906061052944e-05, "loss": 0.8454, "step": 696 }, { "epoch": 0.29, "learning_rate": 4.8562158973602715e-05, "loss": 0.8636, "step": 698 }, { "epoch": 0.29, "learning_rate": 4.8547411886152485e-05, "loss": 0.884, "step": 700 }, { "epoch": 0.29, "learning_rate": 4.853266479870226e-05, "loss": 0.8469, "step": 702 }, { "epoch": 0.29, "learning_rate": 4.851791771125203e-05, "loss": 0.8405, "step": 704 }, { "epoch": 0.29, "learning_rate": 4.85031706238018e-05, "loss": 0.7826, "step": 706 }, { "epoch": 0.29, "learning_rate": 4.848842353635157e-05, "loss": 0.8442, "step": 708 }, { "epoch": 0.29, "learning_rate": 4.8473676448901344e-05, "loss": 0.8202, "step": 710 }, { "epoch": 0.29, "learning_rate": 4.8458929361451114e-05, "loss": 0.8535, "step": 712 }, { "epoch": 0.29, "learning_rate": 4.8444182274000884e-05, "loss": 0.8496, "step": 714 }, { "epoch": 0.3, "learning_rate": 4.842943518655066e-05, "loss": 0.8235, "step": 716 }, { "epoch": 0.3, "learning_rate": 4.841468809910043e-05, "loss": 0.8485, "step": 718 }, { "epoch": 0.3, "learning_rate": 4.83999410116502e-05, "loss": 0.8674, "step": 720 }, { "epoch": 0.3, "learning_rate": 4.838519392419997e-05, "loss": 0.8795, "step": 722 }, { "epoch": 0.3, "learning_rate": 4.837044683674974e-05, "loss": 0.8539, "step": 724 }, { "epoch": 0.3, "learning_rate": 4.8355699749299513e-05, "loss": 0.8626, "step": 726 }, { "epoch": 0.3, "learning_rate": 4.8340952661849284e-05, "loss": 0.8122, "step": 728 }, { "epoch": 0.3, "learning_rate": 4.832620557439906e-05, "loss": 0.8793, "step": 730 }, { "epoch": 0.3, "learning_rate": 4.831145848694883e-05, "loss": 0.81, "step": 732 }, { "epoch": 0.3, "learning_rate": 4.82967113994986e-05, "loss": 0.9482, "step": 734 }, { "epoch": 0.3, "learning_rate": 4.828196431204837e-05, "loss": 0.8222, "step": 736 }, { "epoch": 0.3, "learning_rate": 4.826721722459814e-05, "loss": 0.8519, "step": 738 }, { "epoch": 0.3, "learning_rate": 4.825247013714791e-05, "loss": 0.8499, "step": 740 }, { "epoch": 0.31, "learning_rate": 4.823772304969768e-05, "loss": 0.851, "step": 742 }, { "epoch": 0.31, "learning_rate": 4.822297596224746e-05, "loss": 0.8685, "step": 744 }, { "epoch": 0.31, "learning_rate": 4.820822887479723e-05, "loss": 0.8846, "step": 746 }, { "epoch": 0.31, "learning_rate": 4.8193481787347e-05, "loss": 0.8518, "step": 748 }, { "epoch": 0.31, "learning_rate": 4.817873469989677e-05, "loss": 0.8471, "step": 750 }, { "epoch": 0.31, "learning_rate": 4.816398761244654e-05, "loss": 0.8832, "step": 752 }, { "epoch": 0.31, "learning_rate": 4.814924052499631e-05, "loss": 0.8588, "step": 754 }, { "epoch": 0.31, "learning_rate": 4.813449343754608e-05, "loss": 0.8577, "step": 756 }, { "epoch": 0.31, "learning_rate": 4.811974635009586e-05, "loss": 0.8579, "step": 758 }, { "epoch": 0.31, "learning_rate": 4.810499926264563e-05, "loss": 0.8324, "step": 760 }, { "epoch": 0.31, "learning_rate": 4.80902521751954e-05, "loss": 0.8126, "step": 762 }, { "epoch": 0.31, "learning_rate": 4.807550508774517e-05, "loss": 0.832, "step": 764 }, { "epoch": 0.32, "learning_rate": 4.806075800029494e-05, "loss": 0.8345, "step": 766 }, { "epoch": 0.32, "learning_rate": 4.804601091284471e-05, "loss": 0.8391, "step": 768 }, { "epoch": 0.32, "learning_rate": 4.803126382539448e-05, "loss": 0.8035, "step": 770 }, { "epoch": 0.32, "learning_rate": 4.801651673794426e-05, "loss": 0.8089, "step": 772 }, { "epoch": 0.32, "learning_rate": 4.800176965049403e-05, "loss": 0.8706, "step": 774 }, { "epoch": 0.32, "learning_rate": 4.79870225630438e-05, "loss": 0.8626, "step": 776 }, { "epoch": 0.32, "learning_rate": 4.797227547559357e-05, "loss": 0.8638, "step": 778 }, { "epoch": 0.32, "learning_rate": 4.795752838814334e-05, "loss": 0.9007, "step": 780 }, { "epoch": 0.32, "learning_rate": 4.794278130069312e-05, "loss": 0.8611, "step": 782 }, { "epoch": 0.32, "learning_rate": 4.792803421324289e-05, "loss": 0.8556, "step": 784 }, { "epoch": 0.32, "learning_rate": 4.791328712579266e-05, "loss": 0.8462, "step": 786 }, { "epoch": 0.32, "learning_rate": 4.7898540038342435e-05, "loss": 0.8515, "step": 788 }, { "epoch": 0.33, "learning_rate": 4.7883792950892206e-05, "loss": 0.8073, "step": 790 }, { "epoch": 0.33, "learning_rate": 4.7869045863441976e-05, "loss": 0.8134, "step": 792 }, { "epoch": 0.33, "learning_rate": 4.7854298775991747e-05, "loss": 0.851, "step": 794 }, { "epoch": 0.33, "learning_rate": 4.783955168854152e-05, "loss": 0.848, "step": 796 }, { "epoch": 0.33, "learning_rate": 4.782480460109129e-05, "loss": 0.8346, "step": 798 }, { "epoch": 0.33, "learning_rate": 4.781005751364106e-05, "loss": 0.8662, "step": 800 }, { "epoch": 0.33, "learning_rate": 4.779531042619083e-05, "loss": 0.8581, "step": 802 }, { "epoch": 0.33, "learning_rate": 4.7780563338740605e-05, "loss": 0.8505, "step": 804 }, { "epoch": 0.33, "learning_rate": 4.7765816251290376e-05, "loss": 0.8554, "step": 806 }, { "epoch": 0.33, "learning_rate": 4.7751069163840146e-05, "loss": 0.9062, "step": 808 }, { "epoch": 0.33, "learning_rate": 4.7736322076389916e-05, "loss": 0.8006, "step": 810 }, { "epoch": 0.33, "learning_rate": 4.7721574988939687e-05, "loss": 0.8087, "step": 812 }, { "epoch": 0.34, "learning_rate": 4.770682790148946e-05, "loss": 0.8345, "step": 814 }, { "epoch": 0.34, "learning_rate": 4.769208081403923e-05, "loss": 0.8532, "step": 816 }, { "epoch": 0.34, "learning_rate": 4.7677333726589004e-05, "loss": 0.8842, "step": 818 }, { "epoch": 0.34, "learning_rate": 4.7662586639138775e-05, "loss": 0.8885, "step": 820 }, { "epoch": 0.34, "learning_rate": 4.7647839551688545e-05, "loss": 0.856, "step": 822 }, { "epoch": 0.34, "learning_rate": 4.7633092464238316e-05, "loss": 0.8625, "step": 824 }, { "epoch": 0.34, "learning_rate": 4.7618345376788086e-05, "loss": 0.8237, "step": 826 }, { "epoch": 0.34, "learning_rate": 4.7603598289337856e-05, "loss": 0.7523, "step": 828 }, { "epoch": 0.34, "learning_rate": 4.758885120188763e-05, "loss": 0.847, "step": 830 }, { "epoch": 0.34, "learning_rate": 4.7574104114437404e-05, "loss": 0.8751, "step": 832 }, { "epoch": 0.34, "learning_rate": 4.7559357026987174e-05, "loss": 0.8276, "step": 834 }, { "epoch": 0.34, "learning_rate": 4.7544609939536945e-05, "loss": 0.8154, "step": 836 }, { "epoch": 0.35, "learning_rate": 4.7529862852086715e-05, "loss": 0.8765, "step": 838 }, { "epoch": 0.35, "learning_rate": 4.7515115764636485e-05, "loss": 0.8684, "step": 840 }, { "epoch": 0.35, "learning_rate": 4.7500368677186256e-05, "loss": 0.8158, "step": 842 }, { "epoch": 0.35, "learning_rate": 4.7485621589736026e-05, "loss": 0.8219, "step": 844 }, { "epoch": 0.35, "learning_rate": 4.74708745022858e-05, "loss": 0.8181, "step": 846 }, { "epoch": 0.35, "learning_rate": 4.7456127414835574e-05, "loss": 0.8315, "step": 848 }, { "epoch": 0.35, "learning_rate": 4.7441380327385344e-05, "loss": 0.8109, "step": 850 }, { "epoch": 0.35, "learning_rate": 4.7426633239935114e-05, "loss": 0.8479, "step": 852 }, { "epoch": 0.35, "learning_rate": 4.7411886152484885e-05, "loss": 0.8438, "step": 854 }, { "epoch": 0.35, "learning_rate": 4.7397139065034655e-05, "loss": 0.8652, "step": 856 }, { "epoch": 0.35, "learning_rate": 4.7382391977584425e-05, "loss": 0.8379, "step": 858 }, { "epoch": 0.35, "learning_rate": 4.73676448901342e-05, "loss": 0.8482, "step": 860 }, { "epoch": 0.36, "learning_rate": 4.735289780268397e-05, "loss": 0.8513, "step": 862 }, { "epoch": 0.36, "learning_rate": 4.733815071523374e-05, "loss": 0.8628, "step": 864 }, { "epoch": 0.36, "learning_rate": 4.7323403627783514e-05, "loss": 0.8107, "step": 866 }, { "epoch": 0.36, "learning_rate": 4.7308656540333284e-05, "loss": 0.8356, "step": 868 }, { "epoch": 0.36, "learning_rate": 4.7293909452883054e-05, "loss": 0.8361, "step": 870 }, { "epoch": 0.36, "learning_rate": 4.7279162365432825e-05, "loss": 0.8223, "step": 872 }, { "epoch": 0.36, "learning_rate": 4.72644152779826e-05, "loss": 0.8262, "step": 874 }, { "epoch": 0.36, "learning_rate": 4.724966819053237e-05, "loss": 0.8054, "step": 876 }, { "epoch": 0.36, "learning_rate": 4.723492110308214e-05, "loss": 0.7922, "step": 878 }, { "epoch": 0.36, "learning_rate": 4.722017401563191e-05, "loss": 0.8221, "step": 880 }, { "epoch": 0.36, "learning_rate": 4.720542692818168e-05, "loss": 0.826, "step": 882 }, { "epoch": 0.36, "learning_rate": 4.7190679840731454e-05, "loss": 0.8521, "step": 884 }, { "epoch": 0.37, "learning_rate": 4.7175932753281224e-05, "loss": 0.8767, "step": 886 }, { "epoch": 0.37, "learning_rate": 4.7161185665831e-05, "loss": 0.8165, "step": 888 }, { "epoch": 0.37, "learning_rate": 4.714643857838077e-05, "loss": 0.8441, "step": 890 }, { "epoch": 0.37, "learning_rate": 4.713169149093054e-05, "loss": 0.7979, "step": 892 }, { "epoch": 0.37, "learning_rate": 4.711694440348032e-05, "loss": 0.8034, "step": 894 }, { "epoch": 0.37, "learning_rate": 4.710219731603009e-05, "loss": 0.8003, "step": 896 }, { "epoch": 0.37, "learning_rate": 4.708745022857986e-05, "loss": 0.8737, "step": 898 }, { "epoch": 0.37, "learning_rate": 4.707270314112963e-05, "loss": 0.8413, "step": 900 }, { "epoch": 0.37, "learning_rate": 4.70579560536794e-05, "loss": 0.8639, "step": 902 }, { "epoch": 0.37, "learning_rate": 4.704320896622917e-05, "loss": 0.841, "step": 904 }, { "epoch": 0.37, "learning_rate": 4.702846187877895e-05, "loss": 0.828, "step": 906 }, { "epoch": 0.37, "learning_rate": 4.701371479132872e-05, "loss": 0.8221, "step": 908 }, { "epoch": 0.37, "learning_rate": 4.699896770387849e-05, "loss": 0.8361, "step": 910 }, { "epoch": 0.38, "learning_rate": 4.698422061642826e-05, "loss": 0.8356, "step": 912 }, { "epoch": 0.38, "learning_rate": 4.696947352897803e-05, "loss": 0.8892, "step": 914 }, { "epoch": 0.38, "learning_rate": 4.69547264415278e-05, "loss": 0.8679, "step": 916 }, { "epoch": 0.38, "learning_rate": 4.693997935407757e-05, "loss": 0.857, "step": 918 }, { "epoch": 0.38, "learning_rate": 4.692523226662735e-05, "loss": 0.8376, "step": 920 }, { "epoch": 0.38, "learning_rate": 4.691048517917712e-05, "loss": 0.7994, "step": 922 }, { "epoch": 0.38, "learning_rate": 4.689573809172689e-05, "loss": 0.8265, "step": 924 }, { "epoch": 0.38, "learning_rate": 4.688099100427666e-05, "loss": 0.8302, "step": 926 }, { "epoch": 0.38, "learning_rate": 4.686624391682643e-05, "loss": 0.8605, "step": 928 }, { "epoch": 0.38, "learning_rate": 4.68514968293762e-05, "loss": 0.8348, "step": 930 }, { "epoch": 0.38, "learning_rate": 4.683674974192597e-05, "loss": 0.9294, "step": 932 }, { "epoch": 0.38, "learning_rate": 4.682200265447575e-05, "loss": 0.8049, "step": 934 }, { "epoch": 0.39, "learning_rate": 4.680725556702552e-05, "loss": 0.8367, "step": 936 }, { "epoch": 0.39, "learning_rate": 4.679250847957529e-05, "loss": 0.8214, "step": 938 }, { "epoch": 0.39, "learning_rate": 4.677776139212506e-05, "loss": 0.8354, "step": 940 }, { "epoch": 0.39, "learning_rate": 4.676301430467483e-05, "loss": 0.7735, "step": 942 }, { "epoch": 0.39, "learning_rate": 4.67482672172246e-05, "loss": 0.8547, "step": 944 }, { "epoch": 0.39, "learning_rate": 4.673352012977437e-05, "loss": 0.9028, "step": 946 }, { "epoch": 0.39, "learning_rate": 4.6718773042324146e-05, "loss": 0.8421, "step": 948 }, { "epoch": 0.39, "learning_rate": 4.6704025954873916e-05, "loss": 0.7982, "step": 950 }, { "epoch": 0.39, "learning_rate": 4.668927886742369e-05, "loss": 0.8219, "step": 952 }, { "epoch": 0.39, "learning_rate": 4.667453177997346e-05, "loss": 0.7998, "step": 954 }, { "epoch": 0.39, "learning_rate": 4.665978469252323e-05, "loss": 0.8252, "step": 956 }, { "epoch": 0.39, "learning_rate": 4.6645037605073e-05, "loss": 0.8232, "step": 958 }, { "epoch": 0.4, "learning_rate": 4.663029051762277e-05, "loss": 0.8422, "step": 960 }, { "epoch": 0.4, "learning_rate": 4.6615543430172545e-05, "loss": 0.8727, "step": 962 }, { "epoch": 0.4, "learning_rate": 4.6600796342722316e-05, "loss": 0.8578, "step": 964 }, { "epoch": 0.4, "learning_rate": 4.6586049255272086e-05, "loss": 0.837, "step": 966 }, { "epoch": 0.4, "learning_rate": 4.6571302167821856e-05, "loss": 0.9421, "step": 968 }, { "epoch": 0.4, "learning_rate": 4.655655508037163e-05, "loss": 0.8091, "step": 970 }, { "epoch": 0.4, "learning_rate": 4.65418079929214e-05, "loss": 0.8594, "step": 972 }, { "epoch": 0.4, "learning_rate": 4.652706090547117e-05, "loss": 0.8507, "step": 974 }, { "epoch": 0.4, "learning_rate": 4.6512313818020945e-05, "loss": 0.8609, "step": 976 }, { "epoch": 0.4, "learning_rate": 4.6497566730570715e-05, "loss": 0.8813, "step": 978 }, { "epoch": 0.4, "learning_rate": 4.6482819643120485e-05, "loss": 0.8057, "step": 980 }, { "epoch": 0.4, "learning_rate": 4.6468072555670256e-05, "loss": 0.8062, "step": 982 }, { "epoch": 0.41, "learning_rate": 4.6453325468220026e-05, "loss": 0.8508, "step": 984 }, { "epoch": 0.41, "learning_rate": 4.6438578380769797e-05, "loss": 0.8332, "step": 986 }, { "epoch": 0.41, "learning_rate": 4.642383129331957e-05, "loss": 0.8184, "step": 988 }, { "epoch": 0.41, "learning_rate": 4.6409084205869344e-05, "loss": 0.8128, "step": 990 }, { "epoch": 0.41, "learning_rate": 4.6394337118419114e-05, "loss": 0.8545, "step": 992 }, { "epoch": 0.41, "learning_rate": 4.6379590030968885e-05, "loss": 0.8364, "step": 994 }, { "epoch": 0.41, "learning_rate": 4.6364842943518655e-05, "loss": 0.8842, "step": 996 }, { "epoch": 0.41, "learning_rate": 4.6350095856068426e-05, "loss": 0.8796, "step": 998 }, { "epoch": 0.41, "learning_rate": 4.6335348768618196e-05, "loss": 0.8242, "step": 1000 }, { "epoch": 0.41, "learning_rate": 4.6320601681167966e-05, "loss": 0.8147, "step": 1002 }, { "epoch": 0.41, "learning_rate": 4.6305854593717743e-05, "loss": 0.8647, "step": 1004 }, { "epoch": 0.41, "learning_rate": 4.6291107506267514e-05, "loss": 0.8221, "step": 1006 }, { "epoch": 0.42, "learning_rate": 4.627636041881729e-05, "loss": 0.846, "step": 1008 }, { "epoch": 0.42, "learning_rate": 4.626161333136706e-05, "loss": 0.8636, "step": 1010 }, { "epoch": 0.42, "learning_rate": 4.624686624391683e-05, "loss": 0.8473, "step": 1012 }, { "epoch": 0.42, "learning_rate": 4.62321191564666e-05, "loss": 0.8087, "step": 1014 }, { "epoch": 0.42, "learning_rate": 4.621737206901637e-05, "loss": 0.8669, "step": 1016 }, { "epoch": 0.42, "learning_rate": 4.620262498156614e-05, "loss": 0.8327, "step": 1018 }, { "epoch": 0.42, "learning_rate": 4.618787789411591e-05, "loss": 0.8566, "step": 1020 }, { "epoch": 0.42, "learning_rate": 4.617313080666569e-05, "loss": 0.808, "step": 1022 }, { "epoch": 0.42, "learning_rate": 4.615838371921546e-05, "loss": 0.8282, "step": 1024 }, { "epoch": 0.42, "learning_rate": 4.614363663176523e-05, "loss": 0.8135, "step": 1026 }, { "epoch": 0.42, "learning_rate": 4.6128889544315e-05, "loss": 0.8184, "step": 1028 }, { "epoch": 0.42, "learning_rate": 4.611414245686477e-05, "loss": 0.8127, "step": 1030 }, { "epoch": 0.43, "learning_rate": 4.609939536941454e-05, "loss": 0.8339, "step": 1032 }, { "epoch": 0.43, "learning_rate": 4.608464828196431e-05, "loss": 0.8238, "step": 1034 }, { "epoch": 0.43, "learning_rate": 4.606990119451409e-05, "loss": 0.8214, "step": 1036 }, { "epoch": 0.43, "learning_rate": 4.605515410706386e-05, "loss": 0.8232, "step": 1038 }, { "epoch": 0.43, "learning_rate": 4.604040701961363e-05, "loss": 0.7824, "step": 1040 }, { "epoch": 0.43, "learning_rate": 4.60256599321634e-05, "loss": 0.8186, "step": 1042 }, { "epoch": 0.43, "learning_rate": 4.601091284471317e-05, "loss": 0.8536, "step": 1044 }, { "epoch": 0.43, "learning_rate": 4.599616575726294e-05, "loss": 0.8062, "step": 1046 }, { "epoch": 0.43, "learning_rate": 4.598141866981271e-05, "loss": 0.8714, "step": 1048 }, { "epoch": 0.43, "learning_rate": 4.596667158236249e-05, "loss": 0.8435, "step": 1050 }, { "epoch": 0.43, "learning_rate": 4.595192449491226e-05, "loss": 0.8228, "step": 1052 }, { "epoch": 0.43, "learning_rate": 4.593717740746203e-05, "loss": 0.7986, "step": 1054 }, { "epoch": 0.44, "learning_rate": 4.59224303200118e-05, "loss": 0.8247, "step": 1056 }, { "epoch": 0.44, "learning_rate": 4.590768323256157e-05, "loss": 0.8276, "step": 1058 }, { "epoch": 0.44, "learning_rate": 4.589293614511134e-05, "loss": 0.8052, "step": 1060 }, { "epoch": 0.44, "learning_rate": 4.587818905766111e-05, "loss": 0.8156, "step": 1062 }, { "epoch": 0.44, "learning_rate": 4.586344197021089e-05, "loss": 0.8256, "step": 1064 }, { "epoch": 0.44, "learning_rate": 4.584869488276066e-05, "loss": 0.8305, "step": 1066 }, { "epoch": 0.44, "learning_rate": 4.583394779531043e-05, "loss": 0.7752, "step": 1068 }, { "epoch": 0.44, "learning_rate": 4.58192007078602e-05, "loss": 0.8572, "step": 1070 }, { "epoch": 0.44, "learning_rate": 4.580445362040997e-05, "loss": 0.8221, "step": 1072 }, { "epoch": 0.44, "learning_rate": 4.578970653295974e-05, "loss": 0.8453, "step": 1074 }, { "epoch": 0.44, "learning_rate": 4.577495944550951e-05, "loss": 0.8213, "step": 1076 }, { "epoch": 0.44, "learning_rate": 4.576021235805929e-05, "loss": 0.8088, "step": 1078 }, { "epoch": 0.44, "learning_rate": 4.574546527060906e-05, "loss": 0.8339, "step": 1080 }, { "epoch": 0.45, "learning_rate": 4.573071818315883e-05, "loss": 0.8472, "step": 1082 }, { "epoch": 0.45, "learning_rate": 4.57159710957086e-05, "loss": 0.8698, "step": 1084 }, { "epoch": 0.45, "learning_rate": 4.570122400825837e-05, "loss": 0.7912, "step": 1086 }, { "epoch": 0.45, "learning_rate": 4.568647692080814e-05, "loss": 0.8813, "step": 1088 }, { "epoch": 0.45, "learning_rate": 4.567172983335791e-05, "loss": 0.7854, "step": 1090 }, { "epoch": 0.45, "learning_rate": 4.565698274590769e-05, "loss": 0.842, "step": 1092 }, { "epoch": 0.45, "learning_rate": 4.564223565845746e-05, "loss": 0.8375, "step": 1094 }, { "epoch": 0.45, "learning_rate": 4.562748857100723e-05, "loss": 0.8403, "step": 1096 }, { "epoch": 0.45, "learning_rate": 4.5612741483557e-05, "loss": 0.8051, "step": 1098 }, { "epoch": 0.45, "learning_rate": 4.559799439610677e-05, "loss": 0.9037, "step": 1100 }, { "epoch": 0.45, "learning_rate": 4.558324730865654e-05, "loss": 0.8582, "step": 1102 }, { "epoch": 0.45, "learning_rate": 4.556850022120631e-05, "loss": 0.8324, "step": 1104 }, { "epoch": 0.46, "learning_rate": 4.5553753133756086e-05, "loss": 0.84, "step": 1106 }, { "epoch": 0.46, "learning_rate": 4.553900604630586e-05, "loss": 0.8334, "step": 1108 }, { "epoch": 0.46, "learning_rate": 4.552425895885563e-05, "loss": 0.8086, "step": 1110 }, { "epoch": 0.46, "learning_rate": 4.55095118714054e-05, "loss": 0.8272, "step": 1112 }, { "epoch": 0.46, "learning_rate": 4.549476478395517e-05, "loss": 0.8429, "step": 1114 }, { "epoch": 0.46, "learning_rate": 4.5480017696504945e-05, "loss": 0.7907, "step": 1116 }, { "epoch": 0.46, "learning_rate": 4.5465270609054715e-05, "loss": 0.9171, "step": 1118 }, { "epoch": 0.46, "learning_rate": 4.5450523521604486e-05, "loss": 0.8435, "step": 1120 }, { "epoch": 0.46, "learning_rate": 4.5435776434154256e-05, "loss": 0.8191, "step": 1122 }, { "epoch": 0.46, "learning_rate": 4.542102934670403e-05, "loss": 0.8398, "step": 1124 }, { "epoch": 0.46, "learning_rate": 4.5406282259253803e-05, "loss": 0.8147, "step": 1126 }, { "epoch": 0.46, "learning_rate": 4.5391535171803574e-05, "loss": 0.8307, "step": 1128 }, { "epoch": 0.47, "learning_rate": 4.5376788084353344e-05, "loss": 0.7734, "step": 1130 }, { "epoch": 0.47, "learning_rate": 4.5362040996903115e-05, "loss": 0.8031, "step": 1132 }, { "epoch": 0.47, "learning_rate": 4.5347293909452885e-05, "loss": 0.8328, "step": 1134 }, { "epoch": 0.47, "learning_rate": 4.5332546822002655e-05, "loss": 0.8652, "step": 1136 }, { "epoch": 0.47, "learning_rate": 4.531779973455243e-05, "loss": 0.88, "step": 1138 }, { "epoch": 0.47, "learning_rate": 4.53030526471022e-05, "loss": 0.8037, "step": 1140 }, { "epoch": 0.47, "learning_rate": 4.528830555965197e-05, "loss": 0.8142, "step": 1142 }, { "epoch": 0.47, "learning_rate": 4.5273558472201744e-05, "loss": 0.8702, "step": 1144 }, { "epoch": 0.47, "learning_rate": 4.5258811384751514e-05, "loss": 0.8482, "step": 1146 }, { "epoch": 0.47, "learning_rate": 4.5244064297301284e-05, "loss": 0.7982, "step": 1148 }, { "epoch": 0.47, "learning_rate": 4.5229317209851055e-05, "loss": 0.807, "step": 1150 }, { "epoch": 0.47, "learning_rate": 4.521457012240083e-05, "loss": 0.834, "step": 1152 }, { "epoch": 0.48, "learning_rate": 4.51998230349506e-05, "loss": 0.7921, "step": 1154 }, { "epoch": 0.48, "learning_rate": 4.518507594750037e-05, "loss": 0.8493, "step": 1156 }, { "epoch": 0.48, "learning_rate": 4.517032886005014e-05, "loss": 0.8312, "step": 1158 }, { "epoch": 0.48, "learning_rate": 4.515558177259991e-05, "loss": 0.8723, "step": 1160 }, { "epoch": 0.48, "learning_rate": 4.5140834685149684e-05, "loss": 0.8187, "step": 1162 }, { "epoch": 0.48, "learning_rate": 4.5126087597699454e-05, "loss": 0.8091, "step": 1164 }, { "epoch": 0.48, "learning_rate": 4.511134051024923e-05, "loss": 0.8081, "step": 1166 }, { "epoch": 0.48, "learning_rate": 4.5096593422799e-05, "loss": 0.8407, "step": 1168 }, { "epoch": 0.48, "learning_rate": 4.508184633534877e-05, "loss": 0.8222, "step": 1170 }, { "epoch": 0.48, "learning_rate": 4.506709924789854e-05, "loss": 0.7976, "step": 1172 }, { "epoch": 0.48, "learning_rate": 4.505235216044831e-05, "loss": 0.8155, "step": 1174 }, { "epoch": 0.48, "learning_rate": 4.503760507299808e-05, "loss": 0.8629, "step": 1176 }, { "epoch": 0.49, "learning_rate": 4.502285798554785e-05, "loss": 0.8829, "step": 1178 }, { "epoch": 0.49, "learning_rate": 4.500811089809763e-05, "loss": 0.8536, "step": 1180 }, { "epoch": 0.49, "learning_rate": 4.49933638106474e-05, "loss": 0.8669, "step": 1182 }, { "epoch": 0.49, "learning_rate": 4.497861672319717e-05, "loss": 0.8298, "step": 1184 }, { "epoch": 0.49, "learning_rate": 4.496386963574694e-05, "loss": 0.8221, "step": 1186 }, { "epoch": 0.49, "learning_rate": 4.494912254829671e-05, "loss": 0.8372, "step": 1188 }, { "epoch": 0.49, "learning_rate": 4.493437546084648e-05, "loss": 0.8382, "step": 1190 }, { "epoch": 0.49, "learning_rate": 4.491962837339625e-05, "loss": 0.8355, "step": 1192 }, { "epoch": 0.49, "learning_rate": 4.490488128594603e-05, "loss": 0.8213, "step": 1194 }, { "epoch": 0.49, "learning_rate": 4.48901341984958e-05, "loss": 0.8561, "step": 1196 }, { "epoch": 0.49, "learning_rate": 4.487538711104557e-05, "loss": 0.7984, "step": 1198 }, { "epoch": 0.49, "learning_rate": 4.486064002359534e-05, "loss": 0.826, "step": 1200 }, { "epoch": 0.5, "learning_rate": 4.484589293614511e-05, "loss": 0.8236, "step": 1202 }, { "epoch": 0.5, "learning_rate": 4.483114584869488e-05, "loss": 0.8138, "step": 1204 }, { "epoch": 0.5, "learning_rate": 4.481639876124465e-05, "loss": 0.8331, "step": 1206 }, { "epoch": 0.5, "learning_rate": 4.480165167379443e-05, "loss": 0.873, "step": 1208 }, { "epoch": 0.5, "learning_rate": 4.47869045863442e-05, "loss": 0.8731, "step": 1210 }, { "epoch": 0.5, "learning_rate": 4.477215749889397e-05, "loss": 0.8127, "step": 1212 }, { "epoch": 0.5, "learning_rate": 4.475741041144374e-05, "loss": 0.8225, "step": 1214 }, { "epoch": 0.5, "learning_rate": 4.474266332399351e-05, "loss": 0.8282, "step": 1216 }, { "epoch": 0.5, "learning_rate": 4.472791623654328e-05, "loss": 0.8676, "step": 1218 }, { "epoch": 0.5, "learning_rate": 4.471316914909305e-05, "loss": 0.822, "step": 1220 }, { "epoch": 0.5, "learning_rate": 4.469842206164282e-05, "loss": 0.8263, "step": 1222 }, { "epoch": 0.5, "learning_rate": 4.46836749741926e-05, "loss": 0.8309, "step": 1224 }, { "epoch": 0.51, "learning_rate": 4.4668927886742376e-05, "loss": 0.8127, "step": 1226 }, { "epoch": 0.51, "learning_rate": 4.4654180799292146e-05, "loss": 0.8313, "step": 1228 }, { "epoch": 0.51, "learning_rate": 4.463943371184192e-05, "loss": 0.8109, "step": 1230 }, { "epoch": 0.51, "learning_rate": 4.462468662439169e-05, "loss": 0.8252, "step": 1232 }, { "epoch": 0.51, "learning_rate": 4.460993953694146e-05, "loss": 0.8146, "step": 1234 }, { "epoch": 0.51, "learning_rate": 4.459519244949123e-05, "loss": 0.8171, "step": 1236 }, { "epoch": 0.51, "learning_rate": 4.4580445362041e-05, "loss": 0.8425, "step": 1238 }, { "epoch": 0.51, "learning_rate": 4.4565698274590775e-05, "loss": 0.8072, "step": 1240 }, { "epoch": 0.51, "learning_rate": 4.4550951187140546e-05, "loss": 0.8385, "step": 1242 }, { "epoch": 0.51, "learning_rate": 4.4536204099690316e-05, "loss": 0.8169, "step": 1244 }, { "epoch": 0.51, "learning_rate": 4.4521457012240086e-05, "loss": 0.8715, "step": 1246 }, { "epoch": 0.51, "learning_rate": 4.450670992478986e-05, "loss": 0.8152, "step": 1248 }, { "epoch": 0.52, "learning_rate": 4.449196283733963e-05, "loss": 0.7933, "step": 1250 }, { "epoch": 0.52, "learning_rate": 4.44772157498894e-05, "loss": 0.8323, "step": 1252 }, { "epoch": 0.52, "learning_rate": 4.4462468662439175e-05, "loss": 0.8309, "step": 1254 }, { "epoch": 0.52, "learning_rate": 4.4447721574988945e-05, "loss": 0.8761, "step": 1256 }, { "epoch": 0.52, "learning_rate": 4.4432974487538715e-05, "loss": 0.8615, "step": 1258 }, { "epoch": 0.52, "learning_rate": 4.4418227400088486e-05, "loss": 0.8257, "step": 1260 }, { "epoch": 0.52, "learning_rate": 4.4403480312638256e-05, "loss": 0.8479, "step": 1262 }, { "epoch": 0.52, "learning_rate": 4.4388733225188027e-05, "loss": 0.8621, "step": 1264 }, { "epoch": 0.52, "learning_rate": 4.43739861377378e-05, "loss": 0.8419, "step": 1266 }, { "epoch": 0.52, "learning_rate": 4.4359239050287574e-05, "loss": 0.8843, "step": 1268 }, { "epoch": 0.52, "learning_rate": 4.4344491962837344e-05, "loss": 0.8091, "step": 1270 }, { "epoch": 0.52, "learning_rate": 4.4329744875387115e-05, "loss": 0.8366, "step": 1272 }, { "epoch": 0.52, "learning_rate": 4.4314997787936885e-05, "loss": 0.8674, "step": 1274 }, { "epoch": 0.53, "learning_rate": 4.4300250700486655e-05, "loss": 0.7962, "step": 1276 }, { "epoch": 0.53, "learning_rate": 4.4285503613036426e-05, "loss": 0.8376, "step": 1278 }, { "epoch": 0.53, "learning_rate": 4.4270756525586196e-05, "loss": 0.807, "step": 1280 }, { "epoch": 0.53, "learning_rate": 4.425600943813597e-05, "loss": 0.8511, "step": 1282 }, { "epoch": 0.53, "learning_rate": 4.4241262350685744e-05, "loss": 0.8021, "step": 1284 }, { "epoch": 0.53, "learning_rate": 4.4226515263235514e-05, "loss": 0.8676, "step": 1286 }, { "epoch": 0.53, "learning_rate": 4.4211768175785284e-05, "loss": 0.826, "step": 1288 }, { "epoch": 0.53, "learning_rate": 4.4197021088335055e-05, "loss": 0.7824, "step": 1290 }, { "epoch": 0.53, "learning_rate": 4.4182274000884825e-05, "loss": 0.8464, "step": 1292 }, { "epoch": 0.53, "learning_rate": 4.4167526913434596e-05, "loss": 0.8568, "step": 1294 }, { "epoch": 0.53, "learning_rate": 4.415277982598437e-05, "loss": 0.8431, "step": 1296 }, { "epoch": 0.53, "learning_rate": 4.413803273853414e-05, "loss": 0.8588, "step": 1298 }, { "epoch": 0.54, "learning_rate": 4.4123285651083913e-05, "loss": 0.7948, "step": 1300 }, { "epoch": 0.54, "learning_rate": 4.4108538563633684e-05, "loss": 0.8336, "step": 1302 }, { "epoch": 0.54, "learning_rate": 4.4093791476183454e-05, "loss": 0.8257, "step": 1304 }, { "epoch": 0.54, "learning_rate": 4.4079044388733225e-05, "loss": 0.8376, "step": 1306 }, { "epoch": 0.54, "learning_rate": 4.4064297301282995e-05, "loss": 0.8444, "step": 1308 }, { "epoch": 0.54, "learning_rate": 4.404955021383277e-05, "loss": 0.787, "step": 1310 }, { "epoch": 0.54, "learning_rate": 4.403480312638254e-05, "loss": 0.8364, "step": 1312 }, { "epoch": 0.54, "learning_rate": 4.402005603893231e-05, "loss": 0.8233, "step": 1314 }, { "epoch": 0.54, "learning_rate": 4.400530895148208e-05, "loss": 0.8191, "step": 1316 }, { "epoch": 0.54, "learning_rate": 4.3990561864031854e-05, "loss": 0.8113, "step": 1318 }, { "epoch": 0.54, "learning_rate": 4.3975814776581624e-05, "loss": 0.7833, "step": 1320 }, { "epoch": 0.54, "learning_rate": 4.3961067689131394e-05, "loss": 0.7891, "step": 1322 }, { "epoch": 0.55, "learning_rate": 4.3946320601681165e-05, "loss": 0.8593, "step": 1324 }, { "epoch": 0.55, "learning_rate": 4.393157351423094e-05, "loss": 0.8615, "step": 1326 }, { "epoch": 0.55, "learning_rate": 4.391682642678071e-05, "loss": 0.772, "step": 1328 }, { "epoch": 0.55, "learning_rate": 4.390207933933048e-05, "loss": 0.7948, "step": 1330 }, { "epoch": 0.55, "learning_rate": 4.388733225188025e-05, "loss": 0.8139, "step": 1332 }, { "epoch": 0.55, "learning_rate": 4.387258516443002e-05, "loss": 0.8183, "step": 1334 }, { "epoch": 0.55, "learning_rate": 4.3857838076979794e-05, "loss": 0.8156, "step": 1336 }, { "epoch": 0.55, "learning_rate": 4.384309098952957e-05, "loss": 0.8836, "step": 1338 }, { "epoch": 0.55, "learning_rate": 4.382834390207934e-05, "loss": 0.8565, "step": 1340 }, { "epoch": 0.55, "learning_rate": 4.381359681462912e-05, "loss": 0.8458, "step": 1342 }, { "epoch": 0.55, "learning_rate": 4.379884972717889e-05, "loss": 0.8234, "step": 1344 }, { "epoch": 0.55, "learning_rate": 4.378410263972866e-05, "loss": 0.8144, "step": 1346 }, { "epoch": 0.56, "learning_rate": 4.376935555227843e-05, "loss": 0.852, "step": 1348 }, { "epoch": 0.56, "learning_rate": 4.37546084648282e-05, "loss": 0.8161, "step": 1350 }, { "epoch": 0.56, "learning_rate": 4.373986137737797e-05, "loss": 0.838, "step": 1352 }, { "epoch": 0.56, "learning_rate": 4.372511428992774e-05, "loss": 0.8771, "step": 1354 }, { "epoch": 0.56, "learning_rate": 4.371036720247752e-05, "loss": 0.8506, "step": 1356 }, { "epoch": 0.56, "learning_rate": 4.369562011502729e-05, "loss": 0.8154, "step": 1358 }, { "epoch": 0.56, "learning_rate": 4.368087302757706e-05, "loss": 0.8098, "step": 1360 }, { "epoch": 0.56, "learning_rate": 4.366612594012683e-05, "loss": 0.8262, "step": 1362 }, { "epoch": 0.56, "learning_rate": 4.36513788526766e-05, "loss": 0.7663, "step": 1364 }, { "epoch": 0.56, "learning_rate": 4.363663176522637e-05, "loss": 0.8635, "step": 1366 }, { "epoch": 0.56, "learning_rate": 4.362188467777614e-05, "loss": 0.8576, "step": 1368 }, { "epoch": 0.56, "learning_rate": 4.360713759032592e-05, "loss": 0.7925, "step": 1370 }, { "epoch": 0.57, "learning_rate": 4.359239050287569e-05, "loss": 0.7896, "step": 1372 }, { "epoch": 0.57, "learning_rate": 4.357764341542546e-05, "loss": 0.8537, "step": 1374 }, { "epoch": 0.57, "learning_rate": 4.356289632797523e-05, "loss": 0.8585, "step": 1376 }, { "epoch": 0.57, "learning_rate": 4.3548149240525e-05, "loss": 0.8166, "step": 1378 }, { "epoch": 0.57, "learning_rate": 4.353340215307477e-05, "loss": 0.9279, "step": 1380 }, { "epoch": 0.57, "learning_rate": 4.351865506562454e-05, "loss": 0.8345, "step": 1382 }, { "epoch": 0.57, "learning_rate": 4.3503907978174316e-05, "loss": 0.8519, "step": 1384 }, { "epoch": 0.57, "learning_rate": 4.348916089072409e-05, "loss": 0.8501, "step": 1386 }, { "epoch": 0.57, "learning_rate": 4.347441380327386e-05, "loss": 0.8254, "step": 1388 }, { "epoch": 0.57, "learning_rate": 4.345966671582363e-05, "loss": 0.8241, "step": 1390 }, { "epoch": 0.57, "learning_rate": 4.34449196283734e-05, "loss": 0.7967, "step": 1392 }, { "epoch": 0.57, "learning_rate": 4.343017254092317e-05, "loss": 0.8526, "step": 1394 }, { "epoch": 0.58, "learning_rate": 4.341542545347294e-05, "loss": 0.8325, "step": 1396 }, { "epoch": 0.58, "learning_rate": 4.3400678366022716e-05, "loss": 0.8454, "step": 1398 }, { "epoch": 0.58, "learning_rate": 4.3385931278572486e-05, "loss": 0.7675, "step": 1400 }, { "epoch": 0.58, "learning_rate": 4.3371184191122256e-05, "loss": 0.8005, "step": 1402 }, { "epoch": 0.58, "learning_rate": 4.335643710367203e-05, "loss": 0.806, "step": 1404 }, { "epoch": 0.58, "learning_rate": 4.33416900162218e-05, "loss": 0.7897, "step": 1406 }, { "epoch": 0.58, "learning_rate": 4.332694292877157e-05, "loss": 0.8342, "step": 1408 }, { "epoch": 0.58, "learning_rate": 4.331219584132134e-05, "loss": 0.8208, "step": 1410 }, { "epoch": 0.58, "learning_rate": 4.329744875387111e-05, "loss": 0.8533, "step": 1412 }, { "epoch": 0.58, "learning_rate": 4.3282701666420885e-05, "loss": 0.802, "step": 1414 }, { "epoch": 0.58, "learning_rate": 4.3267954578970656e-05, "loss": 0.8007, "step": 1416 }, { "epoch": 0.58, "learning_rate": 4.3253207491520426e-05, "loss": 0.8142, "step": 1418 }, { "epoch": 0.59, "learning_rate": 4.3238460404070196e-05, "loss": 0.8091, "step": 1420 }, { "epoch": 0.59, "learning_rate": 4.322371331661997e-05, "loss": 0.8296, "step": 1422 }, { "epoch": 0.59, "learning_rate": 4.320896622916974e-05, "loss": 0.8258, "step": 1424 }, { "epoch": 0.59, "learning_rate": 4.319421914171951e-05, "loss": 0.841, "step": 1426 }, { "epoch": 0.59, "learning_rate": 4.3179472054269285e-05, "loss": 0.8215, "step": 1428 }, { "epoch": 0.59, "learning_rate": 4.3164724966819055e-05, "loss": 0.8278, "step": 1430 }, { "epoch": 0.59, "learning_rate": 4.3149977879368825e-05, "loss": 0.85, "step": 1432 }, { "epoch": 0.59, "learning_rate": 4.3135230791918596e-05, "loss": 0.8618, "step": 1434 }, { "epoch": 0.59, "learning_rate": 4.3120483704468366e-05, "loss": 0.788, "step": 1436 }, { "epoch": 0.59, "learning_rate": 4.3105736617018136e-05, "loss": 0.793, "step": 1438 }, { "epoch": 0.59, "learning_rate": 4.309098952956791e-05, "loss": 0.8573, "step": 1440 }, { "epoch": 0.59, "learning_rate": 4.3076242442117684e-05, "loss": 0.8381, "step": 1442 }, { "epoch": 0.59, "learning_rate": 4.3061495354667454e-05, "loss": 0.8146, "step": 1444 }, { "epoch": 0.6, "learning_rate": 4.3046748267217225e-05, "loss": 0.7729, "step": 1446 }, { "epoch": 0.6, "learning_rate": 4.3032001179767e-05, "loss": 0.8063, "step": 1448 }, { "epoch": 0.6, "learning_rate": 4.301725409231677e-05, "loss": 0.8212, "step": 1450 }, { "epoch": 0.6, "learning_rate": 4.300250700486654e-05, "loss": 0.7833, "step": 1452 }, { "epoch": 0.6, "learning_rate": 4.298775991741631e-05, "loss": 0.8, "step": 1454 }, { "epoch": 0.6, "learning_rate": 4.297301282996608e-05, "loss": 0.8289, "step": 1456 }, { "epoch": 0.6, "learning_rate": 4.295826574251586e-05, "loss": 0.8185, "step": 1458 }, { "epoch": 0.6, "learning_rate": 4.294351865506563e-05, "loss": 0.7993, "step": 1460 }, { "epoch": 0.6, "learning_rate": 4.29287715676154e-05, "loss": 0.8207, "step": 1462 }, { "epoch": 0.6, "learning_rate": 4.291402448016517e-05, "loss": 0.8709, "step": 1464 }, { "epoch": 0.6, "learning_rate": 4.289927739271494e-05, "loss": 0.7985, "step": 1466 }, { "epoch": 0.6, "learning_rate": 4.288453030526471e-05, "loss": 0.8283, "step": 1468 }, { "epoch": 0.61, "learning_rate": 4.286978321781448e-05, "loss": 0.8379, "step": 1470 }, { "epoch": 0.61, "learning_rate": 4.285503613036426e-05, "loss": 0.8803, "step": 1472 }, { "epoch": 0.61, "learning_rate": 4.284028904291403e-05, "loss": 0.8251, "step": 1474 }, { "epoch": 0.61, "learning_rate": 4.28255419554638e-05, "loss": 0.8228, "step": 1476 }, { "epoch": 0.61, "learning_rate": 4.281079486801357e-05, "loss": 0.7585, "step": 1478 }, { "epoch": 0.61, "learning_rate": 4.279604778056334e-05, "loss": 0.8203, "step": 1480 }, { "epoch": 0.61, "learning_rate": 4.278130069311311e-05, "loss": 0.8617, "step": 1482 }, { "epoch": 0.61, "learning_rate": 4.276655360566288e-05, "loss": 0.8306, "step": 1484 }, { "epoch": 0.61, "learning_rate": 4.275180651821266e-05, "loss": 0.827, "step": 1486 }, { "epoch": 0.61, "learning_rate": 4.273705943076243e-05, "loss": 0.8528, "step": 1488 }, { "epoch": 0.61, "learning_rate": 4.27223123433122e-05, "loss": 0.8362, "step": 1490 }, { "epoch": 0.61, "learning_rate": 4.270756525586197e-05, "loss": 0.8442, "step": 1492 }, { "epoch": 0.62, "learning_rate": 4.269281816841174e-05, "loss": 0.8021, "step": 1494 }, { "epoch": 0.62, "learning_rate": 4.267807108096151e-05, "loss": 0.8326, "step": 1496 }, { "epoch": 0.62, "learning_rate": 4.266332399351128e-05, "loss": 0.8301, "step": 1498 }, { "epoch": 0.62, "learning_rate": 4.264857690606106e-05, "loss": 0.8409, "step": 1500 }, { "epoch": 0.62, "learning_rate": 4.263382981861083e-05, "loss": 0.7758, "step": 1502 }, { "epoch": 0.62, "learning_rate": 4.26190827311606e-05, "loss": 0.8291, "step": 1504 }, { "epoch": 0.62, "learning_rate": 4.260433564371037e-05, "loss": 0.8028, "step": 1506 }, { "epoch": 0.62, "learning_rate": 4.258958855626014e-05, "loss": 0.8122, "step": 1508 }, { "epoch": 0.62, "learning_rate": 4.257484146880991e-05, "loss": 0.8234, "step": 1510 }, { "epoch": 0.62, "learning_rate": 4.256009438135968e-05, "loss": 0.8213, "step": 1512 }, { "epoch": 0.62, "learning_rate": 4.254534729390945e-05, "loss": 0.8344, "step": 1514 }, { "epoch": 0.62, "learning_rate": 4.253060020645923e-05, "loss": 0.8247, "step": 1516 }, { "epoch": 0.63, "learning_rate": 4.2515853119009e-05, "loss": 0.8342, "step": 1518 }, { "epoch": 0.63, "learning_rate": 4.250110603155877e-05, "loss": 0.8142, "step": 1520 }, { "epoch": 0.63, "learning_rate": 4.248635894410854e-05, "loss": 0.785, "step": 1522 }, { "epoch": 0.63, "learning_rate": 4.247161185665831e-05, "loss": 0.7957, "step": 1524 }, { "epoch": 0.63, "learning_rate": 4.245686476920808e-05, "loss": 0.8078, "step": 1526 }, { "epoch": 0.63, "learning_rate": 4.244211768175785e-05, "loss": 0.7706, "step": 1528 }, { "epoch": 0.63, "learning_rate": 4.242737059430763e-05, "loss": 0.7852, "step": 1530 }, { "epoch": 0.63, "learning_rate": 4.24126235068574e-05, "loss": 0.8091, "step": 1532 }, { "epoch": 0.63, "learning_rate": 4.239787641940717e-05, "loss": 0.7481, "step": 1534 }, { "epoch": 0.63, "learning_rate": 4.238312933195694e-05, "loss": 0.7796, "step": 1536 }, { "epoch": 0.63, "learning_rate": 4.236838224450671e-05, "loss": 0.7684, "step": 1538 }, { "epoch": 0.63, "learning_rate": 4.235363515705648e-05, "loss": 0.801, "step": 1540 }, { "epoch": 0.64, "learning_rate": 4.233888806960625e-05, "loss": 0.7736, "step": 1542 }, { "epoch": 0.64, "learning_rate": 4.232414098215603e-05, "loss": 0.8029, "step": 1544 }, { "epoch": 0.64, "learning_rate": 4.23093938947058e-05, "loss": 0.8057, "step": 1546 }, { "epoch": 0.64, "learning_rate": 4.229464680725557e-05, "loss": 0.7621, "step": 1548 }, { "epoch": 0.64, "learning_rate": 4.227989971980534e-05, "loss": 0.8406, "step": 1550 }, { "epoch": 0.64, "learning_rate": 4.226515263235511e-05, "loss": 0.7833, "step": 1552 }, { "epoch": 0.64, "learning_rate": 4.225040554490488e-05, "loss": 0.7774, "step": 1554 }, { "epoch": 0.64, "learning_rate": 4.223565845745465e-05, "loss": 0.733, "step": 1556 }, { "epoch": 0.64, "learning_rate": 4.2220911370004426e-05, "loss": 0.8172, "step": 1558 }, { "epoch": 0.64, "learning_rate": 4.22061642825542e-05, "loss": 0.7769, "step": 1560 }, { "epoch": 0.64, "learning_rate": 4.2191417195103974e-05, "loss": 0.7938, "step": 1562 }, { "epoch": 0.64, "learning_rate": 4.2176670107653744e-05, "loss": 0.8023, "step": 1564 }, { "epoch": 0.65, "learning_rate": 4.2161923020203514e-05, "loss": 0.7959, "step": 1566 }, { "epoch": 0.65, "learning_rate": 4.2147175932753285e-05, "loss": 0.8241, "step": 1568 }, { "epoch": 0.65, "learning_rate": 4.2132428845303055e-05, "loss": 0.7865, "step": 1570 }, { "epoch": 0.65, "learning_rate": 4.2117681757852826e-05, "loss": 0.715, "step": 1572 }, { "epoch": 0.65, "learning_rate": 4.21029346704026e-05, "loss": 0.7328, "step": 1574 }, { "epoch": 0.65, "learning_rate": 4.208818758295237e-05, "loss": 0.8034, "step": 1576 }, { "epoch": 0.65, "learning_rate": 4.2073440495502143e-05, "loss": 0.7578, "step": 1578 }, { "epoch": 0.65, "learning_rate": 4.2058693408051914e-05, "loss": 0.8372, "step": 1580 }, { "epoch": 0.65, "learning_rate": 4.2043946320601684e-05, "loss": 0.7244, "step": 1582 }, { "epoch": 0.65, "learning_rate": 4.2029199233151455e-05, "loss": 0.7408, "step": 1584 }, { "epoch": 0.65, "learning_rate": 4.2014452145701225e-05, "loss": 0.7625, "step": 1586 }, { "epoch": 0.65, "learning_rate": 4.1999705058251e-05, "loss": 0.7836, "step": 1588 }, { "epoch": 0.66, "learning_rate": 4.198495797080077e-05, "loss": 0.748, "step": 1590 }, { "epoch": 0.66, "learning_rate": 4.197021088335054e-05, "loss": 0.7324, "step": 1592 }, { "epoch": 0.66, "learning_rate": 4.195546379590031e-05, "loss": 0.7164, "step": 1594 }, { "epoch": 0.66, "learning_rate": 4.1940716708450083e-05, "loss": 0.7824, "step": 1596 }, { "epoch": 0.66, "learning_rate": 4.1925969620999854e-05, "loss": 0.7625, "step": 1598 }, { "epoch": 0.66, "learning_rate": 4.1911222533549624e-05, "loss": 0.787, "step": 1600 }, { "epoch": 0.66, "learning_rate": 4.18964754460994e-05, "loss": 0.761, "step": 1602 }, { "epoch": 0.66, "learning_rate": 4.188172835864917e-05, "loss": 0.7429, "step": 1604 }, { "epoch": 0.66, "learning_rate": 4.186698127119894e-05, "loss": 0.7039, "step": 1606 }, { "epoch": 0.66, "learning_rate": 4.185223418374871e-05, "loss": 0.7159, "step": 1608 }, { "epoch": 0.66, "learning_rate": 4.183748709629848e-05, "loss": 0.7142, "step": 1610 }, { "epoch": 0.66, "learning_rate": 4.182274000884825e-05, "loss": 0.74, "step": 1612 }, { "epoch": 0.67, "learning_rate": 4.1807992921398024e-05, "loss": 0.6937, "step": 1614 }, { "epoch": 0.67, "learning_rate": 4.1793245833947794e-05, "loss": 0.7695, "step": 1616 }, { "epoch": 0.67, "learning_rate": 4.177849874649757e-05, "loss": 0.7314, "step": 1618 }, { "epoch": 0.67, "learning_rate": 4.176375165904734e-05, "loss": 0.6929, "step": 1620 }, { "epoch": 0.67, "learning_rate": 4.174900457159711e-05, "loss": 0.7062, "step": 1622 }, { "epoch": 0.67, "learning_rate": 4.173425748414688e-05, "loss": 0.6904, "step": 1624 }, { "epoch": 0.67, "learning_rate": 4.171951039669665e-05, "loss": 0.6807, "step": 1626 }, { "epoch": 0.67, "learning_rate": 4.170476330924642e-05, "loss": 0.675, "step": 1628 }, { "epoch": 0.67, "learning_rate": 4.169001622179619e-05, "loss": 0.6715, "step": 1630 }, { "epoch": 0.67, "learning_rate": 4.167526913434597e-05, "loss": 0.6523, "step": 1632 }, { "epoch": 0.67, "learning_rate": 4.166052204689574e-05, "loss": 0.7049, "step": 1634 }, { "epoch": 0.67, "learning_rate": 4.164577495944551e-05, "loss": 0.6815, "step": 1636 }, { "epoch": 0.67, "learning_rate": 4.163102787199528e-05, "loss": 0.6788, "step": 1638 }, { "epoch": 0.68, "learning_rate": 4.161628078454505e-05, "loss": 0.7023, "step": 1640 }, { "epoch": 0.68, "learning_rate": 4.160153369709482e-05, "loss": 0.6764, "step": 1642 }, { "epoch": 0.68, "learning_rate": 4.158678660964459e-05, "loss": 0.6692, "step": 1644 }, { "epoch": 0.68, "learning_rate": 4.157203952219437e-05, "loss": 0.6781, "step": 1646 }, { "epoch": 0.68, "learning_rate": 4.155729243474414e-05, "loss": 0.6691, "step": 1648 }, { "epoch": 0.68, "learning_rate": 4.154254534729391e-05, "loss": 0.685, "step": 1650 }, { "epoch": 0.68, "learning_rate": 4.152779825984368e-05, "loss": 0.8321, "step": 1652 }, { "epoch": 0.68, "learning_rate": 4.151305117239345e-05, "loss": 0.6421, "step": 1654 }, { "epoch": 0.68, "learning_rate": 4.149830408494322e-05, "loss": 0.7624, "step": 1656 }, { "epoch": 0.68, "learning_rate": 4.148355699749299e-05, "loss": 0.7034, "step": 1658 }, { "epoch": 0.68, "learning_rate": 4.146880991004277e-05, "loss": 0.6854, "step": 1660 }, { "epoch": 0.68, "learning_rate": 4.145406282259254e-05, "loss": 0.6872, "step": 1662 }, { "epoch": 0.69, "learning_rate": 4.143931573514231e-05, "loss": 0.6933, "step": 1664 }, { "epoch": 0.69, "learning_rate": 4.142456864769208e-05, "loss": 0.6613, "step": 1666 }, { "epoch": 0.69, "learning_rate": 4.140982156024185e-05, "loss": 0.6774, "step": 1668 }, { "epoch": 0.69, "learning_rate": 4.139507447279162e-05, "loss": 0.6366, "step": 1670 }, { "epoch": 0.69, "learning_rate": 4.13803273853414e-05, "loss": 0.6133, "step": 1672 }, { "epoch": 0.69, "learning_rate": 4.136558029789117e-05, "loss": 0.6792, "step": 1674 }, { "epoch": 0.69, "learning_rate": 4.1350833210440946e-05, "loss": 0.6475, "step": 1676 }, { "epoch": 0.69, "learning_rate": 4.1336086122990716e-05, "loss": 0.6826, "step": 1678 }, { "epoch": 0.69, "learning_rate": 4.1321339035540486e-05, "loss": 0.6444, "step": 1680 }, { "epoch": 0.69, "learning_rate": 4.130659194809026e-05, "loss": 0.671, "step": 1682 }, { "epoch": 0.69, "learning_rate": 4.129184486064003e-05, "loss": 0.7377, "step": 1684 }, { "epoch": 0.69, "learning_rate": 4.12770977731898e-05, "loss": 0.6623, "step": 1686 }, { "epoch": 0.7, "learning_rate": 4.126235068573957e-05, "loss": 0.6232, "step": 1688 }, { "epoch": 0.7, "learning_rate": 4.1247603598289345e-05, "loss": 0.6682, "step": 1690 }, { "epoch": 0.7, "learning_rate": 4.1232856510839115e-05, "loss": 0.6431, "step": 1692 }, { "epoch": 0.7, "learning_rate": 4.1218109423388886e-05, "loss": 0.5756, "step": 1694 }, { "epoch": 0.7, "learning_rate": 4.1203362335938656e-05, "loss": 0.681, "step": 1696 }, { "epoch": 0.7, "learning_rate": 4.1188615248488426e-05, "loss": 0.6087, "step": 1698 }, { "epoch": 0.7, "learning_rate": 4.11738681610382e-05, "loss": 0.6113, "step": 1700 }, { "epoch": 0.7, "learning_rate": 4.115912107358797e-05, "loss": 0.626, "step": 1702 }, { "epoch": 0.7, "learning_rate": 4.114437398613774e-05, "loss": 0.6438, "step": 1704 }, { "epoch": 0.7, "learning_rate": 4.1129626898687515e-05, "loss": 0.7078, "step": 1706 }, { "epoch": 0.7, "learning_rate": 4.1114879811237285e-05, "loss": 0.6205, "step": 1708 }, { "epoch": 0.7, "learning_rate": 4.1100132723787055e-05, "loss": 0.6851, "step": 1710 }, { "epoch": 0.71, "learning_rate": 4.1085385636336826e-05, "loss": 0.6878, "step": 1712 }, { "epoch": 0.71, "learning_rate": 4.1070638548886596e-05, "loss": 0.659, "step": 1714 }, { "epoch": 0.71, "learning_rate": 4.1055891461436366e-05, "loss": 0.6709, "step": 1716 }, { "epoch": 0.71, "learning_rate": 4.104114437398614e-05, "loss": 0.6479, "step": 1718 }, { "epoch": 0.71, "learning_rate": 4.1026397286535914e-05, "loss": 0.6662, "step": 1720 }, { "epoch": 0.71, "learning_rate": 4.1011650199085684e-05, "loss": 0.6182, "step": 1722 }, { "epoch": 0.71, "learning_rate": 4.0996903111635455e-05, "loss": 0.6007, "step": 1724 }, { "epoch": 0.71, "learning_rate": 4.0982156024185225e-05, "loss": 0.6237, "step": 1726 }, { "epoch": 0.71, "learning_rate": 4.0967408936734995e-05, "loss": 0.6529, "step": 1728 }, { "epoch": 0.71, "learning_rate": 4.0952661849284766e-05, "loss": 0.6719, "step": 1730 }, { "epoch": 0.71, "learning_rate": 4.0937914761834536e-05, "loss": 0.6873, "step": 1732 }, { "epoch": 0.71, "learning_rate": 4.092316767438431e-05, "loss": 0.6082, "step": 1734 }, { "epoch": 0.72, "learning_rate": 4.0908420586934084e-05, "loss": 0.6193, "step": 1736 }, { "epoch": 0.72, "learning_rate": 4.0893673499483854e-05, "loss": 0.6521, "step": 1738 }, { "epoch": 0.72, "learning_rate": 4.0878926412033624e-05, "loss": 0.5972, "step": 1740 }, { "epoch": 0.72, "learning_rate": 4.0864179324583395e-05, "loss": 0.6502, "step": 1742 }, { "epoch": 0.72, "learning_rate": 4.0849432237133165e-05, "loss": 0.6213, "step": 1744 }, { "epoch": 0.72, "learning_rate": 4.0834685149682935e-05, "loss": 0.6415, "step": 1746 }, { "epoch": 0.72, "learning_rate": 4.081993806223271e-05, "loss": 0.6002, "step": 1748 }, { "epoch": 0.72, "learning_rate": 4.080519097478248e-05, "loss": 0.6413, "step": 1750 }, { "epoch": 0.72, "learning_rate": 4.079044388733225e-05, "loss": 0.6248, "step": 1752 }, { "epoch": 0.72, "learning_rate": 4.0775696799882024e-05, "loss": 0.6517, "step": 1754 }, { "epoch": 0.72, "learning_rate": 4.0760949712431794e-05, "loss": 0.5927, "step": 1756 }, { "epoch": 0.72, "learning_rate": 4.0746202624981564e-05, "loss": 0.585, "step": 1758 }, { "epoch": 0.73, "learning_rate": 4.0731455537531335e-05, "loss": 0.6427, "step": 1760 }, { "epoch": 0.73, "learning_rate": 4.071670845008111e-05, "loss": 0.6499, "step": 1762 }, { "epoch": 0.73, "learning_rate": 4.070196136263088e-05, "loss": 0.6628, "step": 1764 }, { "epoch": 0.73, "learning_rate": 4.068721427518065e-05, "loss": 0.5609, "step": 1766 }, { "epoch": 0.73, "learning_rate": 4.067246718773042e-05, "loss": 0.5767, "step": 1768 }, { "epoch": 0.73, "learning_rate": 4.0657720100280193e-05, "loss": 0.6732, "step": 1770 }, { "epoch": 0.73, "learning_rate": 4.0642973012829964e-05, "loss": 0.6323, "step": 1772 }, { "epoch": 0.73, "learning_rate": 4.0628225925379734e-05, "loss": 0.612, "step": 1774 }, { "epoch": 0.73, "learning_rate": 4.061347883792951e-05, "loss": 0.5862, "step": 1776 }, { "epoch": 0.73, "learning_rate": 4.059873175047928e-05, "loss": 0.5813, "step": 1778 }, { "epoch": 0.73, "learning_rate": 4.058398466302905e-05, "loss": 0.6015, "step": 1780 }, { "epoch": 0.73, "learning_rate": 4.056923757557883e-05, "loss": 0.5844, "step": 1782 }, { "epoch": 0.74, "learning_rate": 4.05544904881286e-05, "loss": 0.64, "step": 1784 }, { "epoch": 0.74, "learning_rate": 4.053974340067837e-05, "loss": 0.5815, "step": 1786 }, { "epoch": 0.74, "learning_rate": 4.052499631322814e-05, "loss": 0.6386, "step": 1788 }, { "epoch": 0.74, "learning_rate": 4.051024922577791e-05, "loss": 0.5999, "step": 1790 }, { "epoch": 0.74, "learning_rate": 4.049550213832769e-05, "loss": 0.6016, "step": 1792 }, { "epoch": 0.74, "learning_rate": 4.048075505087746e-05, "loss": 0.6203, "step": 1794 }, { "epoch": 0.74, "learning_rate": 4.046600796342723e-05, "loss": 0.6047, "step": 1796 }, { "epoch": 0.74, "learning_rate": 4.0451260875977e-05, "loss": 0.6276, "step": 1798 }, { "epoch": 0.74, "learning_rate": 4.043651378852677e-05, "loss": 0.6086, "step": 1800 }, { "epoch": 0.74, "learning_rate": 4.042176670107654e-05, "loss": 0.6421, "step": 1802 }, { "epoch": 0.74, "learning_rate": 4.040701961362631e-05, "loss": 0.6232, "step": 1804 }, { "epoch": 0.74, "learning_rate": 4.039227252617608e-05, "loss": 0.5741, "step": 1806 }, { "epoch": 0.74, "learning_rate": 4.037752543872586e-05, "loss": 0.6024, "step": 1808 }, { "epoch": 0.75, "learning_rate": 4.036277835127563e-05, "loss": 0.5927, "step": 1810 }, { "epoch": 0.75, "learning_rate": 4.03480312638254e-05, "loss": 0.5929, "step": 1812 }, { "epoch": 0.75, "learning_rate": 4.033328417637517e-05, "loss": 0.6248, "step": 1814 }, { "epoch": 0.75, "learning_rate": 4.031853708892494e-05, "loss": 0.5759, "step": 1816 }, { "epoch": 0.75, "learning_rate": 4.030379000147471e-05, "loss": 0.5706, "step": 1818 }, { "epoch": 0.75, "learning_rate": 4.028904291402448e-05, "loss": 0.6462, "step": 1820 }, { "epoch": 0.75, "learning_rate": 4.027429582657426e-05, "loss": 0.6058, "step": 1822 }, { "epoch": 0.75, "learning_rate": 4.025954873912403e-05, "loss": 0.5881, "step": 1824 }, { "epoch": 0.75, "learning_rate": 4.02448016516738e-05, "loss": 0.6122, "step": 1826 }, { "epoch": 0.75, "learning_rate": 4.023005456422357e-05, "loss": 0.5945, "step": 1828 }, { "epoch": 0.75, "learning_rate": 4.021530747677334e-05, "loss": 0.597, "step": 1830 }, { "epoch": 0.75, "learning_rate": 4.020056038932311e-05, "loss": 0.5553, "step": 1832 }, { "epoch": 0.76, "learning_rate": 4.018581330187288e-05, "loss": 0.6057, "step": 1834 }, { "epoch": 0.76, "learning_rate": 4.0171066214422656e-05, "loss": 0.575, "step": 1836 }, { "epoch": 0.76, "learning_rate": 4.0156319126972427e-05, "loss": 0.6124, "step": 1838 }, { "epoch": 0.76, "learning_rate": 4.01415720395222e-05, "loss": 0.6177, "step": 1840 }, { "epoch": 0.76, "learning_rate": 4.012682495207197e-05, "loss": 0.5781, "step": 1842 }, { "epoch": 0.76, "learning_rate": 4.011207786462174e-05, "loss": 0.6067, "step": 1844 }, { "epoch": 0.76, "learning_rate": 4.009733077717151e-05, "loss": 0.559, "step": 1846 }, { "epoch": 0.76, "learning_rate": 4.008258368972128e-05, "loss": 0.6018, "step": 1848 }, { "epoch": 0.76, "learning_rate": 4.0067836602271056e-05, "loss": 0.6155, "step": 1850 }, { "epoch": 0.76, "learning_rate": 4.0053089514820826e-05, "loss": 0.6113, "step": 1852 }, { "epoch": 0.76, "learning_rate": 4.0038342427370596e-05, "loss": 0.5407, "step": 1854 }, { "epoch": 0.76, "learning_rate": 4.002359533992037e-05, "loss": 0.5407, "step": 1856 }, { "epoch": 0.77, "learning_rate": 4.000884825247014e-05, "loss": 0.5448, "step": 1858 }, { "epoch": 0.77, "learning_rate": 3.999410116501991e-05, "loss": 0.569, "step": 1860 }, { "epoch": 0.77, "learning_rate": 3.997935407756968e-05, "loss": 0.572, "step": 1862 }, { "epoch": 0.77, "learning_rate": 3.9964606990119455e-05, "loss": 0.5465, "step": 1864 }, { "epoch": 0.77, "learning_rate": 3.9949859902669225e-05, "loss": 0.6186, "step": 1866 }, { "epoch": 0.77, "learning_rate": 3.9935112815218996e-05, "loss": 0.5391, "step": 1868 }, { "epoch": 0.77, "learning_rate": 3.9920365727768766e-05, "loss": 0.5822, "step": 1870 }, { "epoch": 0.77, "learning_rate": 3.9905618640318536e-05, "loss": 0.5548, "step": 1872 }, { "epoch": 0.77, "learning_rate": 3.989087155286831e-05, "loss": 0.6418, "step": 1874 }, { "epoch": 0.77, "learning_rate": 3.987612446541808e-05, "loss": 0.5635, "step": 1876 }, { "epoch": 0.77, "learning_rate": 3.9861377377967854e-05, "loss": 0.5891, "step": 1878 }, { "epoch": 0.77, "learning_rate": 3.9846630290517625e-05, "loss": 0.5836, "step": 1880 }, { "epoch": 0.78, "learning_rate": 3.9831883203067395e-05, "loss": 0.5769, "step": 1882 }, { "epoch": 0.78, "learning_rate": 3.9817136115617165e-05, "loss": 0.5997, "step": 1884 }, { "epoch": 0.78, "learning_rate": 3.9802389028166936e-05, "loss": 0.5533, "step": 1886 }, { "epoch": 0.78, "learning_rate": 3.9787641940716706e-05, "loss": 0.5398, "step": 1888 }, { "epoch": 0.78, "learning_rate": 3.9772894853266476e-05, "loss": 0.5413, "step": 1890 }, { "epoch": 0.78, "learning_rate": 3.9758147765816254e-05, "loss": 0.523, "step": 1892 }, { "epoch": 0.78, "learning_rate": 3.974340067836603e-05, "loss": 0.5979, "step": 1894 }, { "epoch": 0.78, "learning_rate": 3.97286535909158e-05, "loss": 0.5861, "step": 1896 }, { "epoch": 0.78, "learning_rate": 3.971390650346557e-05, "loss": 0.5185, "step": 1898 }, { "epoch": 0.78, "learning_rate": 3.969915941601534e-05, "loss": 0.6024, "step": 1900 }, { "epoch": 0.78, "learning_rate": 3.968441232856511e-05, "loss": 0.5562, "step": 1902 }, { "epoch": 0.78, "learning_rate": 3.966966524111488e-05, "loss": 0.5882, "step": 1904 }, { "epoch": 0.79, "learning_rate": 3.965491815366465e-05, "loss": 0.5644, "step": 1906 }, { "epoch": 0.79, "learning_rate": 3.964017106621442e-05, "loss": 0.6252, "step": 1908 }, { "epoch": 0.79, "learning_rate": 3.96254239787642e-05, "loss": 0.5662, "step": 1910 }, { "epoch": 0.79, "learning_rate": 3.961067689131397e-05, "loss": 0.5367, "step": 1912 }, { "epoch": 0.79, "learning_rate": 3.959592980386374e-05, "loss": 0.5991, "step": 1914 }, { "epoch": 0.79, "learning_rate": 3.958118271641351e-05, "loss": 0.5349, "step": 1916 }, { "epoch": 0.79, "learning_rate": 3.956643562896328e-05, "loss": 0.5764, "step": 1918 }, { "epoch": 0.79, "learning_rate": 3.955168854151305e-05, "loss": 0.5159, "step": 1920 }, { "epoch": 0.79, "learning_rate": 3.953694145406282e-05, "loss": 0.5909, "step": 1922 }, { "epoch": 0.79, "learning_rate": 3.95221943666126e-05, "loss": 0.5929, "step": 1924 }, { "epoch": 0.79, "learning_rate": 3.950744727916237e-05, "loss": 0.5818, "step": 1926 }, { "epoch": 0.79, "learning_rate": 3.949270019171214e-05, "loss": 0.5754, "step": 1928 }, { "epoch": 0.8, "learning_rate": 3.947795310426191e-05, "loss": 0.5308, "step": 1930 }, { "epoch": 0.8, "learning_rate": 3.946320601681168e-05, "loss": 0.5257, "step": 1932 }, { "epoch": 0.8, "learning_rate": 3.944845892936145e-05, "loss": 0.5067, "step": 1934 }, { "epoch": 0.8, "learning_rate": 3.943371184191122e-05, "loss": 0.6011, "step": 1936 }, { "epoch": 0.8, "learning_rate": 3.9418964754461e-05, "loss": 0.5923, "step": 1938 }, { "epoch": 0.8, "learning_rate": 3.940421766701077e-05, "loss": 0.5545, "step": 1940 }, { "epoch": 0.8, "learning_rate": 3.938947057956054e-05, "loss": 0.5329, "step": 1942 }, { "epoch": 0.8, "learning_rate": 3.937472349211031e-05, "loss": 0.5169, "step": 1944 }, { "epoch": 0.8, "learning_rate": 3.935997640466008e-05, "loss": 0.5292, "step": 1946 }, { "epoch": 0.8, "learning_rate": 3.934522931720985e-05, "loss": 0.5997, "step": 1948 }, { "epoch": 0.8, "learning_rate": 3.933048222975962e-05, "loss": 0.5849, "step": 1950 }, { "epoch": 0.8, "learning_rate": 3.93157351423094e-05, "loss": 0.5549, "step": 1952 }, { "epoch": 0.81, "learning_rate": 3.930098805485917e-05, "loss": 0.5789, "step": 1954 }, { "epoch": 0.81, "learning_rate": 3.928624096740894e-05, "loss": 0.5595, "step": 1956 }, { "epoch": 0.81, "learning_rate": 3.927149387995871e-05, "loss": 0.5548, "step": 1958 }, { "epoch": 0.81, "learning_rate": 3.925674679250848e-05, "loss": 0.4898, "step": 1960 }, { "epoch": 0.81, "learning_rate": 3.924199970505825e-05, "loss": 0.5542, "step": 1962 }, { "epoch": 0.81, "learning_rate": 3.922725261760802e-05, "loss": 0.562, "step": 1964 }, { "epoch": 0.81, "learning_rate": 3.92125055301578e-05, "loss": 0.5437, "step": 1966 }, { "epoch": 0.81, "learning_rate": 3.919775844270757e-05, "loss": 0.5792, "step": 1968 }, { "epoch": 0.81, "learning_rate": 3.918301135525734e-05, "loss": 0.5387, "step": 1970 }, { "epoch": 0.81, "learning_rate": 3.916826426780711e-05, "loss": 0.5311, "step": 1972 }, { "epoch": 0.81, "learning_rate": 3.915351718035688e-05, "loss": 0.5059, "step": 1974 }, { "epoch": 0.81, "learning_rate": 3.913877009290665e-05, "loss": 0.5655, "step": 1976 }, { "epoch": 0.81, "learning_rate": 3.912402300545642e-05, "loss": 0.5516, "step": 1978 }, { "epoch": 0.82, "learning_rate": 3.91092759180062e-05, "loss": 0.5494, "step": 1980 }, { "epoch": 0.82, "learning_rate": 3.909452883055597e-05, "loss": 0.521, "step": 1982 }, { "epoch": 0.82, "learning_rate": 3.907978174310574e-05, "loss": 0.5534, "step": 1984 }, { "epoch": 0.82, "learning_rate": 3.906503465565551e-05, "loss": 0.575, "step": 1986 }, { "epoch": 0.82, "learning_rate": 3.905028756820528e-05, "loss": 0.5384, "step": 1988 }, { "epoch": 0.82, "learning_rate": 3.903554048075505e-05, "loss": 0.5486, "step": 1990 }, { "epoch": 0.82, "learning_rate": 3.902079339330482e-05, "loss": 0.4873, "step": 1992 }, { "epoch": 0.82, "learning_rate": 3.9006046305854596e-05, "loss": 0.3682, "step": 1994 }, { "epoch": 0.82, "learning_rate": 3.899129921840437e-05, "loss": 0.2667, "step": 1996 }, { "epoch": 0.82, "learning_rate": 3.897655213095414e-05, "loss": 0.1665, "step": 1998 }, { "epoch": 0.82, "learning_rate": 3.896180504350391e-05, "loss": 0.1086, "step": 2000 }, { "epoch": 0.82, "learning_rate": 3.894705795605368e-05, "loss": 0.058, "step": 2002 }, { "epoch": 0.83, "learning_rate": 3.8932310868603455e-05, "loss": 0.0663, "step": 2004 }, { "epoch": 0.83, "learning_rate": 3.8917563781153225e-05, "loss": 0.0442, "step": 2006 }, { "epoch": 0.83, "learning_rate": 3.8902816693702996e-05, "loss": 0.0255, "step": 2008 }, { "epoch": 0.83, "learning_rate": 3.8888069606252766e-05, "loss": 0.0213, "step": 2010 }, { "epoch": 0.83, "learning_rate": 3.887332251880254e-05, "loss": 0.0197, "step": 2012 }, { "epoch": 0.83, "learning_rate": 3.8858575431352314e-05, "loss": 0.0262, "step": 2014 }, { "epoch": 0.83, "learning_rate": 3.8843828343902084e-05, "loss": 0.0388, "step": 2016 }, { "epoch": 0.83, "learning_rate": 3.8829081256451854e-05, "loss": 0.0271, "step": 2018 }, { "epoch": 0.83, "learning_rate": 3.8814334169001625e-05, "loss": 0.0324, "step": 2020 }, { "epoch": 0.83, "learning_rate": 3.8799587081551395e-05, "loss": 0.018, "step": 2022 }, { "epoch": 0.83, "learning_rate": 3.8784839994101165e-05, "loss": 0.0144, "step": 2024 }, { "epoch": 0.83, "learning_rate": 3.877009290665094e-05, "loss": 0.0143, "step": 2026 }, { "epoch": 0.84, "learning_rate": 3.875534581920071e-05, "loss": 0.0095, "step": 2028 }, { "epoch": 0.84, "learning_rate": 3.874059873175048e-05, "loss": 0.0504, "step": 2030 }, { "epoch": 0.84, "learning_rate": 3.8725851644300254e-05, "loss": 0.0278, "step": 2032 }, { "epoch": 0.84, "learning_rate": 3.8711104556850024e-05, "loss": 0.0121, "step": 2034 }, { "epoch": 0.84, "learning_rate": 3.8696357469399794e-05, "loss": 0.0253, "step": 2036 }, { "epoch": 0.84, "learning_rate": 3.8681610381949565e-05, "loss": 0.0143, "step": 2038 }, { "epoch": 0.84, "learning_rate": 3.866686329449934e-05, "loss": 0.0168, "step": 2040 }, { "epoch": 0.84, "learning_rate": 3.865211620704911e-05, "loss": 0.0136, "step": 2042 }, { "epoch": 0.84, "learning_rate": 3.863736911959888e-05, "loss": 0.0125, "step": 2044 }, { "epoch": 0.84, "learning_rate": 3.862262203214865e-05, "loss": 0.0109, "step": 2046 }, { "epoch": 0.84, "learning_rate": 3.8607874944698423e-05, "loss": 0.0422, "step": 2048 }, { "epoch": 0.84, "learning_rate": 3.8593127857248194e-05, "loss": 0.0185, "step": 2050 }, { "epoch": 0.85, "learning_rate": 3.8578380769797964e-05, "loss": 0.0116, "step": 2052 }, { "epoch": 0.85, "learning_rate": 3.856363368234774e-05, "loss": 0.0068, "step": 2054 }, { "epoch": 0.85, "learning_rate": 3.854888659489751e-05, "loss": 0.0077, "step": 2056 }, { "epoch": 0.85, "learning_rate": 3.853413950744728e-05, "loss": 0.0064, "step": 2058 }, { "epoch": 0.85, "learning_rate": 3.851939241999705e-05, "loss": 0.0041, "step": 2060 }, { "epoch": 0.85, "learning_rate": 3.850464533254682e-05, "loss": 0.0048, "step": 2062 }, { "epoch": 0.85, "learning_rate": 3.848989824509659e-05, "loss": 0.0103, "step": 2064 }, { "epoch": 0.85, "learning_rate": 3.8475151157646363e-05, "loss": 0.016, "step": 2066 }, { "epoch": 0.85, "learning_rate": 3.846040407019614e-05, "loss": 0.0041, "step": 2068 }, { "epoch": 0.85, "learning_rate": 3.844565698274591e-05, "loss": 0.0032, "step": 2070 }, { "epoch": 0.85, "learning_rate": 3.843090989529568e-05, "loss": 0.0282, "step": 2072 }, { "epoch": 0.85, "learning_rate": 3.841616280784545e-05, "loss": 0.0019, "step": 2074 }, { "epoch": 0.86, "learning_rate": 3.840141572039522e-05, "loss": 0.0041, "step": 2076 }, { "epoch": 0.86, "learning_rate": 3.838666863294499e-05, "loss": 0.0037, "step": 2078 }, { "epoch": 0.86, "learning_rate": 3.837192154549476e-05, "loss": 0.0057, "step": 2080 }, { "epoch": 0.86, "learning_rate": 3.835717445804454e-05, "loss": 0.0104, "step": 2082 }, { "epoch": 0.86, "learning_rate": 3.834242737059431e-05, "loss": 0.0041, "step": 2084 }, { "epoch": 0.86, "learning_rate": 3.832768028314408e-05, "loss": 0.0053, "step": 2086 }, { "epoch": 0.86, "learning_rate": 3.831293319569385e-05, "loss": 0.0035, "step": 2088 }, { "epoch": 0.86, "learning_rate": 3.829818610824362e-05, "loss": 0.0169, "step": 2090 }, { "epoch": 0.86, "learning_rate": 3.828343902079339e-05, "loss": 0.0127, "step": 2092 }, { "epoch": 0.86, "learning_rate": 3.826869193334316e-05, "loss": 0.0265, "step": 2094 }, { "epoch": 0.86, "learning_rate": 3.825394484589294e-05, "loss": 0.0078, "step": 2096 }, { "epoch": 0.86, "learning_rate": 3.823919775844271e-05, "loss": 0.0057, "step": 2098 }, { "epoch": 0.87, "learning_rate": 3.822445067099248e-05, "loss": 0.0038, "step": 2100 }, { "epoch": 0.87, "learning_rate": 3.820970358354225e-05, "loss": 0.0041, "step": 2102 }, { "epoch": 0.87, "learning_rate": 3.819495649609202e-05, "loss": 0.0023, "step": 2104 }, { "epoch": 0.87, "learning_rate": 3.818020940864179e-05, "loss": 0.0024, "step": 2106 }, { "epoch": 0.87, "learning_rate": 3.816546232119156e-05, "loss": 0.0017, "step": 2108 }, { "epoch": 0.87, "learning_rate": 3.815071523374134e-05, "loss": 0.0026, "step": 2110 }, { "epoch": 0.87, "learning_rate": 3.813596814629111e-05, "loss": 0.003, "step": 2112 }, { "epoch": 0.87, "learning_rate": 3.812122105884088e-05, "loss": 0.0018, "step": 2114 }, { "epoch": 0.87, "learning_rate": 3.8106473971390657e-05, "loss": 0.0026, "step": 2116 }, { "epoch": 0.87, "learning_rate": 3.809172688394043e-05, "loss": 0.0013, "step": 2118 }, { "epoch": 0.87, "learning_rate": 3.80769797964902e-05, "loss": 0.0008, "step": 2120 }, { "epoch": 0.87, "learning_rate": 3.806223270903997e-05, "loss": 0.0019, "step": 2122 }, { "epoch": 0.88, "learning_rate": 3.804748562158974e-05, "loss": 0.0035, "step": 2124 }, { "epoch": 0.88, "learning_rate": 3.803273853413951e-05, "loss": 0.0048, "step": 2126 }, { "epoch": 0.88, "learning_rate": 3.8017991446689285e-05, "loss": 0.0014, "step": 2128 }, { "epoch": 0.88, "learning_rate": 3.8003244359239056e-05, "loss": 0.0016, "step": 2130 }, { "epoch": 0.88, "learning_rate": 3.7988497271788826e-05, "loss": 0.0014, "step": 2132 }, { "epoch": 0.88, "learning_rate": 3.7973750184338597e-05, "loss": 0.0029, "step": 2134 }, { "epoch": 0.88, "learning_rate": 3.795900309688837e-05, "loss": 0.002, "step": 2136 }, { "epoch": 0.88, "learning_rate": 3.794425600943814e-05, "loss": 0.0009, "step": 2138 }, { "epoch": 0.88, "learning_rate": 3.792950892198791e-05, "loss": 0.001, "step": 2140 }, { "epoch": 0.88, "learning_rate": 3.7914761834537685e-05, "loss": 0.0008, "step": 2142 }, { "epoch": 0.88, "learning_rate": 3.7900014747087455e-05, "loss": 0.0008, "step": 2144 }, { "epoch": 0.88, "learning_rate": 3.7885267659637226e-05, "loss": 0.0021, "step": 2146 }, { "epoch": 0.89, "learning_rate": 3.7870520572186996e-05, "loss": 0.001, "step": 2148 }, { "epoch": 0.89, "learning_rate": 3.7855773484736766e-05, "loss": 0.0011, "step": 2150 }, { "epoch": 0.89, "learning_rate": 3.784102639728654e-05, "loss": 0.0014, "step": 2152 }, { "epoch": 0.89, "learning_rate": 3.782627930983631e-05, "loss": 0.0008, "step": 2154 }, { "epoch": 0.89, "learning_rate": 3.7811532222386084e-05, "loss": 0.0005, "step": 2156 }, { "epoch": 0.89, "learning_rate": 3.7796785134935855e-05, "loss": 0.0054, "step": 2158 }, { "epoch": 0.89, "learning_rate": 3.7782038047485625e-05, "loss": 0.0007, "step": 2160 }, { "epoch": 0.89, "learning_rate": 3.7767290960035395e-05, "loss": 0.0065, "step": 2162 }, { "epoch": 0.89, "learning_rate": 3.7752543872585166e-05, "loss": 0.0014, "step": 2164 }, { "epoch": 0.89, "learning_rate": 3.7737796785134936e-05, "loss": 0.0004, "step": 2166 }, { "epoch": 0.89, "learning_rate": 3.7723049697684706e-05, "loss": 0.0009, "step": 2168 }, { "epoch": 0.89, "learning_rate": 3.7708302610234484e-05, "loss": 0.0009, "step": 2170 }, { "epoch": 0.89, "learning_rate": 3.7693555522784254e-05, "loss": 0.001, "step": 2172 }, { "epoch": 0.9, "learning_rate": 3.7678808435334024e-05, "loss": 0.0079, "step": 2174 }, { "epoch": 0.9, "learning_rate": 3.7664061347883795e-05, "loss": 0.0018, "step": 2176 }, { "epoch": 0.9, "learning_rate": 3.7649314260433565e-05, "loss": 0.0017, "step": 2178 }, { "epoch": 0.9, "learning_rate": 3.7634567172983335e-05, "loss": 0.0024, "step": 2180 }, { "epoch": 0.9, "learning_rate": 3.7619820085533106e-05, "loss": 0.0013, "step": 2182 }, { "epoch": 0.9, "learning_rate": 3.760507299808288e-05, "loss": 0.0009, "step": 2184 }, { "epoch": 0.9, "learning_rate": 3.759032591063265e-05, "loss": 0.0012, "step": 2186 }, { "epoch": 0.9, "learning_rate": 3.7575578823182424e-05, "loss": 0.0006, "step": 2188 }, { "epoch": 0.9, "learning_rate": 3.7560831735732194e-05, "loss": 0.0031, "step": 2190 }, { "epoch": 0.9, "learning_rate": 3.7546084648281964e-05, "loss": 0.0007, "step": 2192 }, { "epoch": 0.9, "learning_rate": 3.7531337560831735e-05, "loss": 0.0009, "step": 2194 }, { "epoch": 0.9, "learning_rate": 3.7516590473381505e-05, "loss": 0.0011, "step": 2196 }, { "epoch": 0.91, "learning_rate": 3.750184338593128e-05, "loss": 0.0017, "step": 2198 }, { "epoch": 0.91, "learning_rate": 3.748709629848105e-05, "loss": 0.0038, "step": 2200 }, { "epoch": 0.91, "learning_rate": 3.747234921103082e-05, "loss": 0.0005, "step": 2202 }, { "epoch": 0.91, "learning_rate": 3.745760212358059e-05, "loss": 0.0008, "step": 2204 }, { "epoch": 0.91, "learning_rate": 3.7442855036130364e-05, "loss": 0.0006, "step": 2206 }, { "epoch": 0.91, "learning_rate": 3.7428107948680134e-05, "loss": 0.0011, "step": 2208 }, { "epoch": 0.91, "learning_rate": 3.7413360861229904e-05, "loss": 0.0009, "step": 2210 }, { "epoch": 0.91, "learning_rate": 3.739861377377968e-05, "loss": 0.0005, "step": 2212 }, { "epoch": 0.91, "learning_rate": 3.738386668632945e-05, "loss": 0.0011, "step": 2214 }, { "epoch": 0.91, "learning_rate": 3.736911959887922e-05, "loss": 0.004, "step": 2216 }, { "epoch": 0.91, "learning_rate": 3.735437251142899e-05, "loss": 0.0004, "step": 2218 }, { "epoch": 0.91, "learning_rate": 3.733962542397876e-05, "loss": 0.0015, "step": 2220 }, { "epoch": 0.92, "learning_rate": 3.732487833652853e-05, "loss": 0.0014, "step": 2222 }, { "epoch": 0.92, "learning_rate": 3.7310131249078304e-05, "loss": 0.001, "step": 2224 }, { "epoch": 0.92, "learning_rate": 3.7295384161628074e-05, "loss": 0.001, "step": 2226 }, { "epoch": 0.92, "learning_rate": 3.728063707417785e-05, "loss": 0.0007, "step": 2228 }, { "epoch": 0.92, "learning_rate": 3.726588998672763e-05, "loss": 0.0009, "step": 2230 }, { "epoch": 0.92, "learning_rate": 3.72511428992774e-05, "loss": 0.0006, "step": 2232 }, { "epoch": 0.92, "learning_rate": 3.723639581182717e-05, "loss": 0.0023, "step": 2234 }, { "epoch": 0.92, "learning_rate": 3.722164872437694e-05, "loss": 0.0003, "step": 2236 }, { "epoch": 0.92, "learning_rate": 3.720690163692671e-05, "loss": 0.0007, "step": 2238 }, { "epoch": 0.92, "learning_rate": 3.719215454947648e-05, "loss": 0.0016, "step": 2240 }, { "epoch": 0.92, "learning_rate": 3.717740746202625e-05, "loss": 0.0007, "step": 2242 }, { "epoch": 0.92, "learning_rate": 3.716266037457603e-05, "loss": 0.0004, "step": 2244 }, { "epoch": 0.93, "learning_rate": 3.71479132871258e-05, "loss": 0.0004, "step": 2246 }, { "epoch": 0.93, "learning_rate": 3.713316619967557e-05, "loss": 0.0006, "step": 2248 }, { "epoch": 0.93, "learning_rate": 3.711841911222534e-05, "loss": 0.0015, "step": 2250 }, { "epoch": 0.93, "learning_rate": 3.710367202477511e-05, "loss": 0.0004, "step": 2252 }, { "epoch": 0.93, "learning_rate": 3.708892493732488e-05, "loss": 0.0021, "step": 2254 }, { "epoch": 0.93, "learning_rate": 3.707417784987465e-05, "loss": 0.0006, "step": 2256 }, { "epoch": 0.93, "learning_rate": 3.705943076242443e-05, "loss": 0.0005, "step": 2258 }, { "epoch": 0.93, "learning_rate": 3.70446836749742e-05, "loss": 0.0213, "step": 2260 }, { "epoch": 0.93, "learning_rate": 3.702993658752397e-05, "loss": 0.0002, "step": 2262 }, { "epoch": 0.93, "learning_rate": 3.701518950007374e-05, "loss": 0.0004, "step": 2264 }, { "epoch": 0.93, "learning_rate": 3.700044241262351e-05, "loss": 0.0009, "step": 2266 }, { "epoch": 0.93, "learning_rate": 3.698569532517328e-05, "loss": 0.0063, "step": 2268 }, { "epoch": 0.94, "learning_rate": 3.697094823772305e-05, "loss": 0.0005, "step": 2270 }, { "epoch": 0.94, "learning_rate": 3.6956201150272826e-05, "loss": 0.0007, "step": 2272 }, { "epoch": 0.94, "learning_rate": 3.69414540628226e-05, "loss": 0.0005, "step": 2274 }, { "epoch": 0.94, "learning_rate": 3.692670697537237e-05, "loss": 0.0045, "step": 2276 }, { "epoch": 0.94, "learning_rate": 3.691195988792214e-05, "loss": 0.0004, "step": 2278 }, { "epoch": 0.94, "learning_rate": 3.689721280047191e-05, "loss": 0.0016, "step": 2280 }, { "epoch": 0.94, "learning_rate": 3.688246571302168e-05, "loss": 0.001, "step": 2282 }, { "epoch": 0.94, "learning_rate": 3.686771862557145e-05, "loss": 0.0004, "step": 2284 }, { "epoch": 0.94, "learning_rate": 3.6852971538121226e-05, "loss": 0.001, "step": 2286 }, { "epoch": 0.94, "learning_rate": 3.6838224450670996e-05, "loss": 0.002, "step": 2288 }, { "epoch": 0.94, "learning_rate": 3.6823477363220766e-05, "loss": 0.0006, "step": 2290 }, { "epoch": 0.94, "learning_rate": 3.680873027577054e-05, "loss": 0.0006, "step": 2292 }, { "epoch": 0.95, "learning_rate": 3.679398318832031e-05, "loss": 0.0008, "step": 2294 }, { "epoch": 0.95, "learning_rate": 3.677923610087008e-05, "loss": 0.0004, "step": 2296 }, { "epoch": 0.95, "learning_rate": 3.676448901341985e-05, "loss": 0.0005, "step": 2298 }, { "epoch": 0.95, "learning_rate": 3.6749741925969625e-05, "loss": 0.0005, "step": 2300 }, { "epoch": 0.95, "learning_rate": 3.6734994838519395e-05, "loss": 0.0006, "step": 2302 }, { "epoch": 0.95, "learning_rate": 3.6720247751069166e-05, "loss": 0.0003, "step": 2304 }, { "epoch": 0.95, "learning_rate": 3.6705500663618936e-05, "loss": 0.0004, "step": 2306 }, { "epoch": 0.95, "learning_rate": 3.6690753576168707e-05, "loss": 0.0003, "step": 2308 }, { "epoch": 0.95, "learning_rate": 3.667600648871848e-05, "loss": 0.0011, "step": 2310 }, { "epoch": 0.95, "learning_rate": 3.666125940126825e-05, "loss": 0.0005, "step": 2312 }, { "epoch": 0.95, "learning_rate": 3.6646512313818024e-05, "loss": 0.0003, "step": 2314 }, { "epoch": 0.95, "learning_rate": 3.6631765226367795e-05, "loss": 0.0006, "step": 2316 }, { "epoch": 0.96, "learning_rate": 3.6617018138917565e-05, "loss": 0.0005, "step": 2318 }, { "epoch": 0.96, "learning_rate": 3.6602271051467336e-05, "loss": 0.0004, "step": 2320 }, { "epoch": 0.96, "learning_rate": 3.6587523964017106e-05, "loss": 0.0021, "step": 2322 }, { "epoch": 0.96, "learning_rate": 3.6572776876566876e-05, "loss": 0.0003, "step": 2324 }, { "epoch": 0.96, "learning_rate": 3.655802978911665e-05, "loss": 0.0003, "step": 2326 }, { "epoch": 0.96, "learning_rate": 3.654328270166642e-05, "loss": 0.0005, "step": 2328 }, { "epoch": 0.96, "learning_rate": 3.6528535614216194e-05, "loss": 0.0005, "step": 2330 }, { "epoch": 0.96, "learning_rate": 3.6513788526765964e-05, "loss": 0.0002, "step": 2332 }, { "epoch": 0.96, "learning_rate": 3.6499041439315735e-05, "loss": 0.0042, "step": 2334 }, { "epoch": 0.96, "learning_rate": 3.6484294351865505e-05, "loss": 0.0004, "step": 2336 }, { "epoch": 0.96, "learning_rate": 3.646954726441528e-05, "loss": 0.0003, "step": 2338 }, { "epoch": 0.96, "learning_rate": 3.645480017696505e-05, "loss": 0.0021, "step": 2340 }, { "epoch": 0.96, "learning_rate": 3.644005308951482e-05, "loss": 0.0014, "step": 2342 }, { "epoch": 0.97, "learning_rate": 3.6425306002064593e-05, "loss": 0.0007, "step": 2344 }, { "epoch": 0.97, "learning_rate": 3.641055891461437e-05, "loss": 0.0007, "step": 2346 }, { "epoch": 0.97, "learning_rate": 3.639581182716414e-05, "loss": 0.0074, "step": 2348 }, { "epoch": 0.97, "learning_rate": 3.638106473971391e-05, "loss": 0.0003, "step": 2350 }, { "epoch": 0.97, "learning_rate": 3.636631765226368e-05, "loss": 0.0059, "step": 2352 }, { "epoch": 0.97, "learning_rate": 3.635157056481345e-05, "loss": 0.0009, "step": 2354 }, { "epoch": 0.97, "learning_rate": 3.633682347736322e-05, "loss": 0.0014, "step": 2356 }, { "epoch": 0.97, "learning_rate": 3.632207638991299e-05, "loss": 0.0029, "step": 2358 }, { "epoch": 0.97, "learning_rate": 3.630732930246277e-05, "loss": 0.0005, "step": 2360 }, { "epoch": 0.97, "learning_rate": 3.629258221501254e-05, "loss": 0.0024, "step": 2362 }, { "epoch": 0.97, "learning_rate": 3.627783512756231e-05, "loss": 0.0003, "step": 2364 }, { "epoch": 0.97, "learning_rate": 3.626308804011208e-05, "loss": 0.0005, "step": 2366 }, { "epoch": 0.98, "learning_rate": 3.624834095266185e-05, "loss": 0.0006, "step": 2368 }, { "epoch": 0.98, "learning_rate": 3.623359386521162e-05, "loss": 0.0006, "step": 2370 }, { "epoch": 0.98, "learning_rate": 3.621884677776139e-05, "loss": 0.0003, "step": 2372 }, { "epoch": 0.98, "learning_rate": 3.620409969031117e-05, "loss": 0.0004, "step": 2374 }, { "epoch": 0.98, "learning_rate": 3.618935260286094e-05, "loss": 0.0009, "step": 2376 }, { "epoch": 0.98, "learning_rate": 3.617460551541071e-05, "loss": 0.0006, "step": 2378 }, { "epoch": 0.98, "learning_rate": 3.615985842796048e-05, "loss": 0.0003, "step": 2380 }, { "epoch": 0.98, "learning_rate": 3.614511134051025e-05, "loss": 0.0004, "step": 2382 }, { "epoch": 0.98, "learning_rate": 3.613036425306002e-05, "loss": 0.0004, "step": 2384 }, { "epoch": 0.98, "learning_rate": 3.611561716560979e-05, "loss": 0.0012, "step": 2386 }, { "epoch": 0.98, "learning_rate": 3.610087007815957e-05, "loss": 0.0003, "step": 2388 }, { "epoch": 0.98, "learning_rate": 3.608612299070934e-05, "loss": 0.0002, "step": 2390 }, { "epoch": 0.99, "learning_rate": 3.607137590325911e-05, "loss": 0.0002, "step": 2392 }, { "epoch": 0.99, "learning_rate": 3.605662881580888e-05, "loss": 0.0003, "step": 2394 }, { "epoch": 0.99, "learning_rate": 3.604188172835865e-05, "loss": 0.0005, "step": 2396 }, { "epoch": 0.99, "learning_rate": 3.602713464090842e-05, "loss": 0.0005, "step": 2398 }, { "epoch": 0.99, "learning_rate": 3.601238755345819e-05, "loss": 0.0003, "step": 2400 }, { "epoch": 0.99, "learning_rate": 3.599764046600797e-05, "loss": 0.0004, "step": 2402 }, { "epoch": 0.99, "learning_rate": 3.598289337855774e-05, "loss": 0.0012, "step": 2404 }, { "epoch": 0.99, "learning_rate": 3.596814629110751e-05, "loss": 0.0002, "step": 2406 }, { "epoch": 0.99, "learning_rate": 3.595339920365728e-05, "loss": 0.0003, "step": 2408 }, { "epoch": 0.99, "learning_rate": 3.593865211620705e-05, "loss": 0.0002, "step": 2410 }, { "epoch": 0.99, "learning_rate": 3.592390502875682e-05, "loss": 0.0001, "step": 2412 }, { "epoch": 0.99, "learning_rate": 3.590915794130659e-05, "loss": 0.001, "step": 2414 }, { "epoch": 1.0, "learning_rate": 3.589441085385637e-05, "loss": 0.0002, "step": 2416 }, { "epoch": 1.0, "learning_rate": 3.587966376640614e-05, "loss": 0.0003, "step": 2418 }, { "epoch": 1.0, "learning_rate": 3.586491667895591e-05, "loss": 0.0003, "step": 2420 }, { "epoch": 1.0, "learning_rate": 3.585016959150568e-05, "loss": 0.0004, "step": 2422 }, { "epoch": 1.0, "learning_rate": 3.583542250405545e-05, "loss": 0.0002, "step": 2424 }, { "epoch": 1.0, "learning_rate": 3.582067541660522e-05, "loss": 0.0001, "step": 2426 }, { "epoch": 1.0, "learning_rate": 3.580592832915499e-05, "loss": 0.0002, "step": 2428 }, { "epoch": 1.0, "learning_rate": 3.579118124170476e-05, "loss": 0.0003, "step": 2430 }, { "epoch": 1.0, "learning_rate": 3.577643415425454e-05, "loss": 0.0002, "step": 2432 }, { "epoch": 1.0, "learning_rate": 3.576168706680431e-05, "loss": 0.0003, "step": 2434 }, { "epoch": 1.0, "learning_rate": 3.574693997935408e-05, "loss": 0.0004, "step": 2436 }, { "epoch": 1.0, "learning_rate": 3.573219289190385e-05, "loss": 0.0002, "step": 2438 }, { "epoch": 1.01, "learning_rate": 3.571744580445362e-05, "loss": 0.0002, "step": 2440 }, { "epoch": 1.01, "learning_rate": 3.570269871700339e-05, "loss": 0.0002, "step": 2442 }, { "epoch": 1.01, "learning_rate": 3.568795162955316e-05, "loss": 0.0003, "step": 2444 }, { "epoch": 1.01, "learning_rate": 3.5673204542102936e-05, "loss": 0.0001, "step": 2446 }, { "epoch": 1.01, "learning_rate": 3.565845745465271e-05, "loss": 0.0002, "step": 2448 }, { "epoch": 1.01, "learning_rate": 3.5643710367202484e-05, "loss": 0.0002, "step": 2450 }, { "epoch": 1.01, "learning_rate": 3.5628963279752254e-05, "loss": 0.0003, "step": 2452 }, { "epoch": 1.01, "learning_rate": 3.5614216192302025e-05, "loss": 0.001, "step": 2454 }, { "epoch": 1.01, "learning_rate": 3.5599469104851795e-05, "loss": 0.0014, "step": 2456 }, { "epoch": 1.01, "learning_rate": 3.5584722017401565e-05, "loss": 0.0002, "step": 2458 }, { "epoch": 1.01, "learning_rate": 3.5569974929951336e-05, "loss": 0.0014, "step": 2460 }, { "epoch": 1.01, "learning_rate": 3.555522784250111e-05, "loss": 0.0002, "step": 2462 }, { "epoch": 1.02, "learning_rate": 3.554048075505088e-05, "loss": 0.0001, "step": 2464 }, { "epoch": 1.02, "learning_rate": 3.5525733667600654e-05, "loss": 0.0006, "step": 2466 }, { "epoch": 1.02, "learning_rate": 3.5510986580150424e-05, "loss": 0.0004, "step": 2468 }, { "epoch": 1.02, "learning_rate": 3.5496239492700194e-05, "loss": 0.0006, "step": 2470 }, { "epoch": 1.02, "learning_rate": 3.5481492405249965e-05, "loss": 0.0003, "step": 2472 }, { "epoch": 1.02, "learning_rate": 3.5466745317799735e-05, "loss": 0.0018, "step": 2474 }, { "epoch": 1.02, "learning_rate": 3.545199823034951e-05, "loss": 0.0006, "step": 2476 }, { "epoch": 1.02, "learning_rate": 3.543725114289928e-05, "loss": 0.0031, "step": 2478 }, { "epoch": 1.02, "learning_rate": 3.542250405544905e-05, "loss": 0.0003, "step": 2480 }, { "epoch": 1.02, "learning_rate": 3.540775696799882e-05, "loss": 0.0002, "step": 2482 }, { "epoch": 1.02, "learning_rate": 3.5393009880548594e-05, "loss": 0.0003, "step": 2484 }, { "epoch": 1.02, "learning_rate": 3.5378262793098364e-05, "loss": 0.0004, "step": 2486 }, { "epoch": 1.03, "learning_rate": 3.5363515705648134e-05, "loss": 0.0004, "step": 2488 }, { "epoch": 1.03, "learning_rate": 3.534876861819791e-05, "loss": 0.0006, "step": 2490 }, { "epoch": 1.03, "learning_rate": 3.533402153074768e-05, "loss": 0.0007, "step": 2492 }, { "epoch": 1.03, "learning_rate": 3.531927444329745e-05, "loss": 0.0005, "step": 2494 }, { "epoch": 1.03, "learning_rate": 3.530452735584722e-05, "loss": 0.0007, "step": 2496 }, { "epoch": 1.03, "learning_rate": 3.528978026839699e-05, "loss": 0.001, "step": 2498 }, { "epoch": 1.03, "learning_rate": 3.527503318094676e-05, "loss": 0.0003, "step": 2500 }, { "epoch": 1.03, "learning_rate": 3.5260286093496534e-05, "loss": 0.0002, "step": 2502 }, { "epoch": 1.03, "learning_rate": 3.524553900604631e-05, "loss": 0.0002, "step": 2504 }, { "epoch": 1.03, "learning_rate": 3.523079191859608e-05, "loss": 0.0009, "step": 2506 }, { "epoch": 1.03, "learning_rate": 3.521604483114585e-05, "loss": 0.0002, "step": 2508 }, { "epoch": 1.03, "learning_rate": 3.520129774369562e-05, "loss": 0.0003, "step": 2510 }, { "epoch": 1.04, "learning_rate": 3.518655065624539e-05, "loss": 0.0028, "step": 2512 }, { "epoch": 1.04, "learning_rate": 3.517180356879516e-05, "loss": 0.0004, "step": 2514 }, { "epoch": 1.04, "learning_rate": 3.515705648134493e-05, "loss": 0.0002, "step": 2516 }, { "epoch": 1.04, "learning_rate": 3.5142309393894703e-05, "loss": 0.0006, "step": 2518 }, { "epoch": 1.04, "learning_rate": 3.512756230644448e-05, "loss": 0.0117, "step": 2520 }, { "epoch": 1.04, "learning_rate": 3.511281521899425e-05, "loss": 0.0003, "step": 2522 }, { "epoch": 1.04, "learning_rate": 3.509806813154402e-05, "loss": 0.0012, "step": 2524 }, { "epoch": 1.04, "learning_rate": 3.508332104409379e-05, "loss": 0.0008, "step": 2526 }, { "epoch": 1.04, "learning_rate": 3.506857395664356e-05, "loss": 0.0003, "step": 2528 }, { "epoch": 1.04, "learning_rate": 3.505382686919333e-05, "loss": 0.0002, "step": 2530 }, { "epoch": 1.04, "learning_rate": 3.50390797817431e-05, "loss": 0.0002, "step": 2532 }, { "epoch": 1.04, "learning_rate": 3.502433269429288e-05, "loss": 0.0004, "step": 2534 }, { "epoch": 1.04, "learning_rate": 3.500958560684265e-05, "loss": 0.0002, "step": 2536 }, { "epoch": 1.05, "learning_rate": 3.499483851939242e-05, "loss": 0.0003, "step": 2538 }, { "epoch": 1.05, "learning_rate": 3.498009143194219e-05, "loss": 0.0004, "step": 2540 }, { "epoch": 1.05, "learning_rate": 3.496534434449196e-05, "loss": 0.0006, "step": 2542 }, { "epoch": 1.05, "learning_rate": 3.495059725704173e-05, "loss": 0.0003, "step": 2544 }, { "epoch": 1.05, "learning_rate": 3.49358501695915e-05, "loss": 0.0003, "step": 2546 }, { "epoch": 1.05, "learning_rate": 3.492110308214128e-05, "loss": 0.0002, "step": 2548 }, { "epoch": 1.05, "learning_rate": 3.490635599469105e-05, "loss": 0.0002, "step": 2550 }, { "epoch": 1.05, "learning_rate": 3.489160890724082e-05, "loss": 0.0003, "step": 2552 }, { "epoch": 1.05, "learning_rate": 3.487686181979059e-05, "loss": 0.0003, "step": 2554 }, { "epoch": 1.05, "learning_rate": 3.486211473234036e-05, "loss": 0.0007, "step": 2556 }, { "epoch": 1.05, "learning_rate": 3.484736764489013e-05, "loss": 0.0002, "step": 2558 }, { "epoch": 1.05, "learning_rate": 3.483262055743991e-05, "loss": 0.0003, "step": 2560 }, { "epoch": 1.06, "learning_rate": 3.481787346998968e-05, "loss": 0.0002, "step": 2562 }, { "epoch": 1.06, "learning_rate": 3.4803126382539456e-05, "loss": 0.0004, "step": 2564 }, { "epoch": 1.06, "learning_rate": 3.4788379295089226e-05, "loss": 0.0003, "step": 2566 }, { "epoch": 1.06, "learning_rate": 3.4773632207638996e-05, "loss": 0.0002, "step": 2568 }, { "epoch": 1.06, "learning_rate": 3.475888512018877e-05, "loss": 0.0008, "step": 2570 }, { "epoch": 1.06, "learning_rate": 3.474413803273854e-05, "loss": 0.0002, "step": 2572 }, { "epoch": 1.06, "learning_rate": 3.472939094528831e-05, "loss": 0.0007, "step": 2574 }, { "epoch": 1.06, "learning_rate": 3.471464385783808e-05, "loss": 0.0001, "step": 2576 }, { "epoch": 1.06, "learning_rate": 3.4699896770387855e-05, "loss": 0.0002, "step": 2578 }, { "epoch": 1.06, "learning_rate": 3.4685149682937625e-05, "loss": 0.0005, "step": 2580 }, { "epoch": 1.06, "learning_rate": 3.4670402595487396e-05, "loss": 0.0004, "step": 2582 }, { "epoch": 1.06, "learning_rate": 3.4655655508037166e-05, "loss": 0.0001, "step": 2584 }, { "epoch": 1.07, "learning_rate": 3.4640908420586937e-05, "loss": 0.0001, "step": 2586 }, { "epoch": 1.07, "learning_rate": 3.462616133313671e-05, "loss": 0.0004, "step": 2588 }, { "epoch": 1.07, "learning_rate": 3.461141424568648e-05, "loss": 0.0002, "step": 2590 }, { "epoch": 1.07, "learning_rate": 3.4596667158236254e-05, "loss": 0.0003, "step": 2592 }, { "epoch": 1.07, "learning_rate": 3.4581920070786025e-05, "loss": 0.0016, "step": 2594 }, { "epoch": 1.07, "learning_rate": 3.4567172983335795e-05, "loss": 0.0002, "step": 2596 }, { "epoch": 1.07, "learning_rate": 3.4552425895885565e-05, "loss": 0.0001, "step": 2598 }, { "epoch": 1.07, "learning_rate": 3.4537678808435336e-05, "loss": 0.0003, "step": 2600 }, { "epoch": 1.07, "learning_rate": 3.4522931720985106e-05, "loss": 0.0004, "step": 2602 }, { "epoch": 1.07, "learning_rate": 3.4508184633534877e-05, "loss": 0.0004, "step": 2604 }, { "epoch": 1.07, "learning_rate": 3.4493437546084654e-05, "loss": 0.0009, "step": 2606 }, { "epoch": 1.07, "learning_rate": 3.4478690458634424e-05, "loss": 0.0002, "step": 2608 }, { "epoch": 1.08, "learning_rate": 3.4463943371184194e-05, "loss": 0.0002, "step": 2610 }, { "epoch": 1.08, "learning_rate": 3.4449196283733965e-05, "loss": 0.0004, "step": 2612 }, { "epoch": 1.08, "learning_rate": 3.4434449196283735e-05, "loss": 0.0004, "step": 2614 }, { "epoch": 1.08, "learning_rate": 3.4419702108833506e-05, "loss": 0.0004, "step": 2616 }, { "epoch": 1.08, "learning_rate": 3.4404955021383276e-05, "loss": 0.0002, "step": 2618 }, { "epoch": 1.08, "learning_rate": 3.4390207933933046e-05, "loss": 0.0002, "step": 2620 }, { "epoch": 1.08, "learning_rate": 3.4375460846482823e-05, "loss": 0.0002, "step": 2622 }, { "epoch": 1.08, "learning_rate": 3.4360713759032594e-05, "loss": 0.0004, "step": 2624 }, { "epoch": 1.08, "learning_rate": 3.4345966671582364e-05, "loss": 0.0008, "step": 2626 }, { "epoch": 1.08, "learning_rate": 3.4331219584132135e-05, "loss": 0.0002, "step": 2628 }, { "epoch": 1.08, "learning_rate": 3.4316472496681905e-05, "loss": 0.0002, "step": 2630 }, { "epoch": 1.08, "learning_rate": 3.4301725409231675e-05, "loss": 0.0006, "step": 2632 }, { "epoch": 1.09, "learning_rate": 3.4286978321781446e-05, "loss": 0.0033, "step": 2634 }, { "epoch": 1.09, "learning_rate": 3.427223123433122e-05, "loss": 0.0003, "step": 2636 }, { "epoch": 1.09, "learning_rate": 3.425748414688099e-05, "loss": 0.0003, "step": 2638 }, { "epoch": 1.09, "learning_rate": 3.4242737059430764e-05, "loss": 0.0004, "step": 2640 }, { "epoch": 1.09, "learning_rate": 3.4227989971980534e-05, "loss": 0.001, "step": 2642 }, { "epoch": 1.09, "learning_rate": 3.4213242884530304e-05, "loss": 0.0006, "step": 2644 }, { "epoch": 1.09, "learning_rate": 3.4198495797080075e-05, "loss": 0.0004, "step": 2646 }, { "epoch": 1.09, "learning_rate": 3.4183748709629845e-05, "loss": 0.0002, "step": 2648 }, { "epoch": 1.09, "learning_rate": 3.416900162217962e-05, "loss": 0.0002, "step": 2650 }, { "epoch": 1.09, "learning_rate": 3.415425453472939e-05, "loss": 0.0002, "step": 2652 }, { "epoch": 1.09, "learning_rate": 3.413950744727916e-05, "loss": 0.0001, "step": 2654 }, { "epoch": 1.09, "learning_rate": 3.412476035982893e-05, "loss": 0.0002, "step": 2656 }, { "epoch": 1.1, "learning_rate": 3.4110013272378704e-05, "loss": 0.0002, "step": 2658 }, { "epoch": 1.1, "learning_rate": 3.4095266184928474e-05, "loss": 0.0001, "step": 2660 }, { "epoch": 1.1, "learning_rate": 3.4080519097478244e-05, "loss": 0.0002, "step": 2662 }, { "epoch": 1.1, "learning_rate": 3.406577201002802e-05, "loss": 0.0002, "step": 2664 }, { "epoch": 1.1, "learning_rate": 3.405102492257779e-05, "loss": 0.0001, "step": 2666 }, { "epoch": 1.1, "learning_rate": 3.403627783512756e-05, "loss": 0.0002, "step": 2668 }, { "epoch": 1.1, "learning_rate": 3.402153074767733e-05, "loss": 0.0002, "step": 2670 }, { "epoch": 1.1, "learning_rate": 3.400678366022711e-05, "loss": 0.0002, "step": 2672 }, { "epoch": 1.1, "learning_rate": 3.399203657277688e-05, "loss": 0.0001, "step": 2674 }, { "epoch": 1.1, "learning_rate": 3.397728948532665e-05, "loss": 0.0001, "step": 2676 }, { "epoch": 1.1, "learning_rate": 3.396254239787642e-05, "loss": 0.0003, "step": 2678 }, { "epoch": 1.1, "learning_rate": 3.39477953104262e-05, "loss": 0.0007, "step": 2680 }, { "epoch": 1.11, "learning_rate": 3.393304822297597e-05, "loss": 0.0003, "step": 2682 }, { "epoch": 1.11, "learning_rate": 3.391830113552574e-05, "loss": 0.0001, "step": 2684 }, { "epoch": 1.11, "learning_rate": 3.390355404807551e-05, "loss": 0.0001, "step": 2686 }, { "epoch": 1.11, "learning_rate": 3.388880696062528e-05, "loss": 0.0001, "step": 2688 }, { "epoch": 1.11, "learning_rate": 3.387405987317505e-05, "loss": 0.0002, "step": 2690 }, { "epoch": 1.11, "learning_rate": 3.385931278572482e-05, "loss": 0.0008, "step": 2692 }, { "epoch": 1.11, "learning_rate": 3.38445656982746e-05, "loss": 0.0001, "step": 2694 }, { "epoch": 1.11, "learning_rate": 3.382981861082437e-05, "loss": 0.0002, "step": 2696 }, { "epoch": 1.11, "learning_rate": 3.381507152337414e-05, "loss": 0.0001, "step": 2698 }, { "epoch": 1.11, "learning_rate": 3.380032443592391e-05, "loss": 0.0002, "step": 2700 }, { "epoch": 1.11, "learning_rate": 3.378557734847368e-05, "loss": 0.0001, "step": 2702 }, { "epoch": 1.11, "learning_rate": 3.377083026102345e-05, "loss": 0.0001, "step": 2704 }, { "epoch": 1.11, "learning_rate": 3.375608317357322e-05, "loss": 0.0002, "step": 2706 }, { "epoch": 1.12, "learning_rate": 3.3741336086123e-05, "loss": 0.0001, "step": 2708 }, { "epoch": 1.12, "learning_rate": 3.372658899867277e-05, "loss": 0.0001, "step": 2710 }, { "epoch": 1.12, "learning_rate": 3.371184191122254e-05, "loss": 0.0001, "step": 2712 }, { "epoch": 1.12, "learning_rate": 3.369709482377231e-05, "loss": 0.0002, "step": 2714 }, { "epoch": 1.12, "learning_rate": 3.368234773632208e-05, "loss": 0.0001, "step": 2716 }, { "epoch": 1.12, "learning_rate": 3.366760064887185e-05, "loss": 0.0001, "step": 2718 }, { "epoch": 1.12, "learning_rate": 3.365285356142162e-05, "loss": 0.0001, "step": 2720 }, { "epoch": 1.12, "learning_rate": 3.363810647397139e-05, "loss": 0.0001, "step": 2722 }, { "epoch": 1.12, "learning_rate": 3.3623359386521166e-05, "loss": 0.0001, "step": 2724 }, { "epoch": 1.12, "learning_rate": 3.360861229907094e-05, "loss": 0.0002, "step": 2726 }, { "epoch": 1.12, "learning_rate": 3.359386521162071e-05, "loss": 0.0002, "step": 2728 }, { "epoch": 1.12, "learning_rate": 3.357911812417048e-05, "loss": 0.0008, "step": 2730 }, { "epoch": 1.13, "learning_rate": 3.356437103672025e-05, "loss": 0.0001, "step": 2732 }, { "epoch": 1.13, "learning_rate": 3.354962394927002e-05, "loss": 0.0001, "step": 2734 }, { "epoch": 1.13, "learning_rate": 3.353487686181979e-05, "loss": 0.0002, "step": 2736 }, { "epoch": 1.13, "learning_rate": 3.3520129774369566e-05, "loss": 0.0002, "step": 2738 }, { "epoch": 1.13, "learning_rate": 3.3505382686919336e-05, "loss": 0.0001, "step": 2740 }, { "epoch": 1.13, "learning_rate": 3.3490635599469106e-05, "loss": 0.0001, "step": 2742 }, { "epoch": 1.13, "learning_rate": 3.347588851201888e-05, "loss": 0.0001, "step": 2744 }, { "epoch": 1.13, "learning_rate": 3.346114142456865e-05, "loss": 0.0001, "step": 2746 }, { "epoch": 1.13, "learning_rate": 3.344639433711842e-05, "loss": 0.0001, "step": 2748 }, { "epoch": 1.13, "learning_rate": 3.343164724966819e-05, "loss": 0.0001, "step": 2750 }, { "epoch": 1.13, "learning_rate": 3.3416900162217965e-05, "loss": 0.0002, "step": 2752 }, { "epoch": 1.13, "learning_rate": 3.3402153074767735e-05, "loss": 0.0001, "step": 2754 }, { "epoch": 1.14, "learning_rate": 3.3387405987317506e-05, "loss": 0.0001, "step": 2756 }, { "epoch": 1.14, "learning_rate": 3.3372658899867276e-05, "loss": 0.0001, "step": 2758 }, { "epoch": 1.14, "learning_rate": 3.3357911812417046e-05, "loss": 0.0001, "step": 2760 }, { "epoch": 1.14, "learning_rate": 3.334316472496682e-05, "loss": 0.0001, "step": 2762 }, { "epoch": 1.14, "learning_rate": 3.332841763751659e-05, "loss": 0.0001, "step": 2764 }, { "epoch": 1.14, "learning_rate": 3.3313670550066364e-05, "loss": 0.0004, "step": 2766 }, { "epoch": 1.14, "learning_rate": 3.3298923462616135e-05, "loss": 0.0001, "step": 2768 }, { "epoch": 1.14, "learning_rate": 3.3284176375165905e-05, "loss": 0.0001, "step": 2770 }, { "epoch": 1.14, "learning_rate": 3.3269429287715675e-05, "loss": 0.0001, "step": 2772 }, { "epoch": 1.14, "learning_rate": 3.3254682200265446e-05, "loss": 0.0001, "step": 2774 }, { "epoch": 1.14, "learning_rate": 3.3239935112815216e-05, "loss": 0.0001, "step": 2776 }, { "epoch": 1.14, "learning_rate": 3.3225188025364987e-05, "loss": 0.0001, "step": 2778 }, { "epoch": 1.15, "learning_rate": 3.3210440937914764e-05, "loss": 0.0001, "step": 2780 }, { "epoch": 1.15, "learning_rate": 3.319569385046454e-05, "loss": 0.0001, "step": 2782 }, { "epoch": 1.15, "learning_rate": 3.318094676301431e-05, "loss": 0.0001, "step": 2784 }, { "epoch": 1.15, "learning_rate": 3.316619967556408e-05, "loss": 0.0001, "step": 2786 }, { "epoch": 1.15, "learning_rate": 3.315145258811385e-05, "loss": 0.0001, "step": 2788 }, { "epoch": 1.15, "learning_rate": 3.313670550066362e-05, "loss": 0.0001, "step": 2790 }, { "epoch": 1.15, "learning_rate": 3.312195841321339e-05, "loss": 0.0001, "step": 2792 }, { "epoch": 1.15, "learning_rate": 3.310721132576316e-05, "loss": 0.0001, "step": 2794 }, { "epoch": 1.15, "learning_rate": 3.309246423831294e-05, "loss": 0.0001, "step": 2796 }, { "epoch": 1.15, "learning_rate": 3.307771715086271e-05, "loss": 0.0001, "step": 2798 }, { "epoch": 1.15, "learning_rate": 3.306297006341248e-05, "loss": 0.0001, "step": 2800 }, { "epoch": 1.15, "learning_rate": 3.304822297596225e-05, "loss": 0.0001, "step": 2802 }, { "epoch": 1.16, "learning_rate": 3.303347588851202e-05, "loss": 0.0011, "step": 2804 }, { "epoch": 1.16, "learning_rate": 3.301872880106179e-05, "loss": 0.0001, "step": 2806 }, { "epoch": 1.16, "learning_rate": 3.300398171361156e-05, "loss": 0.0001, "step": 2808 }, { "epoch": 1.16, "learning_rate": 3.298923462616133e-05, "loss": 0.0001, "step": 2810 }, { "epoch": 1.16, "learning_rate": 3.297448753871111e-05, "loss": 0.0002, "step": 2812 }, { "epoch": 1.16, "learning_rate": 3.295974045126088e-05, "loss": 0.0001, "step": 2814 }, { "epoch": 1.16, "learning_rate": 3.294499336381065e-05, "loss": 0.0004, "step": 2816 }, { "epoch": 1.16, "learning_rate": 3.293024627636042e-05, "loss": 0.0001, "step": 2818 }, { "epoch": 1.16, "learning_rate": 3.291549918891019e-05, "loss": 0.0001, "step": 2820 }, { "epoch": 1.16, "learning_rate": 3.290075210145996e-05, "loss": 0.0001, "step": 2822 }, { "epoch": 1.16, "learning_rate": 3.288600501400973e-05, "loss": 0.0001, "step": 2824 }, { "epoch": 1.16, "learning_rate": 3.287125792655951e-05, "loss": 0.0001, "step": 2826 }, { "epoch": 1.17, "learning_rate": 3.285651083910928e-05, "loss": 0.0001, "step": 2828 }, { "epoch": 1.17, "learning_rate": 3.284176375165905e-05, "loss": 0.0001, "step": 2830 }, { "epoch": 1.17, "learning_rate": 3.282701666420882e-05, "loss": 0.0001, "step": 2832 }, { "epoch": 1.17, "learning_rate": 3.281226957675859e-05, "loss": 0.0002, "step": 2834 }, { "epoch": 1.17, "learning_rate": 3.279752248930836e-05, "loss": 0.0001, "step": 2836 }, { "epoch": 1.17, "learning_rate": 3.278277540185813e-05, "loss": 0.0001, "step": 2838 }, { "epoch": 1.17, "learning_rate": 3.276802831440791e-05, "loss": 0.0001, "step": 2840 }, { "epoch": 1.17, "learning_rate": 3.275328122695768e-05, "loss": 0.0004, "step": 2842 }, { "epoch": 1.17, "learning_rate": 3.273853413950745e-05, "loss": 0.0002, "step": 2844 }, { "epoch": 1.17, "learning_rate": 3.272378705205722e-05, "loss": 0.0001, "step": 2846 }, { "epoch": 1.17, "learning_rate": 3.270903996460699e-05, "loss": 0.0002, "step": 2848 }, { "epoch": 1.17, "learning_rate": 3.269429287715676e-05, "loss": 0.0001, "step": 2850 }, { "epoch": 1.18, "learning_rate": 3.267954578970653e-05, "loss": 0.0001, "step": 2852 }, { "epoch": 1.18, "learning_rate": 3.266479870225631e-05, "loss": 0.0001, "step": 2854 }, { "epoch": 1.18, "learning_rate": 3.265005161480608e-05, "loss": 0.0006, "step": 2856 }, { "epoch": 1.18, "learning_rate": 3.263530452735585e-05, "loss": 0.0004, "step": 2858 }, { "epoch": 1.18, "learning_rate": 3.262055743990562e-05, "loss": 0.0006, "step": 2860 }, { "epoch": 1.18, "learning_rate": 3.260581035245539e-05, "loss": 0.0001, "step": 2862 }, { "epoch": 1.18, "learning_rate": 3.259106326500516e-05, "loss": 0.0002, "step": 2864 }, { "epoch": 1.18, "learning_rate": 3.257631617755493e-05, "loss": 0.0001, "step": 2866 }, { "epoch": 1.18, "learning_rate": 3.256156909010471e-05, "loss": 0.0001, "step": 2868 }, { "epoch": 1.18, "learning_rate": 3.254682200265448e-05, "loss": 0.0004, "step": 2870 }, { "epoch": 1.18, "learning_rate": 3.253207491520425e-05, "loss": 0.0001, "step": 2872 }, { "epoch": 1.18, "learning_rate": 3.251732782775402e-05, "loss": 0.0001, "step": 2874 }, { "epoch": 1.19, "learning_rate": 3.250258074030379e-05, "loss": 0.0007, "step": 2876 }, { "epoch": 1.19, "learning_rate": 3.248783365285356e-05, "loss": 0.0002, "step": 2878 }, { "epoch": 1.19, "learning_rate": 3.247308656540333e-05, "loss": 0.0002, "step": 2880 }, { "epoch": 1.19, "learning_rate": 3.2458339477953107e-05, "loss": 0.0002, "step": 2882 }, { "epoch": 1.19, "learning_rate": 3.244359239050288e-05, "loss": 0.0004, "step": 2884 }, { "epoch": 1.19, "learning_rate": 3.242884530305265e-05, "loss": 0.0002, "step": 2886 }, { "epoch": 1.19, "learning_rate": 3.241409821560242e-05, "loss": 0.0003, "step": 2888 }, { "epoch": 1.19, "learning_rate": 3.239935112815219e-05, "loss": 0.0002, "step": 2890 }, { "epoch": 1.19, "learning_rate": 3.238460404070196e-05, "loss": 0.0003, "step": 2892 }, { "epoch": 1.19, "learning_rate": 3.2369856953251736e-05, "loss": 0.0002, "step": 2894 }, { "epoch": 1.19, "learning_rate": 3.2355109865801506e-05, "loss": 0.0002, "step": 2896 }, { "epoch": 1.19, "learning_rate": 3.234036277835128e-05, "loss": 0.0002, "step": 2898 }, { "epoch": 1.19, "learning_rate": 3.2325615690901053e-05, "loss": 0.0002, "step": 2900 }, { "epoch": 1.2, "learning_rate": 3.2310868603450824e-05, "loss": 0.0001, "step": 2902 }, { "epoch": 1.2, "learning_rate": 3.2296121516000594e-05, "loss": 0.0002, "step": 2904 }, { "epoch": 1.2, "learning_rate": 3.2281374428550365e-05, "loss": 0.0001, "step": 2906 }, { "epoch": 1.2, "learning_rate": 3.2266627341100135e-05, "loss": 0.0002, "step": 2908 }, { "epoch": 1.2, "learning_rate": 3.2251880253649905e-05, "loss": 0.0002, "step": 2910 }, { "epoch": 1.2, "learning_rate": 3.2237133166199676e-05, "loss": 0.0001, "step": 2912 }, { "epoch": 1.2, "learning_rate": 3.222238607874945e-05, "loss": 0.0001, "step": 2914 }, { "epoch": 1.2, "learning_rate": 3.220763899129922e-05, "loss": 0.0001, "step": 2916 }, { "epoch": 1.2, "learning_rate": 3.2192891903848993e-05, "loss": 0.0001, "step": 2918 }, { "epoch": 1.2, "learning_rate": 3.2178144816398764e-05, "loss": 0.0002, "step": 2920 }, { "epoch": 1.2, "learning_rate": 3.2163397728948534e-05, "loss": 0.0001, "step": 2922 }, { "epoch": 1.2, "learning_rate": 3.2148650641498305e-05, "loss": 0.0001, "step": 2924 }, { "epoch": 1.21, "learning_rate": 3.2133903554048075e-05, "loss": 0.0001, "step": 2926 }, { "epoch": 1.21, "learning_rate": 3.211915646659785e-05, "loss": 0.0002, "step": 2928 }, { "epoch": 1.21, "learning_rate": 3.210440937914762e-05, "loss": 0.0001, "step": 2930 }, { "epoch": 1.21, "learning_rate": 3.208966229169739e-05, "loss": 0.0001, "step": 2932 }, { "epoch": 1.21, "learning_rate": 3.207491520424716e-05, "loss": 0.0001, "step": 2934 }, { "epoch": 1.21, "learning_rate": 3.2060168116796934e-05, "loss": 0.0001, "step": 2936 }, { "epoch": 1.21, "learning_rate": 3.2045421029346704e-05, "loss": 0.0002, "step": 2938 }, { "epoch": 1.21, "learning_rate": 3.2030673941896474e-05, "loss": 0.0001, "step": 2940 }, { "epoch": 1.21, "learning_rate": 3.201592685444625e-05, "loss": 0.0001, "step": 2942 }, { "epoch": 1.21, "learning_rate": 3.200117976699602e-05, "loss": 0.0001, "step": 2944 }, { "epoch": 1.21, "learning_rate": 3.198643267954579e-05, "loss": 0.0003, "step": 2946 }, { "epoch": 1.21, "learning_rate": 3.197168559209556e-05, "loss": 0.0001, "step": 2948 }, { "epoch": 1.22, "learning_rate": 3.195693850464533e-05, "loss": 0.0003, "step": 2950 }, { "epoch": 1.22, "learning_rate": 3.19421914171951e-05, "loss": 0.0008, "step": 2952 }, { "epoch": 1.22, "learning_rate": 3.1927444329744874e-05, "loss": 0.0001, "step": 2954 }, { "epoch": 1.22, "learning_rate": 3.191269724229465e-05, "loss": 0.0001, "step": 2956 }, { "epoch": 1.22, "learning_rate": 3.189795015484442e-05, "loss": 0.0001, "step": 2958 }, { "epoch": 1.22, "learning_rate": 3.188320306739419e-05, "loss": 0.0001, "step": 2960 }, { "epoch": 1.22, "learning_rate": 3.186845597994396e-05, "loss": 0.0002, "step": 2962 }, { "epoch": 1.22, "learning_rate": 3.185370889249373e-05, "loss": 0.0001, "step": 2964 }, { "epoch": 1.22, "learning_rate": 3.18389618050435e-05, "loss": 0.0001, "step": 2966 }, { "epoch": 1.22, "learning_rate": 3.182421471759327e-05, "loss": 0.0001, "step": 2968 }, { "epoch": 1.22, "learning_rate": 3.180946763014305e-05, "loss": 0.0001, "step": 2970 }, { "epoch": 1.22, "learning_rate": 3.179472054269282e-05, "loss": 0.0001, "step": 2972 }, { "epoch": 1.23, "learning_rate": 3.177997345524259e-05, "loss": 0.0001, "step": 2974 }, { "epoch": 1.23, "learning_rate": 3.176522636779236e-05, "loss": 0.0001, "step": 2976 }, { "epoch": 1.23, "learning_rate": 3.175047928034213e-05, "loss": 0.0001, "step": 2978 }, { "epoch": 1.23, "learning_rate": 3.17357321928919e-05, "loss": 0.0002, "step": 2980 }, { "epoch": 1.23, "learning_rate": 3.172098510544167e-05, "loss": 0.0001, "step": 2982 }, { "epoch": 1.23, "learning_rate": 3.170623801799145e-05, "loss": 0.0001, "step": 2984 }, { "epoch": 1.23, "learning_rate": 3.169149093054122e-05, "loss": 0.0001, "step": 2986 }, { "epoch": 1.23, "learning_rate": 3.167674384309099e-05, "loss": 0.0001, "step": 2988 }, { "epoch": 1.23, "learning_rate": 3.166199675564076e-05, "loss": 0.0001, "step": 2990 }, { "epoch": 1.23, "learning_rate": 3.164724966819053e-05, "loss": 0.0001, "step": 2992 }, { "epoch": 1.23, "learning_rate": 3.16325025807403e-05, "loss": 0.0001, "step": 2994 }, { "epoch": 1.23, "learning_rate": 3.161775549329007e-05, "loss": 0.0001, "step": 2996 }, { "epoch": 1.24, "learning_rate": 3.160300840583985e-05, "loss": 0.0001, "step": 2998 }, { "epoch": 1.24, "learning_rate": 3.158826131838962e-05, "loss": 0.0001, "step": 3000 }, { "epoch": 1.24, "learning_rate": 3.157351423093939e-05, "loss": 0.0001, "step": 3002 }, { "epoch": 1.24, "learning_rate": 3.155876714348916e-05, "loss": 0.0001, "step": 3004 }, { "epoch": 1.24, "learning_rate": 3.154402005603894e-05, "loss": 0.0001, "step": 3006 }, { "epoch": 1.24, "learning_rate": 3.152927296858871e-05, "loss": 0.0001, "step": 3008 }, { "epoch": 1.24, "learning_rate": 3.151452588113848e-05, "loss": 0.0002, "step": 3010 }, { "epoch": 1.24, "learning_rate": 3.149977879368825e-05, "loss": 0.0001, "step": 3012 }, { "epoch": 1.24, "learning_rate": 3.148503170623802e-05, "loss": 0.0001, "step": 3014 }, { "epoch": 1.24, "learning_rate": 3.1470284618787796e-05, "loss": 0.0001, "step": 3016 }, { "epoch": 1.24, "learning_rate": 3.1455537531337566e-05, "loss": 0.0001, "step": 3018 }, { "epoch": 1.24, "learning_rate": 3.1440790443887336e-05, "loss": 0.0001, "step": 3020 }, { "epoch": 1.25, "learning_rate": 3.142604335643711e-05, "loss": 0.0001, "step": 3022 }, { "epoch": 1.25, "learning_rate": 3.141129626898688e-05, "loss": 0.0001, "step": 3024 }, { "epoch": 1.25, "learning_rate": 3.139654918153665e-05, "loss": 0.0001, "step": 3026 }, { "epoch": 1.25, "learning_rate": 3.138180209408642e-05, "loss": 0.0001, "step": 3028 }, { "epoch": 1.25, "learning_rate": 3.1367055006636195e-05, "loss": 0.0001, "step": 3030 }, { "epoch": 1.25, "learning_rate": 3.1352307919185965e-05, "loss": 0.0001, "step": 3032 }, { "epoch": 1.25, "learning_rate": 3.1337560831735736e-05, "loss": 0.002, "step": 3034 }, { "epoch": 1.25, "learning_rate": 3.1322813744285506e-05, "loss": 0.0001, "step": 3036 }, { "epoch": 1.25, "learning_rate": 3.1308066656835276e-05, "loss": 0.0001, "step": 3038 }, { "epoch": 1.25, "learning_rate": 3.129331956938505e-05, "loss": 0.0001, "step": 3040 }, { "epoch": 1.25, "learning_rate": 3.127857248193482e-05, "loss": 0.0001, "step": 3042 }, { "epoch": 1.25, "learning_rate": 3.1263825394484594e-05, "loss": 0.0001, "step": 3044 }, { "epoch": 1.26, "learning_rate": 3.1249078307034365e-05, "loss": 0.0001, "step": 3046 }, { "epoch": 1.26, "learning_rate": 3.1234331219584135e-05, "loss": 0.0001, "step": 3048 }, { "epoch": 1.26, "learning_rate": 3.1219584132133905e-05, "loss": 0.0001, "step": 3050 }, { "epoch": 1.26, "learning_rate": 3.1204837044683676e-05, "loss": 0.0001, "step": 3052 }, { "epoch": 1.26, "learning_rate": 3.1190089957233446e-05, "loss": 0.0001, "step": 3054 }, { "epoch": 1.26, "learning_rate": 3.1175342869783217e-05, "loss": 0.0001, "step": 3056 }, { "epoch": 1.26, "learning_rate": 3.1160595782332994e-05, "loss": 0.0001, "step": 3058 }, { "epoch": 1.26, "learning_rate": 3.1145848694882764e-05, "loss": 0.0001, "step": 3060 }, { "epoch": 1.26, "learning_rate": 3.1131101607432534e-05, "loss": 0.0001, "step": 3062 }, { "epoch": 1.26, "learning_rate": 3.1116354519982305e-05, "loss": 0.0003, "step": 3064 }, { "epoch": 1.26, "learning_rate": 3.1101607432532075e-05, "loss": 0.0004, "step": 3066 }, { "epoch": 1.26, "learning_rate": 3.1086860345081845e-05, "loss": 0.0001, "step": 3068 }, { "epoch": 1.26, "learning_rate": 3.1072113257631616e-05, "loss": 0.0, "step": 3070 }, { "epoch": 1.27, "learning_rate": 3.105736617018139e-05, "loss": 0.0001, "step": 3072 }, { "epoch": 1.27, "learning_rate": 3.104261908273116e-05, "loss": 0.0001, "step": 3074 }, { "epoch": 1.27, "learning_rate": 3.1027871995280934e-05, "loss": 0.0001, "step": 3076 }, { "epoch": 1.27, "learning_rate": 3.1013124907830704e-05, "loss": 0.0001, "step": 3078 }, { "epoch": 1.27, "learning_rate": 3.0998377820380474e-05, "loss": 0.0001, "step": 3080 }, { "epoch": 1.27, "learning_rate": 3.0983630732930245e-05, "loss": 0.0005, "step": 3082 }, { "epoch": 1.27, "learning_rate": 3.0968883645480015e-05, "loss": 0.0001, "step": 3084 }, { "epoch": 1.27, "learning_rate": 3.095413655802979e-05, "loss": 0.0001, "step": 3086 }, { "epoch": 1.27, "learning_rate": 3.093938947057956e-05, "loss": 0.0003, "step": 3088 }, { "epoch": 1.27, "learning_rate": 3.092464238312933e-05, "loss": 0.0001, "step": 3090 }, { "epoch": 1.27, "learning_rate": 3.0909895295679103e-05, "loss": 0.0008, "step": 3092 }, { "epoch": 1.27, "learning_rate": 3.0895148208228874e-05, "loss": 0.0001, "step": 3094 }, { "epoch": 1.28, "learning_rate": 3.0880401120778644e-05, "loss": 0.0001, "step": 3096 }, { "epoch": 1.28, "learning_rate": 3.0865654033328415e-05, "loss": 0.0001, "step": 3098 }, { "epoch": 1.28, "learning_rate": 3.085090694587819e-05, "loss": 0.0001, "step": 3100 }, { "epoch": 1.28, "learning_rate": 3.083615985842796e-05, "loss": 0.0001, "step": 3102 }, { "epoch": 1.28, "learning_rate": 3.082141277097773e-05, "loss": 0.0001, "step": 3104 }, { "epoch": 1.28, "learning_rate": 3.08066656835275e-05, "loss": 0.0001, "step": 3106 }, { "epoch": 1.28, "learning_rate": 3.079191859607727e-05, "loss": 0.0002, "step": 3108 }, { "epoch": 1.28, "learning_rate": 3.0777171508627044e-05, "loss": 0.0005, "step": 3110 }, { "epoch": 1.28, "learning_rate": 3.0762424421176814e-05, "loss": 0.0001, "step": 3112 }, { "epoch": 1.28, "learning_rate": 3.074767733372659e-05, "loss": 0.0001, "step": 3114 }, { "epoch": 1.28, "learning_rate": 3.073293024627636e-05, "loss": 0.0001, "step": 3116 }, { "epoch": 1.28, "learning_rate": 3.071818315882614e-05, "loss": 0.0, "step": 3118 }, { "epoch": 1.29, "learning_rate": 3.070343607137591e-05, "loss": 0.0002, "step": 3120 }, { "epoch": 1.29, "learning_rate": 3.068868898392568e-05, "loss": 0.0001, "step": 3122 }, { "epoch": 1.29, "learning_rate": 3.067394189647545e-05, "loss": 0.0001, "step": 3124 }, { "epoch": 1.29, "learning_rate": 3.065919480902522e-05, "loss": 0.0001, "step": 3126 }, { "epoch": 1.29, "learning_rate": 3.064444772157499e-05, "loss": 0.0001, "step": 3128 }, { "epoch": 1.29, "learning_rate": 3.062970063412476e-05, "loss": 0.0001, "step": 3130 }, { "epoch": 1.29, "learning_rate": 3.061495354667454e-05, "loss": 0.0001, "step": 3132 }, { "epoch": 1.29, "learning_rate": 3.060020645922431e-05, "loss": 0.0001, "step": 3134 }, { "epoch": 1.29, "learning_rate": 3.058545937177408e-05, "loss": 0.0001, "step": 3136 }, { "epoch": 1.29, "learning_rate": 3.057071228432385e-05, "loss": 0.0001, "step": 3138 }, { "epoch": 1.29, "learning_rate": 3.055596519687362e-05, "loss": 0.0001, "step": 3140 }, { "epoch": 1.29, "learning_rate": 3.054121810942339e-05, "loss": 0.0001, "step": 3142 }, { "epoch": 1.3, "learning_rate": 3.052647102197316e-05, "loss": 0.0012, "step": 3144 }, { "epoch": 1.3, "learning_rate": 3.0511723934522934e-05, "loss": 0.0002, "step": 3146 }, { "epoch": 1.3, "learning_rate": 3.0496976847072704e-05, "loss": 0.0001, "step": 3148 }, { "epoch": 1.3, "learning_rate": 3.0482229759622478e-05, "loss": 0.0001, "step": 3150 }, { "epoch": 1.3, "learning_rate": 3.0467482672172248e-05, "loss": 0.0001, "step": 3152 }, { "epoch": 1.3, "learning_rate": 3.045273558472202e-05, "loss": 0.0, "step": 3154 }, { "epoch": 1.3, "learning_rate": 3.0437988497271792e-05, "loss": 0.0001, "step": 3156 }, { "epoch": 1.3, "learning_rate": 3.0423241409821563e-05, "loss": 0.0001, "step": 3158 }, { "epoch": 1.3, "learning_rate": 3.0408494322371333e-05, "loss": 0.0001, "step": 3160 }, { "epoch": 1.3, "learning_rate": 3.0393747234921104e-05, "loss": 0.0002, "step": 3162 }, { "epoch": 1.3, "learning_rate": 3.0379000147470877e-05, "loss": 0.0001, "step": 3164 }, { "epoch": 1.3, "learning_rate": 3.0364253060020648e-05, "loss": 0.0002, "step": 3166 }, { "epoch": 1.31, "learning_rate": 3.0349505972570418e-05, "loss": 0.0001, "step": 3168 }, { "epoch": 1.31, "learning_rate": 3.0334758885120192e-05, "loss": 0.0001, "step": 3170 }, { "epoch": 1.31, "learning_rate": 3.0320011797669962e-05, "loss": 0.0001, "step": 3172 }, { "epoch": 1.31, "learning_rate": 3.0305264710219732e-05, "loss": 0.0001, "step": 3174 }, { "epoch": 1.31, "learning_rate": 3.0290517622769503e-05, "loss": 0.0001, "step": 3176 }, { "epoch": 1.31, "learning_rate": 3.0275770535319277e-05, "loss": 0.0001, "step": 3178 }, { "epoch": 1.31, "learning_rate": 3.0261023447869047e-05, "loss": 0.0001, "step": 3180 }, { "epoch": 1.31, "learning_rate": 3.0246276360418817e-05, "loss": 0.0001, "step": 3182 }, { "epoch": 1.31, "learning_rate": 3.023152927296859e-05, "loss": 0.0003, "step": 3184 }, { "epoch": 1.31, "learning_rate": 3.021678218551836e-05, "loss": 0.0017, "step": 3186 }, { "epoch": 1.31, "learning_rate": 3.0202035098068132e-05, "loss": 0.0001, "step": 3188 }, { "epoch": 1.31, "learning_rate": 3.0187288010617902e-05, "loss": 0.0001, "step": 3190 }, { "epoch": 1.32, "learning_rate": 3.0172540923167676e-05, "loss": 0.001, "step": 3192 }, { "epoch": 1.32, "learning_rate": 3.0157793835717446e-05, "loss": 0.0001, "step": 3194 }, { "epoch": 1.32, "learning_rate": 3.0143046748267217e-05, "loss": 0.0001, "step": 3196 }, { "epoch": 1.32, "learning_rate": 3.0128299660816987e-05, "loss": 0.0001, "step": 3198 }, { "epoch": 1.32, "learning_rate": 3.011355257336676e-05, "loss": 0.0001, "step": 3200 }, { "epoch": 1.32, "learning_rate": 3.009880548591653e-05, "loss": 0.0004, "step": 3202 }, { "epoch": 1.32, "learning_rate": 3.00840583984663e-05, "loss": 0.0002, "step": 3204 }, { "epoch": 1.32, "learning_rate": 3.0069311311016075e-05, "loss": 0.0001, "step": 3206 }, { "epoch": 1.32, "learning_rate": 3.0054564223565846e-05, "loss": 0.0001, "step": 3208 }, { "epoch": 1.32, "learning_rate": 3.0039817136115616e-05, "loss": 0.0023, "step": 3210 }, { "epoch": 1.32, "learning_rate": 3.0025070048665386e-05, "loss": 0.0001, "step": 3212 }, { "epoch": 1.32, "learning_rate": 3.001032296121516e-05, "loss": 0.0015, "step": 3214 }, { "epoch": 1.33, "learning_rate": 2.999557587376493e-05, "loss": 0.0003, "step": 3216 }, { "epoch": 1.33, "learning_rate": 2.99808287863147e-05, "loss": 0.0002, "step": 3218 }, { "epoch": 1.33, "learning_rate": 2.9966081698864475e-05, "loss": 0.0001, "step": 3220 }, { "epoch": 1.33, "learning_rate": 2.9951334611414245e-05, "loss": 0.0001, "step": 3222 }, { "epoch": 1.33, "learning_rate": 2.9936587523964015e-05, "loss": 0.0002, "step": 3224 }, { "epoch": 1.33, "learning_rate": 2.9921840436513786e-05, "loss": 0.0001, "step": 3226 }, { "epoch": 1.33, "learning_rate": 2.9907093349063563e-05, "loss": 0.0002, "step": 3228 }, { "epoch": 1.33, "learning_rate": 2.9892346261613337e-05, "loss": 0.0001, "step": 3230 }, { "epoch": 1.33, "learning_rate": 2.9877599174163107e-05, "loss": 0.0002, "step": 3232 }, { "epoch": 1.33, "learning_rate": 2.9862852086712877e-05, "loss": 0.0001, "step": 3234 }, { "epoch": 1.33, "learning_rate": 2.9848104999262648e-05, "loss": 0.0001, "step": 3236 }, { "epoch": 1.33, "learning_rate": 2.983335791181242e-05, "loss": 0.0001, "step": 3238 }, { "epoch": 1.33, "learning_rate": 2.9818610824362192e-05, "loss": 0.0001, "step": 3240 }, { "epoch": 1.34, "learning_rate": 2.9803863736911962e-05, "loss": 0.0001, "step": 3242 }, { "epoch": 1.34, "learning_rate": 2.9789116649461736e-05, "loss": 0.0001, "step": 3244 }, { "epoch": 1.34, "learning_rate": 2.9774369562011506e-05, "loss": 0.0001, "step": 3246 }, { "epoch": 1.34, "learning_rate": 2.9759622474561277e-05, "loss": 0.0001, "step": 3248 }, { "epoch": 1.34, "learning_rate": 2.9744875387111047e-05, "loss": 0.0001, "step": 3250 }, { "epoch": 1.34, "learning_rate": 2.973012829966082e-05, "loss": 0.0002, "step": 3252 }, { "epoch": 1.34, "learning_rate": 2.971538121221059e-05, "loss": 0.0001, "step": 3254 }, { "epoch": 1.34, "learning_rate": 2.970063412476036e-05, "loss": 0.0002, "step": 3256 }, { "epoch": 1.34, "learning_rate": 2.9685887037310135e-05, "loss": 0.0001, "step": 3258 }, { "epoch": 1.34, "learning_rate": 2.9671139949859906e-05, "loss": 0.0001, "step": 3260 }, { "epoch": 1.34, "learning_rate": 2.9656392862409676e-05, "loss": 0.0001, "step": 3262 }, { "epoch": 1.34, "learning_rate": 2.9641645774959446e-05, "loss": 0.0003, "step": 3264 }, { "epoch": 1.35, "learning_rate": 2.962689868750922e-05, "loss": 0.0001, "step": 3266 }, { "epoch": 1.35, "learning_rate": 2.961215160005899e-05, "loss": 0.0001, "step": 3268 }, { "epoch": 1.35, "learning_rate": 2.959740451260876e-05, "loss": 0.0001, "step": 3270 }, { "epoch": 1.35, "learning_rate": 2.9582657425158535e-05, "loss": 0.0016, "step": 3272 }, { "epoch": 1.35, "learning_rate": 2.9567910337708305e-05, "loss": 0.0001, "step": 3274 }, { "epoch": 1.35, "learning_rate": 2.9553163250258075e-05, "loss": 0.0001, "step": 3276 }, { "epoch": 1.35, "learning_rate": 2.9538416162807846e-05, "loss": 0.0001, "step": 3278 }, { "epoch": 1.35, "learning_rate": 2.952366907535762e-05, "loss": 0.0001, "step": 3280 }, { "epoch": 1.35, "learning_rate": 2.950892198790739e-05, "loss": 0.0001, "step": 3282 }, { "epoch": 1.35, "learning_rate": 2.949417490045716e-05, "loss": 0.0001, "step": 3284 }, { "epoch": 1.35, "learning_rate": 2.9479427813006934e-05, "loss": 0.0001, "step": 3286 }, { "epoch": 1.35, "learning_rate": 2.9464680725556704e-05, "loss": 0.0001, "step": 3288 }, { "epoch": 1.36, "learning_rate": 2.9449933638106475e-05, "loss": 0.0001, "step": 3290 }, { "epoch": 1.36, "learning_rate": 2.9435186550656245e-05, "loss": 0.0001, "step": 3292 }, { "epoch": 1.36, "learning_rate": 2.942043946320602e-05, "loss": 0.0001, "step": 3294 }, { "epoch": 1.36, "learning_rate": 2.940569237575579e-05, "loss": 0.0001, "step": 3296 }, { "epoch": 1.36, "learning_rate": 2.939094528830556e-05, "loss": 0.0001, "step": 3298 }, { "epoch": 1.36, "learning_rate": 2.937619820085533e-05, "loss": 0.0001, "step": 3300 }, { "epoch": 1.36, "learning_rate": 2.9361451113405104e-05, "loss": 0.0001, "step": 3302 }, { "epoch": 1.36, "learning_rate": 2.9346704025954874e-05, "loss": 0.0001, "step": 3304 }, { "epoch": 1.36, "learning_rate": 2.9331956938504644e-05, "loss": 0.0001, "step": 3306 }, { "epoch": 1.36, "learning_rate": 2.9317209851054418e-05, "loss": 0.0001, "step": 3308 }, { "epoch": 1.36, "learning_rate": 2.930246276360419e-05, "loss": 0.0002, "step": 3310 }, { "epoch": 1.36, "learning_rate": 2.928771567615396e-05, "loss": 0.0001, "step": 3312 }, { "epoch": 1.37, "learning_rate": 2.927296858870373e-05, "loss": 0.0001, "step": 3314 }, { "epoch": 1.37, "learning_rate": 2.9258221501253503e-05, "loss": 0.0001, "step": 3316 }, { "epoch": 1.37, "learning_rate": 2.9243474413803273e-05, "loss": 0.0001, "step": 3318 }, { "epoch": 1.37, "learning_rate": 2.9228727326353044e-05, "loss": 0.0001, "step": 3320 }, { "epoch": 1.37, "learning_rate": 2.9213980238902818e-05, "loss": 0.0001, "step": 3322 }, { "epoch": 1.37, "learning_rate": 2.9199233151452588e-05, "loss": 0.0001, "step": 3324 }, { "epoch": 1.37, "learning_rate": 2.9184486064002358e-05, "loss": 0.0001, "step": 3326 }, { "epoch": 1.37, "learning_rate": 2.916973897655213e-05, "loss": 0.0001, "step": 3328 }, { "epoch": 1.37, "learning_rate": 2.9154991889101902e-05, "loss": 0.0001, "step": 3330 }, { "epoch": 1.37, "learning_rate": 2.9140244801651673e-05, "loss": 0.0001, "step": 3332 }, { "epoch": 1.37, "learning_rate": 2.9125497714201443e-05, "loss": 0.0001, "step": 3334 }, { "epoch": 1.37, "learning_rate": 2.9110750626751217e-05, "loss": 0.0001, "step": 3336 }, { "epoch": 1.38, "learning_rate": 2.909600353930099e-05, "loss": 0.0001, "step": 3338 }, { "epoch": 1.38, "learning_rate": 2.9081256451850764e-05, "loss": 0.0001, "step": 3340 }, { "epoch": 1.38, "learning_rate": 2.9066509364400535e-05, "loss": 0.0001, "step": 3342 }, { "epoch": 1.38, "learning_rate": 2.9051762276950305e-05, "loss": 0.0001, "step": 3344 }, { "epoch": 1.38, "learning_rate": 2.903701518950008e-05, "loss": 0.0001, "step": 3346 }, { "epoch": 1.38, "learning_rate": 2.902226810204985e-05, "loss": 0.0001, "step": 3348 }, { "epoch": 1.38, "learning_rate": 2.900752101459962e-05, "loss": 0.0001, "step": 3350 }, { "epoch": 1.38, "learning_rate": 2.899277392714939e-05, "loss": 0.0002, "step": 3352 }, { "epoch": 1.38, "learning_rate": 2.8978026839699164e-05, "loss": 0.0001, "step": 3354 }, { "epoch": 1.38, "learning_rate": 2.8963279752248934e-05, "loss": 0.0001, "step": 3356 }, { "epoch": 1.38, "learning_rate": 2.8948532664798704e-05, "loss": 0.0001, "step": 3358 }, { "epoch": 1.38, "learning_rate": 2.8933785577348478e-05, "loss": 0.0001, "step": 3360 }, { "epoch": 1.39, "learning_rate": 2.891903848989825e-05, "loss": 0.0001, "step": 3362 }, { "epoch": 1.39, "learning_rate": 2.890429140244802e-05, "loss": 0.0001, "step": 3364 }, { "epoch": 1.39, "learning_rate": 2.888954431499779e-05, "loss": 0.0001, "step": 3366 }, { "epoch": 1.39, "learning_rate": 2.8874797227547563e-05, "loss": 0.0001, "step": 3368 }, { "epoch": 1.39, "learning_rate": 2.8860050140097333e-05, "loss": 0.0, "step": 3370 }, { "epoch": 1.39, "learning_rate": 2.8845303052647104e-05, "loss": 0.0001, "step": 3372 }, { "epoch": 1.39, "learning_rate": 2.8830555965196878e-05, "loss": 0.0001, "step": 3374 }, { "epoch": 1.39, "learning_rate": 2.8815808877746648e-05, "loss": 0.0, "step": 3376 }, { "epoch": 1.39, "learning_rate": 2.8801061790296418e-05, "loss": 0.0001, "step": 3378 }, { "epoch": 1.39, "learning_rate": 2.878631470284619e-05, "loss": 0.0001, "step": 3380 }, { "epoch": 1.39, "learning_rate": 2.8771567615395962e-05, "loss": 0.0001, "step": 3382 }, { "epoch": 1.39, "learning_rate": 2.8756820527945733e-05, "loss": 0.0, "step": 3384 }, { "epoch": 1.4, "learning_rate": 2.8742073440495503e-05, "loss": 0.0001, "step": 3386 }, { "epoch": 1.4, "learning_rate": 2.8727326353045277e-05, "loss": 0.0, "step": 3388 }, { "epoch": 1.4, "learning_rate": 2.8712579265595047e-05, "loss": 0.0, "step": 3390 }, { "epoch": 1.4, "learning_rate": 2.8697832178144818e-05, "loss": 0.0001, "step": 3392 }, { "epoch": 1.4, "learning_rate": 2.8683085090694588e-05, "loss": 0.0001, "step": 3394 }, { "epoch": 1.4, "learning_rate": 2.8668338003244362e-05, "loss": 0.0001, "step": 3396 }, { "epoch": 1.4, "learning_rate": 2.8653590915794132e-05, "loss": 0.0001, "step": 3398 }, { "epoch": 1.4, "learning_rate": 2.8638843828343902e-05, "loss": 0.0002, "step": 3400 }, { "epoch": 1.4, "learning_rate": 2.8624096740893673e-05, "loss": 0.0, "step": 3402 }, { "epoch": 1.4, "learning_rate": 2.8609349653443447e-05, "loss": 0.0001, "step": 3404 }, { "epoch": 1.4, "learning_rate": 2.8594602565993217e-05, "loss": 0.0, "step": 3406 }, { "epoch": 1.4, "learning_rate": 2.8579855478542987e-05, "loss": 0.0001, "step": 3408 }, { "epoch": 1.41, "learning_rate": 2.856510839109276e-05, "loss": 0.0001, "step": 3410 }, { "epoch": 1.41, "learning_rate": 2.855036130364253e-05, "loss": 0.0001, "step": 3412 }, { "epoch": 1.41, "learning_rate": 2.8535614216192302e-05, "loss": 0.0, "step": 3414 }, { "epoch": 1.41, "learning_rate": 2.8520867128742072e-05, "loss": 0.0001, "step": 3416 }, { "epoch": 1.41, "learning_rate": 2.8506120041291846e-05, "loss": 0.0001, "step": 3418 }, { "epoch": 1.41, "learning_rate": 2.8491372953841616e-05, "loss": 0.0001, "step": 3420 }, { "epoch": 1.41, "learning_rate": 2.8476625866391387e-05, "loss": 0.0, "step": 3422 }, { "epoch": 1.41, "learning_rate": 2.846187877894116e-05, "loss": 0.0, "step": 3424 }, { "epoch": 1.41, "learning_rate": 2.844713169149093e-05, "loss": 0.0001, "step": 3426 }, { "epoch": 1.41, "learning_rate": 2.84323846040407e-05, "loss": 0.0001, "step": 3428 }, { "epoch": 1.41, "learning_rate": 2.841763751659047e-05, "loss": 0.0001, "step": 3430 }, { "epoch": 1.41, "learning_rate": 2.8402890429140245e-05, "loss": 0.0001, "step": 3432 }, { "epoch": 1.41, "learning_rate": 2.8388143341690016e-05, "loss": 0.0001, "step": 3434 }, { "epoch": 1.42, "learning_rate": 2.8373396254239786e-05, "loss": 0.0, "step": 3436 }, { "epoch": 1.42, "learning_rate": 2.835864916678956e-05, "loss": 0.0001, "step": 3438 }, { "epoch": 1.42, "learning_rate": 2.834390207933933e-05, "loss": 0.0001, "step": 3440 }, { "epoch": 1.42, "learning_rate": 2.83291549918891e-05, "loss": 0.0, "step": 3442 }, { "epoch": 1.42, "learning_rate": 2.831440790443887e-05, "loss": 0.0001, "step": 3444 }, { "epoch": 1.42, "learning_rate": 2.8299660816988645e-05, "loss": 0.0, "step": 3446 }, { "epoch": 1.42, "learning_rate": 2.8284913729538415e-05, "loss": 0.0, "step": 3448 }, { "epoch": 1.42, "learning_rate": 2.8270166642088192e-05, "loss": 0.0001, "step": 3450 }, { "epoch": 1.42, "learning_rate": 2.8255419554637962e-05, "loss": 0.0, "step": 3452 }, { "epoch": 1.42, "learning_rate": 2.8240672467187733e-05, "loss": 0.0001, "step": 3454 }, { "epoch": 1.42, "learning_rate": 2.8225925379737507e-05, "loss": 0.0001, "step": 3456 }, { "epoch": 1.42, "learning_rate": 2.8211178292287277e-05, "loss": 0.0001, "step": 3458 }, { "epoch": 1.43, "learning_rate": 2.8196431204837047e-05, "loss": 0.0001, "step": 3460 }, { "epoch": 1.43, "learning_rate": 2.818168411738682e-05, "loss": 0.0, "step": 3462 }, { "epoch": 1.43, "learning_rate": 2.816693702993659e-05, "loss": 0.0, "step": 3464 }, { "epoch": 1.43, "learning_rate": 2.8152189942486362e-05, "loss": 0.0, "step": 3466 }, { "epoch": 1.43, "learning_rate": 2.8137442855036132e-05, "loss": 0.0001, "step": 3468 }, { "epoch": 1.43, "learning_rate": 2.8122695767585906e-05, "loss": 0.0, "step": 3470 }, { "epoch": 1.43, "learning_rate": 2.8107948680135676e-05, "loss": 0.0, "step": 3472 }, { "epoch": 1.43, "learning_rate": 2.8093201592685447e-05, "loss": 0.0001, "step": 3474 }, { "epoch": 1.43, "learning_rate": 2.807845450523522e-05, "loss": 0.0, "step": 3476 }, { "epoch": 1.43, "learning_rate": 2.806370741778499e-05, "loss": 0.0, "step": 3478 }, { "epoch": 1.43, "learning_rate": 2.804896033033476e-05, "loss": 0.0001, "step": 3480 }, { "epoch": 1.43, "learning_rate": 2.803421324288453e-05, "loss": 0.0, "step": 3482 }, { "epoch": 1.44, "learning_rate": 2.8019466155434305e-05, "loss": 0.0001, "step": 3484 }, { "epoch": 1.44, "learning_rate": 2.8004719067984076e-05, "loss": 0.0001, "step": 3486 }, { "epoch": 1.44, "learning_rate": 2.7989971980533846e-05, "loss": 0.0, "step": 3488 }, { "epoch": 1.44, "learning_rate": 2.7975224893083616e-05, "loss": 0.0, "step": 3490 }, { "epoch": 1.44, "learning_rate": 2.796047780563339e-05, "loss": 0.0001, "step": 3492 }, { "epoch": 1.44, "learning_rate": 2.794573071818316e-05, "loss": 0.0, "step": 3494 }, { "epoch": 1.44, "learning_rate": 2.793098363073293e-05, "loss": 0.0001, "step": 3496 }, { "epoch": 1.44, "learning_rate": 2.7916236543282705e-05, "loss": 0.0, "step": 3498 }, { "epoch": 1.44, "learning_rate": 2.7901489455832475e-05, "loss": 0.0001, "step": 3500 }, { "epoch": 1.44, "learning_rate": 2.7886742368382245e-05, "loss": 0.0001, "step": 3502 }, { "epoch": 1.44, "learning_rate": 2.7871995280932016e-05, "loss": 0.0001, "step": 3504 }, { "epoch": 1.44, "learning_rate": 2.785724819348179e-05, "loss": 0.0, "step": 3506 }, { "epoch": 1.45, "learning_rate": 2.784250110603156e-05, "loss": 0.0001, "step": 3508 }, { "epoch": 1.45, "learning_rate": 2.782775401858133e-05, "loss": 0.0, "step": 3510 }, { "epoch": 1.45, "learning_rate": 2.7813006931131104e-05, "loss": 0.0, "step": 3512 }, { "epoch": 1.45, "learning_rate": 2.7798259843680874e-05, "loss": 0.0001, "step": 3514 }, { "epoch": 1.45, "learning_rate": 2.7783512756230645e-05, "loss": 0.0002, "step": 3516 }, { "epoch": 1.45, "learning_rate": 2.7768765668780415e-05, "loss": 0.0, "step": 3518 }, { "epoch": 1.45, "learning_rate": 2.775401858133019e-05, "loss": 0.0003, "step": 3520 }, { "epoch": 1.45, "learning_rate": 2.773927149387996e-05, "loss": 0.0, "step": 3522 }, { "epoch": 1.45, "learning_rate": 2.772452440642973e-05, "loss": 0.0, "step": 3524 }, { "epoch": 1.45, "learning_rate": 2.7709777318979503e-05, "loss": 0.0, "step": 3526 }, { "epoch": 1.45, "learning_rate": 2.7695030231529274e-05, "loss": 0.0, "step": 3528 }, { "epoch": 1.45, "learning_rate": 2.7680283144079044e-05, "loss": 0.0, "step": 3530 }, { "epoch": 1.46, "learning_rate": 2.7665536056628814e-05, "loss": 0.0, "step": 3532 }, { "epoch": 1.46, "learning_rate": 2.7650788969178588e-05, "loss": 0.0, "step": 3534 }, { "epoch": 1.46, "learning_rate": 2.763604188172836e-05, "loss": 0.0001, "step": 3536 }, { "epoch": 1.46, "learning_rate": 2.762129479427813e-05, "loss": 0.0, "step": 3538 }, { "epoch": 1.46, "learning_rate": 2.7606547706827903e-05, "loss": 0.0001, "step": 3540 }, { "epoch": 1.46, "learning_rate": 2.7591800619377673e-05, "loss": 0.0001, "step": 3542 }, { "epoch": 1.46, "learning_rate": 2.7577053531927443e-05, "loss": 0.0001, "step": 3544 }, { "epoch": 1.46, "learning_rate": 2.7562306444477214e-05, "loss": 0.0, "step": 3546 }, { "epoch": 1.46, "learning_rate": 2.7547559357026987e-05, "loss": 0.0001, "step": 3548 }, { "epoch": 1.46, "learning_rate": 2.7532812269576758e-05, "loss": 0.0, "step": 3550 }, { "epoch": 1.46, "learning_rate": 2.7518065182126528e-05, "loss": 0.0001, "step": 3552 }, { "epoch": 1.46, "learning_rate": 2.75033180946763e-05, "loss": 0.0, "step": 3554 }, { "epoch": 1.47, "learning_rate": 2.7488571007226072e-05, "loss": 0.0001, "step": 3556 }, { "epoch": 1.47, "learning_rate": 2.7473823919775843e-05, "loss": 0.0001, "step": 3558 }, { "epoch": 1.47, "learning_rate": 2.7459076832325613e-05, "loss": 0.0001, "step": 3560 }, { "epoch": 1.47, "learning_rate": 2.744432974487539e-05, "loss": 0.0, "step": 3562 }, { "epoch": 1.47, "learning_rate": 2.7429582657425164e-05, "loss": 0.0, "step": 3564 }, { "epoch": 1.47, "learning_rate": 2.7414835569974934e-05, "loss": 0.0001, "step": 3566 }, { "epoch": 1.47, "learning_rate": 2.7400088482524705e-05, "loss": 0.0, "step": 3568 }, { "epoch": 1.47, "learning_rate": 2.7385341395074475e-05, "loss": 0.0, "step": 3570 }, { "epoch": 1.47, "learning_rate": 2.737059430762425e-05, "loss": 0.0, "step": 3572 }, { "epoch": 1.47, "learning_rate": 2.735584722017402e-05, "loss": 0.0, "step": 3574 }, { "epoch": 1.47, "learning_rate": 2.734110013272379e-05, "loss": 0.0001, "step": 3576 }, { "epoch": 1.47, "learning_rate": 2.7326353045273563e-05, "loss": 0.0007, "step": 3578 }, { "epoch": 1.48, "learning_rate": 2.7311605957823334e-05, "loss": 0.0, "step": 3580 }, { "epoch": 1.48, "learning_rate": 2.7296858870373104e-05, "loss": 0.0, "step": 3582 }, { "epoch": 1.48, "learning_rate": 2.7282111782922874e-05, "loss": 0.0001, "step": 3584 }, { "epoch": 1.48, "learning_rate": 2.7267364695472648e-05, "loss": 0.0, "step": 3586 }, { "epoch": 1.48, "learning_rate": 2.725261760802242e-05, "loss": 0.0001, "step": 3588 }, { "epoch": 1.48, "learning_rate": 2.723787052057219e-05, "loss": 0.0001, "step": 3590 }, { "epoch": 1.48, "learning_rate": 2.722312343312196e-05, "loss": 0.0001, "step": 3592 }, { "epoch": 1.48, "learning_rate": 2.7208376345671733e-05, "loss": 0.0001, "step": 3594 }, { "epoch": 1.48, "learning_rate": 2.7193629258221503e-05, "loss": 0.0003, "step": 3596 }, { "epoch": 1.48, "learning_rate": 2.7178882170771274e-05, "loss": 0.0001, "step": 3598 }, { "epoch": 1.48, "learning_rate": 2.7164135083321047e-05, "loss": 0.0001, "step": 3600 }, { "epoch": 1.48, "learning_rate": 2.7149387995870818e-05, "loss": 0.0, "step": 3602 }, { "epoch": 1.48, "learning_rate": 2.7134640908420588e-05, "loss": 0.0, "step": 3604 }, { "epoch": 1.49, "learning_rate": 2.711989382097036e-05, "loss": 0.0001, "step": 3606 }, { "epoch": 1.49, "learning_rate": 2.7105146733520132e-05, "loss": 0.0, "step": 3608 }, { "epoch": 1.49, "learning_rate": 2.7090399646069903e-05, "loss": 0.0, "step": 3610 }, { "epoch": 1.49, "learning_rate": 2.7075652558619673e-05, "loss": 0.0, "step": 3612 }, { "epoch": 1.49, "learning_rate": 2.7060905471169447e-05, "loss": 0.0, "step": 3614 }, { "epoch": 1.49, "learning_rate": 2.7046158383719217e-05, "loss": 0.0, "step": 3616 }, { "epoch": 1.49, "learning_rate": 2.7031411296268988e-05, "loss": 0.0, "step": 3618 }, { "epoch": 1.49, "learning_rate": 2.7016664208818758e-05, "loss": 0.0001, "step": 3620 }, { "epoch": 1.49, "learning_rate": 2.700191712136853e-05, "loss": 0.0001, "step": 3622 }, { "epoch": 1.49, "learning_rate": 2.6987170033918302e-05, "loss": 0.0001, "step": 3624 }, { "epoch": 1.49, "learning_rate": 2.6972422946468072e-05, "loss": 0.0, "step": 3626 }, { "epoch": 1.49, "learning_rate": 2.6957675859017846e-05, "loss": 0.0001, "step": 3628 }, { "epoch": 1.5, "learning_rate": 2.6942928771567617e-05, "loss": 0.0001, "step": 3630 }, { "epoch": 1.5, "learning_rate": 2.6928181684117387e-05, "loss": 0.0001, "step": 3632 }, { "epoch": 1.5, "learning_rate": 2.6913434596667157e-05, "loss": 0.0001, "step": 3634 }, { "epoch": 1.5, "learning_rate": 2.689868750921693e-05, "loss": 0.0, "step": 3636 }, { "epoch": 1.5, "learning_rate": 2.68839404217667e-05, "loss": 0.0001, "step": 3638 }, { "epoch": 1.5, "learning_rate": 2.6869193334316472e-05, "loss": 0.0, "step": 3640 }, { "epoch": 1.5, "learning_rate": 2.6854446246866246e-05, "loss": 0.0001, "step": 3642 }, { "epoch": 1.5, "learning_rate": 2.6839699159416016e-05, "loss": 0.0, "step": 3644 }, { "epoch": 1.5, "learning_rate": 2.6824952071965786e-05, "loss": 0.0001, "step": 3646 }, { "epoch": 1.5, "learning_rate": 2.6810204984515557e-05, "loss": 0.0, "step": 3648 }, { "epoch": 1.5, "learning_rate": 2.679545789706533e-05, "loss": 0.0001, "step": 3650 }, { "epoch": 1.5, "learning_rate": 2.67807108096151e-05, "loss": 0.0001, "step": 3652 }, { "epoch": 1.51, "learning_rate": 2.676596372216487e-05, "loss": 0.0001, "step": 3654 }, { "epoch": 1.51, "learning_rate": 2.675121663471464e-05, "loss": 0.0, "step": 3656 }, { "epoch": 1.51, "learning_rate": 2.6736469547264415e-05, "loss": 0.0001, "step": 3658 }, { "epoch": 1.51, "learning_rate": 2.6721722459814186e-05, "loss": 0.0001, "step": 3660 }, { "epoch": 1.51, "learning_rate": 2.6706975372363956e-05, "loss": 0.0001, "step": 3662 }, { "epoch": 1.51, "learning_rate": 2.669222828491373e-05, "loss": 0.0004, "step": 3664 }, { "epoch": 1.51, "learning_rate": 2.66774811974635e-05, "loss": 0.0, "step": 3666 }, { "epoch": 1.51, "learning_rate": 2.666273411001327e-05, "loss": 0.0003, "step": 3668 }, { "epoch": 1.51, "learning_rate": 2.664798702256304e-05, "loss": 0.0001, "step": 3670 }, { "epoch": 1.51, "learning_rate": 2.6633239935112818e-05, "loss": 0.0, "step": 3672 }, { "epoch": 1.51, "learning_rate": 2.6618492847662592e-05, "loss": 0.0, "step": 3674 }, { "epoch": 1.51, "learning_rate": 2.6603745760212362e-05, "loss": 0.0, "step": 3676 }, { "epoch": 1.52, "learning_rate": 2.6588998672762132e-05, "loss": 0.0001, "step": 3678 }, { "epoch": 1.52, "learning_rate": 2.6574251585311906e-05, "loss": 0.0001, "step": 3680 }, { "epoch": 1.52, "learning_rate": 2.6559504497861677e-05, "loss": 0.0, "step": 3682 }, { "epoch": 1.52, "learning_rate": 2.6544757410411447e-05, "loss": 0.0001, "step": 3684 }, { "epoch": 1.52, "learning_rate": 2.6530010322961217e-05, "loss": 0.0001, "step": 3686 }, { "epoch": 1.52, "learning_rate": 2.651526323551099e-05, "loss": 0.0001, "step": 3688 }, { "epoch": 1.52, "learning_rate": 2.650051614806076e-05, "loss": 0.0, "step": 3690 }, { "epoch": 1.52, "learning_rate": 2.6485769060610532e-05, "loss": 0.0001, "step": 3692 }, { "epoch": 1.52, "learning_rate": 2.6471021973160302e-05, "loss": 0.0001, "step": 3694 }, { "epoch": 1.52, "learning_rate": 2.6456274885710076e-05, "loss": 0.0001, "step": 3696 }, { "epoch": 1.52, "learning_rate": 2.6441527798259846e-05, "loss": 0.0001, "step": 3698 }, { "epoch": 1.52, "learning_rate": 2.6426780710809617e-05, "loss": 0.0, "step": 3700 }, { "epoch": 1.53, "learning_rate": 2.641203362335939e-05, "loss": 0.0, "step": 3702 }, { "epoch": 1.53, "learning_rate": 2.639728653590916e-05, "loss": 0.0001, "step": 3704 }, { "epoch": 1.53, "learning_rate": 2.638253944845893e-05, "loss": 0.0, "step": 3706 }, { "epoch": 1.53, "learning_rate": 2.63677923610087e-05, "loss": 0.0, "step": 3708 }, { "epoch": 1.53, "learning_rate": 2.6353045273558475e-05, "loss": 0.0001, "step": 3710 }, { "epoch": 1.53, "learning_rate": 2.6338298186108246e-05, "loss": 0.0001, "step": 3712 }, { "epoch": 1.53, "learning_rate": 2.6323551098658016e-05, "loss": 0.0001, "step": 3714 }, { "epoch": 1.53, "learning_rate": 2.630880401120779e-05, "loss": 0.0, "step": 3716 }, { "epoch": 1.53, "learning_rate": 2.629405692375756e-05, "loss": 0.0, "step": 3718 }, { "epoch": 1.53, "learning_rate": 2.627930983630733e-05, "loss": 0.0, "step": 3720 }, { "epoch": 1.53, "learning_rate": 2.62645627488571e-05, "loss": 0.0001, "step": 3722 }, { "epoch": 1.53, "learning_rate": 2.6249815661406875e-05, "loss": 0.0, "step": 3724 }, { "epoch": 1.54, "learning_rate": 2.6235068573956645e-05, "loss": 0.0003, "step": 3726 }, { "epoch": 1.54, "learning_rate": 2.6220321486506415e-05, "loss": 0.0, "step": 3728 }, { "epoch": 1.54, "learning_rate": 2.620557439905619e-05, "loss": 0.0, "step": 3730 }, { "epoch": 1.54, "learning_rate": 2.619082731160596e-05, "loss": 0.0001, "step": 3732 }, { "epoch": 1.54, "learning_rate": 2.617608022415573e-05, "loss": 0.0, "step": 3734 }, { "epoch": 1.54, "learning_rate": 2.61613331367055e-05, "loss": 0.0, "step": 3736 }, { "epoch": 1.54, "learning_rate": 2.6146586049255274e-05, "loss": 0.0001, "step": 3738 }, { "epoch": 1.54, "learning_rate": 2.6131838961805044e-05, "loss": 0.0001, "step": 3740 }, { "epoch": 1.54, "learning_rate": 2.6117091874354815e-05, "loss": 0.0001, "step": 3742 }, { "epoch": 1.54, "learning_rate": 2.610234478690459e-05, "loss": 0.0001, "step": 3744 }, { "epoch": 1.54, "learning_rate": 2.608759769945436e-05, "loss": 0.0, "step": 3746 }, { "epoch": 1.54, "learning_rate": 2.607285061200413e-05, "loss": 0.0001, "step": 3748 }, { "epoch": 1.55, "learning_rate": 2.60581035245539e-05, "loss": 0.0001, "step": 3750 }, { "epoch": 1.55, "learning_rate": 2.6043356437103673e-05, "loss": 0.0, "step": 3752 }, { "epoch": 1.55, "learning_rate": 2.6028609349653444e-05, "loss": 0.0, "step": 3754 }, { "epoch": 1.55, "learning_rate": 2.6013862262203214e-05, "loss": 0.0001, "step": 3756 }, { "epoch": 1.55, "learning_rate": 2.5999115174752984e-05, "loss": 0.0, "step": 3758 }, { "epoch": 1.55, "learning_rate": 2.5984368087302758e-05, "loss": 0.0, "step": 3760 }, { "epoch": 1.55, "learning_rate": 2.596962099985253e-05, "loss": 0.0001, "step": 3762 }, { "epoch": 1.55, "learning_rate": 2.59548739124023e-05, "loss": 0.0, "step": 3764 }, { "epoch": 1.55, "learning_rate": 2.5940126824952073e-05, "loss": 0.0001, "step": 3766 }, { "epoch": 1.55, "learning_rate": 2.5925379737501843e-05, "loss": 0.0, "step": 3768 }, { "epoch": 1.55, "learning_rate": 2.5910632650051613e-05, "loss": 0.0001, "step": 3770 }, { "epoch": 1.55, "learning_rate": 2.5895885562601384e-05, "loss": 0.0, "step": 3772 }, { "epoch": 1.56, "learning_rate": 2.5881138475151157e-05, "loss": 0.0001, "step": 3774 }, { "epoch": 1.56, "learning_rate": 2.5866391387700928e-05, "loss": 0.0001, "step": 3776 }, { "epoch": 1.56, "learning_rate": 2.5851644300250698e-05, "loss": 0.0, "step": 3778 }, { "epoch": 1.56, "learning_rate": 2.5836897212800472e-05, "loss": 0.0001, "step": 3780 }, { "epoch": 1.56, "learning_rate": 2.5822150125350242e-05, "loss": 0.0001, "step": 3782 }, { "epoch": 1.56, "learning_rate": 2.580740303790002e-05, "loss": 0.0, "step": 3784 }, { "epoch": 1.56, "learning_rate": 2.579265595044979e-05, "loss": 0.0, "step": 3786 }, { "epoch": 1.56, "learning_rate": 2.577790886299956e-05, "loss": 0.0, "step": 3788 }, { "epoch": 1.56, "learning_rate": 2.5763161775549334e-05, "loss": 0.0, "step": 3790 }, { "epoch": 1.56, "learning_rate": 2.5748414688099104e-05, "loss": 0.0001, "step": 3792 }, { "epoch": 1.56, "learning_rate": 2.5733667600648875e-05, "loss": 0.0001, "step": 3794 }, { "epoch": 1.56, "learning_rate": 2.5718920513198645e-05, "loss": 0.0, "step": 3796 }, { "epoch": 1.56, "learning_rate": 2.570417342574842e-05, "loss": 0.0002, "step": 3798 }, { "epoch": 1.57, "learning_rate": 2.568942633829819e-05, "loss": 0.0, "step": 3800 }, { "epoch": 1.57, "learning_rate": 2.567467925084796e-05, "loss": 0.0, "step": 3802 }, { "epoch": 1.57, "learning_rate": 2.5659932163397733e-05, "loss": 0.0001, "step": 3804 }, { "epoch": 1.57, "learning_rate": 2.5645185075947504e-05, "loss": 0.0, "step": 3806 }, { "epoch": 1.57, "learning_rate": 2.5630437988497274e-05, "loss": 0.0, "step": 3808 }, { "epoch": 1.57, "learning_rate": 2.5615690901047044e-05, "loss": 0.0, "step": 3810 }, { "epoch": 1.57, "learning_rate": 2.5600943813596818e-05, "loss": 0.0, "step": 3812 }, { "epoch": 1.57, "learning_rate": 2.558619672614659e-05, "loss": 0.0, "step": 3814 }, { "epoch": 1.57, "learning_rate": 2.557144963869636e-05, "loss": 0.0, "step": 3816 }, { "epoch": 1.57, "learning_rate": 2.5556702551246133e-05, "loss": 0.0, "step": 3818 }, { "epoch": 1.57, "learning_rate": 2.5541955463795903e-05, "loss": 0.0, "step": 3820 }, { "epoch": 1.57, "learning_rate": 2.5527208376345673e-05, "loss": 0.0001, "step": 3822 }, { "epoch": 1.58, "learning_rate": 2.5512461288895444e-05, "loss": 0.0, "step": 3824 }, { "epoch": 1.58, "learning_rate": 2.5497714201445217e-05, "loss": 0.0, "step": 3826 }, { "epoch": 1.58, "learning_rate": 2.5482967113994988e-05, "loss": 0.0, "step": 3828 }, { "epoch": 1.58, "learning_rate": 2.5468220026544758e-05, "loss": 0.0, "step": 3830 }, { "epoch": 1.58, "learning_rate": 2.5453472939094532e-05, "loss": 0.0, "step": 3832 }, { "epoch": 1.58, "learning_rate": 2.5438725851644302e-05, "loss": 0.0, "step": 3834 }, { "epoch": 1.58, "learning_rate": 2.5423978764194073e-05, "loss": 0.0, "step": 3836 }, { "epoch": 1.58, "learning_rate": 2.5409231676743843e-05, "loss": 0.0001, "step": 3838 }, { "epoch": 1.58, "learning_rate": 2.5394484589293617e-05, "loss": 0.0, "step": 3840 }, { "epoch": 1.58, "learning_rate": 2.5379737501843387e-05, "loss": 0.0, "step": 3842 }, { "epoch": 1.58, "learning_rate": 2.5364990414393158e-05, "loss": 0.0001, "step": 3844 }, { "epoch": 1.58, "learning_rate": 2.5350243326942928e-05, "loss": 0.0001, "step": 3846 }, { "epoch": 1.59, "learning_rate": 2.53354962394927e-05, "loss": 0.0003, "step": 3848 }, { "epoch": 1.59, "learning_rate": 2.5320749152042472e-05, "loss": 0.0, "step": 3850 }, { "epoch": 1.59, "learning_rate": 2.5306002064592242e-05, "loss": 0.0, "step": 3852 }, { "epoch": 1.59, "learning_rate": 2.5291254977142016e-05, "loss": 0.0, "step": 3854 }, { "epoch": 1.59, "learning_rate": 2.5276507889691787e-05, "loss": 0.0, "step": 3856 }, { "epoch": 1.59, "learning_rate": 2.5261760802241557e-05, "loss": 0.0001, "step": 3858 }, { "epoch": 1.59, "learning_rate": 2.5247013714791327e-05, "loss": 0.0001, "step": 3860 }, { "epoch": 1.59, "learning_rate": 2.52322666273411e-05, "loss": 0.0, "step": 3862 }, { "epoch": 1.59, "learning_rate": 2.521751953989087e-05, "loss": 0.0001, "step": 3864 }, { "epoch": 1.59, "learning_rate": 2.5202772452440642e-05, "loss": 0.0006, "step": 3866 }, { "epoch": 1.59, "learning_rate": 2.5188025364990415e-05, "loss": 0.0, "step": 3868 }, { "epoch": 1.59, "learning_rate": 2.5173278277540186e-05, "loss": 0.0, "step": 3870 }, { "epoch": 1.6, "learning_rate": 2.5158531190089956e-05, "loss": 0.0, "step": 3872 }, { "epoch": 1.6, "learning_rate": 2.5143784102639727e-05, "loss": 0.0001, "step": 3874 }, { "epoch": 1.6, "learning_rate": 2.51290370151895e-05, "loss": 0.0, "step": 3876 }, { "epoch": 1.6, "learning_rate": 2.511428992773927e-05, "loss": 0.0, "step": 3878 }, { "epoch": 1.6, "learning_rate": 2.509954284028904e-05, "loss": 0.0, "step": 3880 }, { "epoch": 1.6, "learning_rate": 2.5084795752838815e-05, "loss": 0.0, "step": 3882 }, { "epoch": 1.6, "learning_rate": 2.5070048665388585e-05, "loss": 0.0, "step": 3884 }, { "epoch": 1.6, "learning_rate": 2.5055301577938356e-05, "loss": 0.0001, "step": 3886 }, { "epoch": 1.6, "learning_rate": 2.5040554490488126e-05, "loss": 0.0001, "step": 3888 }, { "epoch": 1.6, "learning_rate": 2.50258074030379e-05, "loss": 0.0001, "step": 3890 }, { "epoch": 1.6, "learning_rate": 2.501106031558767e-05, "loss": 0.0001, "step": 3892 }, { "epoch": 1.6, "learning_rate": 2.4996313228137444e-05, "loss": 0.0001, "step": 3894 }, { "epoch": 1.61, "learning_rate": 2.4981566140687214e-05, "loss": 0.0, "step": 3896 }, { "epoch": 1.61, "learning_rate": 2.4966819053236988e-05, "loss": 0.0, "step": 3898 }, { "epoch": 1.61, "learning_rate": 2.4952071965786758e-05, "loss": 0.0, "step": 3900 }, { "epoch": 1.61, "learning_rate": 2.493732487833653e-05, "loss": 0.0001, "step": 3902 }, { "epoch": 1.61, "learning_rate": 2.49225777908863e-05, "loss": 0.0001, "step": 3904 }, { "epoch": 1.61, "learning_rate": 2.4907830703436073e-05, "loss": 0.0001, "step": 3906 }, { "epoch": 1.61, "learning_rate": 2.4893083615985843e-05, "loss": 0.0, "step": 3908 }, { "epoch": 1.61, "learning_rate": 2.4878336528535613e-05, "loss": 0.0001, "step": 3910 }, { "epoch": 1.61, "learning_rate": 2.4863589441085387e-05, "loss": 0.0, "step": 3912 }, { "epoch": 1.61, "learning_rate": 2.4848842353635158e-05, "loss": 0.0001, "step": 3914 }, { "epoch": 1.61, "learning_rate": 2.4834095266184928e-05, "loss": 0.0001, "step": 3916 }, { "epoch": 1.61, "learning_rate": 2.48193481787347e-05, "loss": 0.0, "step": 3918 }, { "epoch": 1.62, "learning_rate": 2.4804601091284472e-05, "loss": 0.0, "step": 3920 }, { "epoch": 1.62, "learning_rate": 2.4789854003834246e-05, "loss": 0.0001, "step": 3922 }, { "epoch": 1.62, "learning_rate": 2.4775106916384016e-05, "loss": 0.0, "step": 3924 }, { "epoch": 1.62, "learning_rate": 2.4760359828933787e-05, "loss": 0.0001, "step": 3926 }, { "epoch": 1.62, "learning_rate": 2.474561274148356e-05, "loss": 0.0001, "step": 3928 }, { "epoch": 1.62, "learning_rate": 2.473086565403333e-05, "loss": 0.0001, "step": 3930 }, { "epoch": 1.62, "learning_rate": 2.47161185665831e-05, "loss": 0.0004, "step": 3932 }, { "epoch": 1.62, "learning_rate": 2.4701371479132875e-05, "loss": 0.0, "step": 3934 }, { "epoch": 1.62, "learning_rate": 2.4686624391682645e-05, "loss": 0.0, "step": 3936 }, { "epoch": 1.62, "learning_rate": 2.4671877304232416e-05, "loss": 0.0, "step": 3938 }, { "epoch": 1.62, "learning_rate": 2.4657130216782186e-05, "loss": 0.0002, "step": 3940 }, { "epoch": 1.62, "learning_rate": 2.464238312933196e-05, "loss": 0.0, "step": 3942 }, { "epoch": 1.63, "learning_rate": 2.462763604188173e-05, "loss": 0.0001, "step": 3944 }, { "epoch": 1.63, "learning_rate": 2.46128889544315e-05, "loss": 0.0001, "step": 3946 }, { "epoch": 1.63, "learning_rate": 2.459814186698127e-05, "loss": 0.0, "step": 3948 }, { "epoch": 1.63, "learning_rate": 2.4583394779531045e-05, "loss": 0.0001, "step": 3950 }, { "epoch": 1.63, "learning_rate": 2.4568647692080815e-05, "loss": 0.0001, "step": 3952 }, { "epoch": 1.63, "learning_rate": 2.4553900604630585e-05, "loss": 0.0, "step": 3954 }, { "epoch": 1.63, "learning_rate": 2.453915351718036e-05, "loss": 0.0, "step": 3956 }, { "epoch": 1.63, "learning_rate": 2.452440642973013e-05, "loss": 0.0, "step": 3958 }, { "epoch": 1.63, "learning_rate": 2.45096593422799e-05, "loss": 0.0, "step": 3960 }, { "epoch": 1.63, "learning_rate": 2.449491225482967e-05, "loss": 0.0, "step": 3962 }, { "epoch": 1.63, "learning_rate": 2.4480165167379444e-05, "loss": 0.0, "step": 3964 }, { "epoch": 1.63, "learning_rate": 2.4465418079929214e-05, "loss": 0.0, "step": 3966 }, { "epoch": 1.63, "learning_rate": 2.4450670992478985e-05, "loss": 0.0009, "step": 3968 }, { "epoch": 1.64, "learning_rate": 2.443592390502876e-05, "loss": 0.0001, "step": 3970 }, { "epoch": 1.64, "learning_rate": 2.442117681757853e-05, "loss": 0.0001, "step": 3972 }, { "epoch": 1.64, "learning_rate": 2.44064297301283e-05, "loss": 0.0001, "step": 3974 }, { "epoch": 1.64, "learning_rate": 2.439168264267807e-05, "loss": 0.0001, "step": 3976 }, { "epoch": 1.64, "learning_rate": 2.4376935555227847e-05, "loss": 0.0001, "step": 3978 }, { "epoch": 1.64, "learning_rate": 2.4362188467777617e-05, "loss": 0.0003, "step": 3980 }, { "epoch": 1.64, "learning_rate": 2.4347441380327387e-05, "loss": 0.0001, "step": 3982 }, { "epoch": 1.64, "learning_rate": 2.4332694292877158e-05, "loss": 0.0001, "step": 3984 }, { "epoch": 1.64, "learning_rate": 2.431794720542693e-05, "loss": 0.0001, "step": 3986 }, { "epoch": 1.64, "learning_rate": 2.4303200117976702e-05, "loss": 0.0001, "step": 3988 }, { "epoch": 1.64, "learning_rate": 2.4288453030526472e-05, "loss": 0.0001, "step": 3990 }, { "epoch": 1.64, "learning_rate": 2.4273705943076243e-05, "loss": 0.0001, "step": 3992 }, { "epoch": 1.65, "learning_rate": 2.4258958855626016e-05, "loss": 0.0001, "step": 3994 }, { "epoch": 1.65, "learning_rate": 2.4244211768175787e-05, "loss": 0.0002, "step": 3996 }, { "epoch": 1.65, "learning_rate": 2.4229464680725557e-05, "loss": 0.0001, "step": 3998 }, { "epoch": 1.65, "learning_rate": 2.421471759327533e-05, "loss": 0.0001, "step": 4000 }, { "epoch": 1.65, "learning_rate": 2.41999705058251e-05, "loss": 0.0002, "step": 4002 }, { "epoch": 1.65, "learning_rate": 2.418522341837487e-05, "loss": 0.0002, "step": 4004 }, { "epoch": 1.65, "learning_rate": 2.4170476330924642e-05, "loss": 0.0001, "step": 4006 }, { "epoch": 1.65, "learning_rate": 2.4155729243474416e-05, "loss": 0.0001, "step": 4008 }, { "epoch": 1.65, "learning_rate": 2.4140982156024186e-05, "loss": 0.0001, "step": 4010 }, { "epoch": 1.65, "learning_rate": 2.4126235068573956e-05, "loss": 0.0001, "step": 4012 }, { "epoch": 1.65, "learning_rate": 2.411148798112373e-05, "loss": 0.0001, "step": 4014 }, { "epoch": 1.65, "learning_rate": 2.40967408936735e-05, "loss": 0.0001, "step": 4016 }, { "epoch": 1.66, "learning_rate": 2.408199380622327e-05, "loss": 0.0003, "step": 4018 }, { "epoch": 1.66, "learning_rate": 2.406724671877304e-05, "loss": 0.0001, "step": 4020 }, { "epoch": 1.66, "learning_rate": 2.4052499631322815e-05, "loss": 0.0001, "step": 4022 }, { "epoch": 1.66, "learning_rate": 2.4037752543872585e-05, "loss": 0.0001, "step": 4024 }, { "epoch": 1.66, "learning_rate": 2.4023005456422356e-05, "loss": 0.0001, "step": 4026 }, { "epoch": 1.66, "learning_rate": 2.400825836897213e-05, "loss": 0.0, "step": 4028 }, { "epoch": 1.66, "learning_rate": 2.39935112815219e-05, "loss": 0.0001, "step": 4030 }, { "epoch": 1.66, "learning_rate": 2.397876419407167e-05, "loss": 0.0, "step": 4032 }, { "epoch": 1.66, "learning_rate": 2.3964017106621444e-05, "loss": 0.0001, "step": 4034 }, { "epoch": 1.66, "learning_rate": 2.3949270019171218e-05, "loss": 0.0001, "step": 4036 }, { "epoch": 1.66, "learning_rate": 2.3934522931720988e-05, "loss": 0.0001, "step": 4038 }, { "epoch": 1.66, "learning_rate": 2.391977584427076e-05, "loss": 0.0001, "step": 4040 }, { "epoch": 1.67, "learning_rate": 2.390502875682053e-05, "loss": 0.0, "step": 4042 }, { "epoch": 1.67, "learning_rate": 2.3890281669370303e-05, "loss": 0.0001, "step": 4044 }, { "epoch": 1.67, "learning_rate": 2.3875534581920073e-05, "loss": 0.0001, "step": 4046 }, { "epoch": 1.67, "learning_rate": 2.3860787494469843e-05, "loss": 0.0, "step": 4048 }, { "epoch": 1.67, "learning_rate": 2.3846040407019614e-05, "loss": 0.0001, "step": 4050 }, { "epoch": 1.67, "learning_rate": 2.3831293319569387e-05, "loss": 0.0, "step": 4052 }, { "epoch": 1.67, "learning_rate": 2.3816546232119158e-05, "loss": 0.0001, "step": 4054 }, { "epoch": 1.67, "learning_rate": 2.3801799144668928e-05, "loss": 0.002, "step": 4056 }, { "epoch": 1.67, "learning_rate": 2.3787052057218702e-05, "loss": 0.0001, "step": 4058 }, { "epoch": 1.67, "learning_rate": 2.3772304969768472e-05, "loss": 0.0001, "step": 4060 }, { "epoch": 1.67, "learning_rate": 2.3757557882318243e-05, "loss": 0.0, "step": 4062 }, { "epoch": 1.67, "learning_rate": 2.3742810794868013e-05, "loss": 0.0001, "step": 4064 }, { "epoch": 1.68, "learning_rate": 2.3728063707417787e-05, "loss": 0.0001, "step": 4066 }, { "epoch": 1.68, "learning_rate": 2.3713316619967557e-05, "loss": 0.0001, "step": 4068 }, { "epoch": 1.68, "learning_rate": 2.3698569532517327e-05, "loss": 0.0001, "step": 4070 }, { "epoch": 1.68, "learning_rate": 2.36838224450671e-05, "loss": 0.0001, "step": 4072 }, { "epoch": 1.68, "learning_rate": 2.366907535761687e-05, "loss": 0.0001, "step": 4074 }, { "epoch": 1.68, "learning_rate": 2.3654328270166642e-05, "loss": 0.0001, "step": 4076 }, { "epoch": 1.68, "learning_rate": 2.3639581182716412e-05, "loss": 0.0001, "step": 4078 }, { "epoch": 1.68, "learning_rate": 2.3624834095266186e-05, "loss": 0.0001, "step": 4080 }, { "epoch": 1.68, "learning_rate": 2.3610087007815956e-05, "loss": 0.0001, "step": 4082 }, { "epoch": 1.68, "learning_rate": 2.3595339920365727e-05, "loss": 0.0002, "step": 4084 }, { "epoch": 1.68, "learning_rate": 2.35805928329155e-05, "loss": 0.0001, "step": 4086 }, { "epoch": 1.68, "learning_rate": 2.356584574546527e-05, "loss": 0.0001, "step": 4088 }, { "epoch": 1.69, "learning_rate": 2.3551098658015045e-05, "loss": 0.0025, "step": 4090 }, { "epoch": 1.69, "learning_rate": 2.3536351570564815e-05, "loss": 0.0001, "step": 4092 }, { "epoch": 1.69, "learning_rate": 2.3521604483114585e-05, "loss": 0.0001, "step": 4094 }, { "epoch": 1.69, "learning_rate": 2.350685739566436e-05, "loss": 0.0001, "step": 4096 }, { "epoch": 1.69, "learning_rate": 2.349211030821413e-05, "loss": 0.0001, "step": 4098 }, { "epoch": 1.69, "learning_rate": 2.34773632207639e-05, "loss": 0.0001, "step": 4100 }, { "epoch": 1.69, "learning_rate": 2.3462616133313674e-05, "loss": 0.0001, "step": 4102 }, { "epoch": 1.69, "learning_rate": 2.3447869045863444e-05, "loss": 0.0002, "step": 4104 }, { "epoch": 1.69, "learning_rate": 2.3433121958413214e-05, "loss": 0.0001, "step": 4106 }, { "epoch": 1.69, "learning_rate": 2.3418374870962985e-05, "loss": 0.0002, "step": 4108 }, { "epoch": 1.69, "learning_rate": 2.340362778351276e-05, "loss": 0.0004, "step": 4110 }, { "epoch": 1.69, "learning_rate": 2.338888069606253e-05, "loss": 0.0002, "step": 4112 }, { "epoch": 1.7, "learning_rate": 2.33741336086123e-05, "loss": 0.0001, "step": 4114 }, { "epoch": 1.7, "learning_rate": 2.3359386521162073e-05, "loss": 0.0001, "step": 4116 }, { "epoch": 1.7, "learning_rate": 2.3344639433711843e-05, "loss": 0.0001, "step": 4118 }, { "epoch": 1.7, "learning_rate": 2.3329892346261614e-05, "loss": 0.0001, "step": 4120 }, { "epoch": 1.7, "learning_rate": 2.3315145258811384e-05, "loss": 0.0001, "step": 4122 }, { "epoch": 1.7, "learning_rate": 2.3300398171361158e-05, "loss": 0.0001, "step": 4124 }, { "epoch": 1.7, "learning_rate": 2.3285651083910928e-05, "loss": 0.0001, "step": 4126 }, { "epoch": 1.7, "learning_rate": 2.32709039964607e-05, "loss": 0.0001, "step": 4128 }, { "epoch": 1.7, "learning_rate": 2.3256156909010472e-05, "loss": 0.0, "step": 4130 }, { "epoch": 1.7, "learning_rate": 2.3241409821560243e-05, "loss": 0.0001, "step": 4132 }, { "epoch": 1.7, "learning_rate": 2.3226662734110013e-05, "loss": 0.0001, "step": 4134 }, { "epoch": 1.7, "learning_rate": 2.3211915646659783e-05, "loss": 0.0003, "step": 4136 }, { "epoch": 1.7, "learning_rate": 2.3197168559209557e-05, "loss": 0.0001, "step": 4138 }, { "epoch": 1.71, "learning_rate": 2.3182421471759328e-05, "loss": 0.0001, "step": 4140 }, { "epoch": 1.71, "learning_rate": 2.3167674384309098e-05, "loss": 0.0, "step": 4142 }, { "epoch": 1.71, "learning_rate": 2.3152927296858872e-05, "loss": 0.0001, "step": 4144 }, { "epoch": 1.71, "learning_rate": 2.3138180209408645e-05, "loss": 0.0001, "step": 4146 }, { "epoch": 1.71, "learning_rate": 2.3123433121958416e-05, "loss": 0.0, "step": 4148 }, { "epoch": 1.71, "learning_rate": 2.3108686034508186e-05, "loss": 0.0001, "step": 4150 }, { "epoch": 1.71, "learning_rate": 2.3093938947057957e-05, "loss": 0.0001, "step": 4152 }, { "epoch": 1.71, "learning_rate": 2.307919185960773e-05, "loss": 0.0001, "step": 4154 }, { "epoch": 1.71, "learning_rate": 2.30644447721575e-05, "loss": 0.0002, "step": 4156 }, { "epoch": 1.71, "learning_rate": 2.304969768470727e-05, "loss": 0.0002, "step": 4158 }, { "epoch": 1.71, "learning_rate": 2.3034950597257045e-05, "loss": 0.0, "step": 4160 }, { "epoch": 1.71, "learning_rate": 2.3020203509806815e-05, "loss": 0.0001, "step": 4162 }, { "epoch": 1.72, "learning_rate": 2.3005456422356586e-05, "loss": 0.0001, "step": 4164 }, { "epoch": 1.72, "learning_rate": 2.2990709334906356e-05, "loss": 0.0, "step": 4166 }, { "epoch": 1.72, "learning_rate": 2.297596224745613e-05, "loss": 0.0001, "step": 4168 }, { "epoch": 1.72, "learning_rate": 2.29612151600059e-05, "loss": 0.0001, "step": 4170 }, { "epoch": 1.72, "learning_rate": 2.294646807255567e-05, "loss": 0.0001, "step": 4172 }, { "epoch": 1.72, "learning_rate": 2.2931720985105444e-05, "loss": 0.0001, "step": 4174 }, { "epoch": 1.72, "learning_rate": 2.2916973897655214e-05, "loss": 0.0001, "step": 4176 }, { "epoch": 1.72, "learning_rate": 2.2902226810204985e-05, "loss": 0.0, "step": 4178 }, { "epoch": 1.72, "learning_rate": 2.2887479722754755e-05, "loss": 0.0001, "step": 4180 }, { "epoch": 1.72, "learning_rate": 2.287273263530453e-05, "loss": 0.0001, "step": 4182 }, { "epoch": 1.72, "learning_rate": 2.28579855478543e-05, "loss": 0.0001, "step": 4184 }, { "epoch": 1.72, "learning_rate": 2.284323846040407e-05, "loss": 0.0001, "step": 4186 }, { "epoch": 1.73, "learning_rate": 2.2828491372953843e-05, "loss": 0.0001, "step": 4188 }, { "epoch": 1.73, "learning_rate": 2.2813744285503614e-05, "loss": 0.0, "step": 4190 }, { "epoch": 1.73, "learning_rate": 2.2798997198053384e-05, "loss": 0.0001, "step": 4192 }, { "epoch": 1.73, "learning_rate": 2.2784250110603155e-05, "loss": 0.0, "step": 4194 }, { "epoch": 1.73, "learning_rate": 2.276950302315293e-05, "loss": 0.0001, "step": 4196 }, { "epoch": 1.73, "learning_rate": 2.27547559357027e-05, "loss": 0.0001, "step": 4198 }, { "epoch": 1.73, "learning_rate": 2.2740008848252472e-05, "loss": 0.0001, "step": 4200 }, { "epoch": 1.73, "learning_rate": 2.2725261760802243e-05, "loss": 0.0, "step": 4202 }, { "epoch": 1.73, "learning_rate": 2.2710514673352017e-05, "loss": 0.0001, "step": 4204 }, { "epoch": 1.73, "learning_rate": 2.2695767585901787e-05, "loss": 0.0001, "step": 4206 }, { "epoch": 1.73, "learning_rate": 2.2681020498451557e-05, "loss": 0.0001, "step": 4208 }, { "epoch": 1.73, "learning_rate": 2.2666273411001328e-05, "loss": 0.0, "step": 4210 }, { "epoch": 1.74, "learning_rate": 2.26515263235511e-05, "loss": 0.0, "step": 4212 }, { "epoch": 1.74, "learning_rate": 2.2636779236100872e-05, "loss": 0.0, "step": 4214 }, { "epoch": 1.74, "learning_rate": 2.2622032148650642e-05, "loss": 0.0, "step": 4216 }, { "epoch": 1.74, "learning_rate": 2.2607285061200416e-05, "loss": 0.0, "step": 4218 }, { "epoch": 1.74, "learning_rate": 2.2592537973750186e-05, "loss": 0.0001, "step": 4220 }, { "epoch": 1.74, "learning_rate": 2.2577790886299957e-05, "loss": 0.0, "step": 4222 }, { "epoch": 1.74, "learning_rate": 2.2563043798849727e-05, "loss": 0.0001, "step": 4224 }, { "epoch": 1.74, "learning_rate": 2.25482967113995e-05, "loss": 0.0, "step": 4226 }, { "epoch": 1.74, "learning_rate": 2.253354962394927e-05, "loss": 0.0001, "step": 4228 }, { "epoch": 1.74, "learning_rate": 2.251880253649904e-05, "loss": 0.0, "step": 4230 }, { "epoch": 1.74, "learning_rate": 2.2504055449048815e-05, "loss": 0.0001, "step": 4232 }, { "epoch": 1.74, "learning_rate": 2.2489308361598586e-05, "loss": 0.0, "step": 4234 }, { "epoch": 1.75, "learning_rate": 2.2474561274148356e-05, "loss": 0.0, "step": 4236 }, { "epoch": 1.75, "learning_rate": 2.2459814186698126e-05, "loss": 0.0, "step": 4238 }, { "epoch": 1.75, "learning_rate": 2.24450670992479e-05, "loss": 0.0001, "step": 4240 }, { "epoch": 1.75, "learning_rate": 2.243032001179767e-05, "loss": 0.0, "step": 4242 }, { "epoch": 1.75, "learning_rate": 2.241557292434744e-05, "loss": 0.0, "step": 4244 }, { "epoch": 1.75, "learning_rate": 2.2400825836897215e-05, "loss": 0.0001, "step": 4246 }, { "epoch": 1.75, "learning_rate": 2.2386078749446985e-05, "loss": 0.0, "step": 4248 }, { "epoch": 1.75, "learning_rate": 2.2371331661996755e-05, "loss": 0.0, "step": 4250 }, { "epoch": 1.75, "learning_rate": 2.2356584574546526e-05, "loss": 0.0, "step": 4252 }, { "epoch": 1.75, "learning_rate": 2.23418374870963e-05, "loss": 0.0, "step": 4254 }, { "epoch": 1.75, "learning_rate": 2.2327090399646073e-05, "loss": 0.0, "step": 4256 }, { "epoch": 1.75, "learning_rate": 2.2312343312195844e-05, "loss": 0.0, "step": 4258 }, { "epoch": 1.76, "learning_rate": 2.2297596224745614e-05, "loss": 0.0001, "step": 4260 }, { "epoch": 1.76, "learning_rate": 2.2282849137295388e-05, "loss": 0.0001, "step": 4262 }, { "epoch": 1.76, "learning_rate": 2.2268102049845158e-05, "loss": 0.0, "step": 4264 }, { "epoch": 1.76, "learning_rate": 2.225335496239493e-05, "loss": 0.0005, "step": 4266 }, { "epoch": 1.76, "learning_rate": 2.22386078749447e-05, "loss": 0.0, "step": 4268 }, { "epoch": 1.76, "learning_rate": 2.2223860787494473e-05, "loss": 0.0001, "step": 4270 }, { "epoch": 1.76, "learning_rate": 2.2209113700044243e-05, "loss": 0.0001, "step": 4272 }, { "epoch": 1.76, "learning_rate": 2.2194366612594013e-05, "loss": 0.0007, "step": 4274 }, { "epoch": 1.76, "learning_rate": 2.2179619525143787e-05, "loss": 0.0, "step": 4276 }, { "epoch": 1.76, "learning_rate": 2.2164872437693557e-05, "loss": 0.0001, "step": 4278 }, { "epoch": 1.76, "learning_rate": 2.2150125350243328e-05, "loss": 0.0001, "step": 4280 }, { "epoch": 1.76, "learning_rate": 2.2135378262793098e-05, "loss": 0.0001, "step": 4282 }, { "epoch": 1.77, "learning_rate": 2.2120631175342872e-05, "loss": 0.0002, "step": 4284 }, { "epoch": 1.77, "learning_rate": 2.2105884087892642e-05, "loss": 0.0002, "step": 4286 }, { "epoch": 1.77, "learning_rate": 2.2091137000442413e-05, "loss": 0.0003, "step": 4288 }, { "epoch": 1.77, "learning_rate": 2.2076389912992186e-05, "loss": 0.0001, "step": 4290 }, { "epoch": 1.77, "learning_rate": 2.2061642825541957e-05, "loss": 0.0002, "step": 4292 }, { "epoch": 1.77, "learning_rate": 2.2046895738091727e-05, "loss": 0.0002, "step": 4294 }, { "epoch": 1.77, "learning_rate": 2.2032148650641497e-05, "loss": 0.0002, "step": 4296 }, { "epoch": 1.77, "learning_rate": 2.201740156319127e-05, "loss": 0.0004, "step": 4298 }, { "epoch": 1.77, "learning_rate": 2.200265447574104e-05, "loss": 0.0002, "step": 4300 }, { "epoch": 1.77, "learning_rate": 2.1987907388290812e-05, "loss": 0.0001, "step": 4302 }, { "epoch": 1.77, "learning_rate": 2.1973160300840582e-05, "loss": 0.0002, "step": 4304 }, { "epoch": 1.77, "learning_rate": 2.1958413213390356e-05, "loss": 0.0003, "step": 4306 }, { "epoch": 1.78, "learning_rate": 2.1943666125940126e-05, "loss": 0.0001, "step": 4308 }, { "epoch": 1.78, "learning_rate": 2.1928919038489897e-05, "loss": 0.0002, "step": 4310 }, { "epoch": 1.78, "learning_rate": 2.191417195103967e-05, "loss": 0.0001, "step": 4312 }, { "epoch": 1.78, "learning_rate": 2.1899424863589444e-05, "loss": 0.0001, "step": 4314 }, { "epoch": 1.78, "learning_rate": 2.1884677776139215e-05, "loss": 0.0001, "step": 4316 }, { "epoch": 1.78, "learning_rate": 2.1869930688688985e-05, "loss": 0.0005, "step": 4318 }, { "epoch": 1.78, "learning_rate": 2.185518360123876e-05, "loss": 0.0002, "step": 4320 }, { "epoch": 1.78, "learning_rate": 2.184043651378853e-05, "loss": 0.0001, "step": 4322 }, { "epoch": 1.78, "learning_rate": 2.18256894263383e-05, "loss": 0.0001, "step": 4324 }, { "epoch": 1.78, "learning_rate": 2.181094233888807e-05, "loss": 0.0001, "step": 4326 }, { "epoch": 1.78, "learning_rate": 2.1796195251437844e-05, "loss": 0.0002, "step": 4328 }, { "epoch": 1.78, "learning_rate": 2.1781448163987614e-05, "loss": 0.0002, "step": 4330 }, { "epoch": 1.78, "learning_rate": 2.1766701076537384e-05, "loss": 0.0001, "step": 4332 }, { "epoch": 1.79, "learning_rate": 2.1751953989087158e-05, "loss": 0.0001, "step": 4334 }, { "epoch": 1.79, "learning_rate": 2.173720690163693e-05, "loss": 0.0003, "step": 4336 }, { "epoch": 1.79, "learning_rate": 2.17224598141867e-05, "loss": 0.0001, "step": 4338 }, { "epoch": 1.79, "learning_rate": 2.170771272673647e-05, "loss": 0.0001, "step": 4340 }, { "epoch": 1.79, "learning_rate": 2.1692965639286243e-05, "loss": 0.0001, "step": 4342 }, { "epoch": 1.79, "learning_rate": 2.1678218551836013e-05, "loss": 0.0001, "step": 4344 }, { "epoch": 1.79, "learning_rate": 2.1663471464385784e-05, "loss": 0.0003, "step": 4346 }, { "epoch": 1.79, "learning_rate": 2.1648724376935554e-05, "loss": 0.0001, "step": 4348 }, { "epoch": 1.79, "learning_rate": 2.1633977289485328e-05, "loss": 0.0001, "step": 4350 }, { "epoch": 1.79, "learning_rate": 2.1619230202035098e-05, "loss": 0.0001, "step": 4352 }, { "epoch": 1.79, "learning_rate": 2.160448311458487e-05, "loss": 0.0002, "step": 4354 }, { "epoch": 1.79, "learning_rate": 2.1589736027134642e-05, "loss": 0.0001, "step": 4356 }, { "epoch": 1.8, "learning_rate": 2.1574988939684413e-05, "loss": 0.0001, "step": 4358 }, { "epoch": 1.8, "learning_rate": 2.1560241852234183e-05, "loss": 0.0, "step": 4360 }, { "epoch": 1.8, "learning_rate": 2.1545494764783953e-05, "loss": 0.0001, "step": 4362 }, { "epoch": 1.8, "learning_rate": 2.1530747677333727e-05, "loss": 0.0001, "step": 4364 }, { "epoch": 1.8, "learning_rate": 2.15160005898835e-05, "loss": 0.0001, "step": 4366 }, { "epoch": 1.8, "learning_rate": 2.150125350243327e-05, "loss": 0.0001, "step": 4368 }, { "epoch": 1.8, "learning_rate": 2.148650641498304e-05, "loss": 0.0001, "step": 4370 }, { "epoch": 1.8, "learning_rate": 2.1471759327532815e-05, "loss": 0.0001, "step": 4372 }, { "epoch": 1.8, "learning_rate": 2.1457012240082586e-05, "loss": 0.0001, "step": 4374 }, { "epoch": 1.8, "learning_rate": 2.1442265152632356e-05, "loss": 0.0001, "step": 4376 }, { "epoch": 1.8, "learning_rate": 2.142751806518213e-05, "loss": 0.0001, "step": 4378 }, { "epoch": 1.8, "learning_rate": 2.14127709777319e-05, "loss": 0.0001, "step": 4380 }, { "epoch": 1.81, "learning_rate": 2.139802389028167e-05, "loss": 0.0001, "step": 4382 }, { "epoch": 1.81, "learning_rate": 2.138327680283144e-05, "loss": 0.0002, "step": 4384 }, { "epoch": 1.81, "learning_rate": 2.1368529715381215e-05, "loss": 0.0002, "step": 4386 }, { "epoch": 1.81, "learning_rate": 2.1353782627930985e-05, "loss": 0.0001, "step": 4388 }, { "epoch": 1.81, "learning_rate": 2.1339035540480755e-05, "loss": 0.0001, "step": 4390 }, { "epoch": 1.81, "learning_rate": 2.132428845303053e-05, "loss": 0.0001, "step": 4392 }, { "epoch": 1.81, "learning_rate": 2.13095413655803e-05, "loss": 0.0001, "step": 4394 }, { "epoch": 1.81, "learning_rate": 2.129479427813007e-05, "loss": 0.0, "step": 4396 }, { "epoch": 1.81, "learning_rate": 2.128004719067984e-05, "loss": 0.0001, "step": 4398 }, { "epoch": 1.81, "learning_rate": 2.1265300103229614e-05, "loss": 0.0, "step": 4400 }, { "epoch": 1.81, "learning_rate": 2.1250553015779384e-05, "loss": 0.0001, "step": 4402 }, { "epoch": 1.81, "learning_rate": 2.1235805928329155e-05, "loss": 0.0, "step": 4404 }, { "epoch": 1.82, "learning_rate": 2.1221058840878925e-05, "loss": 0.0, "step": 4406 }, { "epoch": 1.82, "learning_rate": 2.12063117534287e-05, "loss": 0.0001, "step": 4408 }, { "epoch": 1.82, "learning_rate": 2.119156466597847e-05, "loss": 0.0001, "step": 4410 }, { "epoch": 1.82, "learning_rate": 2.117681757852824e-05, "loss": 0.0001, "step": 4412 }, { "epoch": 1.82, "learning_rate": 2.1162070491078013e-05, "loss": 0.0, "step": 4414 }, { "epoch": 1.82, "learning_rate": 2.1147323403627784e-05, "loss": 0.0, "step": 4416 }, { "epoch": 1.82, "learning_rate": 2.1132576316177554e-05, "loss": 0.0001, "step": 4418 }, { "epoch": 1.82, "learning_rate": 2.1117829228727325e-05, "loss": 0.0, "step": 4420 }, { "epoch": 1.82, "learning_rate": 2.11030821412771e-05, "loss": 0.0001, "step": 4422 }, { "epoch": 1.82, "learning_rate": 2.1088335053826872e-05, "loss": 0.0001, "step": 4424 }, { "epoch": 1.82, "learning_rate": 2.1073587966376642e-05, "loss": 0.0, "step": 4426 }, { "epoch": 1.82, "learning_rate": 2.1058840878926413e-05, "loss": 0.0, "step": 4428 }, { "epoch": 1.83, "learning_rate": 2.1044093791476187e-05, "loss": 0.0, "step": 4430 }, { "epoch": 1.83, "learning_rate": 2.1029346704025957e-05, "loss": 0.0001, "step": 4432 }, { "epoch": 1.83, "learning_rate": 2.1014599616575727e-05, "loss": 0.0, "step": 4434 }, { "epoch": 1.83, "learning_rate": 2.09998525291255e-05, "loss": 0.0, "step": 4436 }, { "epoch": 1.83, "learning_rate": 2.098510544167527e-05, "loss": 0.0, "step": 4438 }, { "epoch": 1.83, "learning_rate": 2.0970358354225042e-05, "loss": 0.0, "step": 4440 }, { "epoch": 1.83, "learning_rate": 2.0955611266774812e-05, "loss": 0.0, "step": 4442 }, { "epoch": 1.83, "learning_rate": 2.0940864179324586e-05, "loss": 0.0001, "step": 4444 }, { "epoch": 1.83, "learning_rate": 2.0926117091874356e-05, "loss": 0.0001, "step": 4446 }, { "epoch": 1.83, "learning_rate": 2.0911370004424127e-05, "loss": 0.0, "step": 4448 }, { "epoch": 1.83, "learning_rate": 2.0896622916973897e-05, "loss": 0.0001, "step": 4450 }, { "epoch": 1.83, "learning_rate": 2.088187582952367e-05, "loss": 0.0, "step": 4452 }, { "epoch": 1.84, "learning_rate": 2.086712874207344e-05, "loss": 0.0001, "step": 4454 }, { "epoch": 1.84, "learning_rate": 2.085238165462321e-05, "loss": 0.0004, "step": 4456 }, { "epoch": 1.84, "learning_rate": 2.0837634567172985e-05, "loss": 0.0, "step": 4458 }, { "epoch": 1.84, "learning_rate": 2.0822887479722756e-05, "loss": 0.0, "step": 4460 }, { "epoch": 1.84, "learning_rate": 2.0808140392272526e-05, "loss": 0.0001, "step": 4462 }, { "epoch": 1.84, "learning_rate": 2.0793393304822296e-05, "loss": 0.0, "step": 4464 }, { "epoch": 1.84, "learning_rate": 2.077864621737207e-05, "loss": 0.0, "step": 4466 }, { "epoch": 1.84, "learning_rate": 2.076389912992184e-05, "loss": 0.0001, "step": 4468 }, { "epoch": 1.84, "learning_rate": 2.074915204247161e-05, "loss": 0.0001, "step": 4470 }, { "epoch": 1.84, "learning_rate": 2.0734404955021385e-05, "loss": 0.0001, "step": 4472 }, { "epoch": 1.84, "learning_rate": 2.0719657867571155e-05, "loss": 0.0001, "step": 4474 }, { "epoch": 1.84, "learning_rate": 2.0704910780120925e-05, "loss": 0.0001, "step": 4476 }, { "epoch": 1.85, "learning_rate": 2.06901636926707e-05, "loss": 0.0001, "step": 4478 }, { "epoch": 1.85, "learning_rate": 2.0675416605220473e-05, "loss": 0.0, "step": 4480 }, { "epoch": 1.85, "learning_rate": 2.0660669517770243e-05, "loss": 0.0, "step": 4482 }, { "epoch": 1.85, "learning_rate": 2.0645922430320014e-05, "loss": 0.0002, "step": 4484 }, { "epoch": 1.85, "learning_rate": 2.0631175342869784e-05, "loss": 0.0001, "step": 4486 }, { "epoch": 1.85, "learning_rate": 2.0616428255419558e-05, "loss": 0.0001, "step": 4488 }, { "epoch": 1.85, "learning_rate": 2.0601681167969328e-05, "loss": 0.0001, "step": 4490 }, { "epoch": 1.85, "learning_rate": 2.05869340805191e-05, "loss": 0.0001, "step": 4492 }, { "epoch": 1.85, "learning_rate": 2.057218699306887e-05, "loss": 0.0001, "step": 4494 }, { "epoch": 1.85, "learning_rate": 2.0557439905618642e-05, "loss": 0.0001, "step": 4496 }, { "epoch": 1.85, "learning_rate": 2.0542692818168413e-05, "loss": 0.0001, "step": 4498 }, { "epoch": 1.85, "learning_rate": 2.0527945730718183e-05, "loss": 0.0001, "step": 4500 }, { "epoch": 1.85, "learning_rate": 2.0513198643267957e-05, "loss": 0.0, "step": 4502 }, { "epoch": 1.86, "learning_rate": 2.0498451555817727e-05, "loss": 0.0001, "step": 4504 }, { "epoch": 1.86, "learning_rate": 2.0483704468367498e-05, "loss": 0.0001, "step": 4506 }, { "epoch": 1.86, "learning_rate": 2.0468957380917268e-05, "loss": 0.0001, "step": 4508 }, { "epoch": 1.86, "learning_rate": 2.0454210293467042e-05, "loss": 0.0001, "step": 4510 }, { "epoch": 1.86, "learning_rate": 2.0439463206016812e-05, "loss": 0.0, "step": 4512 }, { "epoch": 1.86, "learning_rate": 2.0424716118566583e-05, "loss": 0.0, "step": 4514 }, { "epoch": 1.86, "learning_rate": 2.0409969031116356e-05, "loss": 0.0001, "step": 4516 }, { "epoch": 1.86, "learning_rate": 2.0395221943666127e-05, "loss": 0.0, "step": 4518 }, { "epoch": 1.86, "learning_rate": 2.0380474856215897e-05, "loss": 0.0001, "step": 4520 }, { "epoch": 1.86, "learning_rate": 2.0365727768765667e-05, "loss": 0.0003, "step": 4522 }, { "epoch": 1.86, "learning_rate": 2.035098068131544e-05, "loss": 0.0, "step": 4524 }, { "epoch": 1.86, "learning_rate": 2.033623359386521e-05, "loss": 0.0001, "step": 4526 }, { "epoch": 1.87, "learning_rate": 2.0321486506414982e-05, "loss": 0.0001, "step": 4528 }, { "epoch": 1.87, "learning_rate": 2.0306739418964756e-05, "loss": 0.0003, "step": 4530 }, { "epoch": 1.87, "learning_rate": 2.0291992331514526e-05, "loss": 0.0, "step": 4532 }, { "epoch": 1.87, "learning_rate": 2.02772452440643e-05, "loss": 0.0, "step": 4534 }, { "epoch": 1.87, "learning_rate": 2.026249815661407e-05, "loss": 0.0001, "step": 4536 }, { "epoch": 1.87, "learning_rate": 2.0247751069163844e-05, "loss": 0.0001, "step": 4538 }, { "epoch": 1.87, "learning_rate": 2.0233003981713614e-05, "loss": 0.0, "step": 4540 }, { "epoch": 1.87, "learning_rate": 2.0218256894263385e-05, "loss": 0.0, "step": 4542 }, { "epoch": 1.87, "learning_rate": 2.0203509806813155e-05, "loss": 0.0003, "step": 4544 }, { "epoch": 1.87, "learning_rate": 2.018876271936293e-05, "loss": 0.0001, "step": 4546 }, { "epoch": 1.87, "learning_rate": 2.01740156319127e-05, "loss": 0.0, "step": 4548 }, { "epoch": 1.87, "learning_rate": 2.015926854446247e-05, "loss": 0.0, "step": 4550 }, { "epoch": 1.88, "learning_rate": 2.014452145701224e-05, "loss": 0.0, "step": 4552 }, { "epoch": 1.88, "learning_rate": 2.0129774369562014e-05, "loss": 0.0, "step": 4554 }, { "epoch": 1.88, "learning_rate": 2.0115027282111784e-05, "loss": 0.0, "step": 4556 }, { "epoch": 1.88, "learning_rate": 2.0100280194661554e-05, "loss": 0.0001, "step": 4558 }, { "epoch": 1.88, "learning_rate": 2.0085533107211328e-05, "loss": 0.0003, "step": 4560 }, { "epoch": 1.88, "learning_rate": 2.00707860197611e-05, "loss": 0.0001, "step": 4562 }, { "epoch": 1.88, "learning_rate": 2.005603893231087e-05, "loss": 0.0001, "step": 4564 }, { "epoch": 1.88, "learning_rate": 2.004129184486064e-05, "loss": 0.0001, "step": 4566 }, { "epoch": 1.88, "learning_rate": 2.0026544757410413e-05, "loss": 0.0, "step": 4568 }, { "epoch": 1.88, "learning_rate": 2.0011797669960183e-05, "loss": 0.0001, "step": 4570 }, { "epoch": 1.88, "learning_rate": 1.9997050582509954e-05, "loss": 0.0001, "step": 4572 }, { "epoch": 1.88, "learning_rate": 1.9982303495059727e-05, "loss": 0.0001, "step": 4574 }, { "epoch": 1.89, "learning_rate": 1.9967556407609498e-05, "loss": 0.0001, "step": 4576 }, { "epoch": 1.89, "learning_rate": 1.9952809320159268e-05, "loss": 0.0001, "step": 4578 }, { "epoch": 1.89, "learning_rate": 1.993806223270904e-05, "loss": 0.0001, "step": 4580 }, { "epoch": 1.89, "learning_rate": 1.9923315145258812e-05, "loss": 0.0058, "step": 4582 }, { "epoch": 1.89, "learning_rate": 1.9908568057808583e-05, "loss": 0.0001, "step": 4584 }, { "epoch": 1.89, "learning_rate": 1.9893820970358353e-05, "loss": 0.0001, "step": 4586 }, { "epoch": 1.89, "learning_rate": 1.9879073882908127e-05, "loss": 0.0001, "step": 4588 }, { "epoch": 1.89, "learning_rate": 1.98643267954579e-05, "loss": 0.0001, "step": 4590 }, { "epoch": 1.89, "learning_rate": 1.984957970800767e-05, "loss": 0.0001, "step": 4592 }, { "epoch": 1.89, "learning_rate": 1.983483262055744e-05, "loss": 0.0001, "step": 4594 }, { "epoch": 1.89, "learning_rate": 1.982008553310721e-05, "loss": 0.0001, "step": 4596 }, { "epoch": 1.89, "learning_rate": 1.9805338445656985e-05, "loss": 0.0001, "step": 4598 }, { "epoch": 1.9, "learning_rate": 1.9790591358206756e-05, "loss": 0.0, "step": 4600 }, { "epoch": 1.9, "learning_rate": 1.9775844270756526e-05, "loss": 0.0001, "step": 4602 }, { "epoch": 1.9, "learning_rate": 1.97610971833063e-05, "loss": 0.0, "step": 4604 }, { "epoch": 1.9, "learning_rate": 1.974635009585607e-05, "loss": 0.0001, "step": 4606 }, { "epoch": 1.9, "learning_rate": 1.973160300840584e-05, "loss": 0.0001, "step": 4608 }, { "epoch": 1.9, "learning_rate": 1.971685592095561e-05, "loss": 0.0, "step": 4610 }, { "epoch": 1.9, "learning_rate": 1.9702108833505385e-05, "loss": 0.0, "step": 4612 }, { "epoch": 1.9, "learning_rate": 1.9687361746055155e-05, "loss": 0.0001, "step": 4614 }, { "epoch": 1.9, "learning_rate": 1.9672614658604925e-05, "loss": 0.0001, "step": 4616 }, { "epoch": 1.9, "learning_rate": 1.96578675711547e-05, "loss": 0.0001, "step": 4618 }, { "epoch": 1.9, "learning_rate": 1.964312048370447e-05, "loss": 0.0, "step": 4620 }, { "epoch": 1.9, "learning_rate": 1.962837339625424e-05, "loss": 0.0, "step": 4622 }, { "epoch": 1.91, "learning_rate": 1.961362630880401e-05, "loss": 0.0001, "step": 4624 }, { "epoch": 1.91, "learning_rate": 1.9598879221353784e-05, "loss": 0.0001, "step": 4626 }, { "epoch": 1.91, "learning_rate": 1.9584132133903554e-05, "loss": 0.0001, "step": 4628 }, { "epoch": 1.91, "learning_rate": 1.9569385046453325e-05, "loss": 0.0001, "step": 4630 }, { "epoch": 1.91, "learning_rate": 1.95546379590031e-05, "loss": 0.0, "step": 4632 }, { "epoch": 1.91, "learning_rate": 1.953989087155287e-05, "loss": 0.0, "step": 4634 }, { "epoch": 1.91, "learning_rate": 1.952514378410264e-05, "loss": 0.0001, "step": 4636 }, { "epoch": 1.91, "learning_rate": 1.951039669665241e-05, "loss": 0.0, "step": 4638 }, { "epoch": 1.91, "learning_rate": 1.9495649609202183e-05, "loss": 0.0001, "step": 4640 }, { "epoch": 1.91, "learning_rate": 1.9480902521751954e-05, "loss": 0.0001, "step": 4642 }, { "epoch": 1.91, "learning_rate": 1.9466155434301728e-05, "loss": 0.0002, "step": 4644 }, { "epoch": 1.91, "learning_rate": 1.9451408346851498e-05, "loss": 0.0, "step": 4646 }, { "epoch": 1.92, "learning_rate": 1.943666125940127e-05, "loss": 0.0, "step": 4648 }, { "epoch": 1.92, "learning_rate": 1.9421914171951042e-05, "loss": 0.0001, "step": 4650 }, { "epoch": 1.92, "learning_rate": 1.9407167084500812e-05, "loss": 0.0012, "step": 4652 }, { "epoch": 1.92, "learning_rate": 1.9392419997050583e-05, "loss": 0.0, "step": 4654 }, { "epoch": 1.92, "learning_rate": 1.9377672909600356e-05, "loss": 0.0001, "step": 4656 }, { "epoch": 1.92, "learning_rate": 1.9362925822150127e-05, "loss": 0.0, "step": 4658 }, { "epoch": 1.92, "learning_rate": 1.9348178734699897e-05, "loss": 0.0041, "step": 4660 }, { "epoch": 1.92, "learning_rate": 1.933343164724967e-05, "loss": 0.0001, "step": 4662 }, { "epoch": 1.92, "learning_rate": 1.931868455979944e-05, "loss": 0.0, "step": 4664 }, { "epoch": 1.92, "learning_rate": 1.9303937472349212e-05, "loss": 0.0001, "step": 4666 }, { "epoch": 1.92, "learning_rate": 1.9289190384898982e-05, "loss": 0.0001, "step": 4668 }, { "epoch": 1.92, "learning_rate": 1.9274443297448756e-05, "loss": 0.0011, "step": 4670 }, { "epoch": 1.93, "learning_rate": 1.9259696209998526e-05, "loss": 0.0001, "step": 4672 }, { "epoch": 1.93, "learning_rate": 1.9244949122548297e-05, "loss": 0.0001, "step": 4674 }, { "epoch": 1.93, "learning_rate": 1.923020203509807e-05, "loss": 0.0002, "step": 4676 }, { "epoch": 1.93, "learning_rate": 1.921545494764784e-05, "loss": 0.0002, "step": 4678 }, { "epoch": 1.93, "learning_rate": 1.920070786019761e-05, "loss": 0.0001, "step": 4680 }, { "epoch": 1.93, "learning_rate": 1.918596077274738e-05, "loss": 0.0001, "step": 4682 }, { "epoch": 1.93, "learning_rate": 1.9171213685297155e-05, "loss": 0.0001, "step": 4684 }, { "epoch": 1.93, "learning_rate": 1.9156466597846926e-05, "loss": 0.0002, "step": 4686 }, { "epoch": 1.93, "learning_rate": 1.9141719510396696e-05, "loss": 0.0001, "step": 4688 }, { "epoch": 1.93, "learning_rate": 1.912697242294647e-05, "loss": 0.0002, "step": 4690 }, { "epoch": 1.93, "learning_rate": 1.911222533549624e-05, "loss": 0.0001, "step": 4692 }, { "epoch": 1.93, "learning_rate": 1.909747824804601e-05, "loss": 0.0001, "step": 4694 }, { "epoch": 1.93, "learning_rate": 1.908273116059578e-05, "loss": 0.0004, "step": 4696 }, { "epoch": 1.94, "learning_rate": 1.9067984073145555e-05, "loss": 0.0001, "step": 4698 }, { "epoch": 1.94, "learning_rate": 1.9053236985695328e-05, "loss": 0.0005, "step": 4700 }, { "epoch": 1.94, "learning_rate": 1.90384898982451e-05, "loss": 0.0001, "step": 4702 }, { "epoch": 1.94, "learning_rate": 1.902374281079487e-05, "loss": 0.0, "step": 4704 }, { "epoch": 1.94, "learning_rate": 1.9008995723344643e-05, "loss": 0.0001, "step": 4706 }, { "epoch": 1.94, "learning_rate": 1.8994248635894413e-05, "loss": 0.0, "step": 4708 }, { "epoch": 1.94, "learning_rate": 1.8979501548444183e-05, "loss": 0.0001, "step": 4710 }, { "epoch": 1.94, "learning_rate": 1.8964754460993954e-05, "loss": 0.0, "step": 4712 }, { "epoch": 1.94, "learning_rate": 1.8950007373543728e-05, "loss": 0.0001, "step": 4714 }, { "epoch": 1.94, "learning_rate": 1.8935260286093498e-05, "loss": 0.0001, "step": 4716 }, { "epoch": 1.94, "learning_rate": 1.892051319864327e-05, "loss": 0.0001, "step": 4718 }, { "epoch": 1.94, "learning_rate": 1.8905766111193042e-05, "loss": 0.0, "step": 4720 }, { "epoch": 1.95, "learning_rate": 1.8891019023742812e-05, "loss": 0.0001, "step": 4722 }, { "epoch": 1.95, "learning_rate": 1.8876271936292583e-05, "loss": 0.0, "step": 4724 }, { "epoch": 1.95, "learning_rate": 1.8861524848842353e-05, "loss": 0.0001, "step": 4726 }, { "epoch": 1.95, "learning_rate": 1.8846777761392127e-05, "loss": 0.0001, "step": 4728 }, { "epoch": 1.95, "learning_rate": 1.8832030673941897e-05, "loss": 0.0001, "step": 4730 }, { "epoch": 1.95, "learning_rate": 1.8817283586491668e-05, "loss": 0.0001, "step": 4732 }, { "epoch": 1.95, "learning_rate": 1.880253649904144e-05, "loss": 0.0001, "step": 4734 }, { "epoch": 1.95, "learning_rate": 1.8787789411591212e-05, "loss": 0.0, "step": 4736 }, { "epoch": 1.95, "learning_rate": 1.8773042324140982e-05, "loss": 0.0, "step": 4738 }, { "epoch": 1.95, "learning_rate": 1.8758295236690753e-05, "loss": 0.0, "step": 4740 }, { "epoch": 1.95, "learning_rate": 1.8743548149240526e-05, "loss": 0.0, "step": 4742 }, { "epoch": 1.95, "learning_rate": 1.8728801061790297e-05, "loss": 0.0001, "step": 4744 }, { "epoch": 1.96, "learning_rate": 1.8714053974340067e-05, "loss": 0.0001, "step": 4746 }, { "epoch": 1.96, "learning_rate": 1.869930688688984e-05, "loss": 0.0001, "step": 4748 }, { "epoch": 1.96, "learning_rate": 1.868455979943961e-05, "loss": 0.0001, "step": 4750 }, { "epoch": 1.96, "learning_rate": 1.866981271198938e-05, "loss": 0.0, "step": 4752 }, { "epoch": 1.96, "learning_rate": 1.8655065624539152e-05, "loss": 0.0, "step": 4754 }, { "epoch": 1.96, "learning_rate": 1.8640318537088926e-05, "loss": 0.0001, "step": 4756 }, { "epoch": 1.96, "learning_rate": 1.86255714496387e-05, "loss": 0.0, "step": 4758 }, { "epoch": 1.96, "learning_rate": 1.861082436218847e-05, "loss": 0.0001, "step": 4760 }, { "epoch": 1.96, "learning_rate": 1.859607727473824e-05, "loss": 0.0001, "step": 4762 }, { "epoch": 1.96, "learning_rate": 1.8581330187288014e-05, "loss": 0.0, "step": 4764 }, { "epoch": 1.96, "learning_rate": 1.8566583099837784e-05, "loss": 0.0, "step": 4766 }, { "epoch": 1.96, "learning_rate": 1.8551836012387555e-05, "loss": 0.0001, "step": 4768 }, { "epoch": 1.97, "learning_rate": 1.8537088924937325e-05, "loss": 0.0, "step": 4770 }, { "epoch": 1.97, "learning_rate": 1.85223418374871e-05, "loss": 0.0, "step": 4772 }, { "epoch": 1.97, "learning_rate": 1.850759475003687e-05, "loss": 0.0001, "step": 4774 }, { "epoch": 1.97, "learning_rate": 1.849284766258664e-05, "loss": 0.0, "step": 4776 }, { "epoch": 1.97, "learning_rate": 1.8478100575136413e-05, "loss": 0.0001, "step": 4778 }, { "epoch": 1.97, "learning_rate": 1.8463353487686184e-05, "loss": 0.0, "step": 4780 }, { "epoch": 1.97, "learning_rate": 1.8448606400235954e-05, "loss": 0.0002, "step": 4782 }, { "epoch": 1.97, "learning_rate": 1.8433859312785724e-05, "loss": 0.0, "step": 4784 }, { "epoch": 1.97, "learning_rate": 1.8419112225335498e-05, "loss": 0.0001, "step": 4786 }, { "epoch": 1.97, "learning_rate": 1.840436513788527e-05, "loss": 0.0001, "step": 4788 }, { "epoch": 1.97, "learning_rate": 1.838961805043504e-05, "loss": 0.0, "step": 4790 }, { "epoch": 1.97, "learning_rate": 1.8374870962984813e-05, "loss": 0.0, "step": 4792 }, { "epoch": 1.98, "learning_rate": 1.8360123875534583e-05, "loss": 0.0, "step": 4794 }, { "epoch": 1.98, "learning_rate": 1.8345376788084353e-05, "loss": 0.0001, "step": 4796 }, { "epoch": 1.98, "learning_rate": 1.8330629700634124e-05, "loss": 0.0, "step": 4798 }, { "epoch": 1.98, "learning_rate": 1.8315882613183897e-05, "loss": 0.0001, "step": 4800 }, { "epoch": 1.98, "learning_rate": 1.8301135525733668e-05, "loss": 0.0001, "step": 4802 }, { "epoch": 1.98, "learning_rate": 1.8286388438283438e-05, "loss": 0.0001, "step": 4804 }, { "epoch": 1.98, "learning_rate": 1.827164135083321e-05, "loss": 0.0001, "step": 4806 }, { "epoch": 1.98, "learning_rate": 1.8256894263382982e-05, "loss": 0.0001, "step": 4808 }, { "epoch": 1.98, "learning_rate": 1.8242147175932753e-05, "loss": 0.0001, "step": 4810 }, { "epoch": 1.98, "learning_rate": 1.8227400088482526e-05, "loss": 0.0, "step": 4812 }, { "epoch": 1.98, "learning_rate": 1.8212653001032297e-05, "loss": 0.0001, "step": 4814 }, { "epoch": 1.98, "learning_rate": 1.819790591358207e-05, "loss": 0.0, "step": 4816 }, { "epoch": 1.99, "learning_rate": 1.818315882613184e-05, "loss": 0.0001, "step": 4818 }, { "epoch": 1.99, "learning_rate": 1.816841173868161e-05, "loss": 0.0, "step": 4820 }, { "epoch": 1.99, "learning_rate": 1.8153664651231385e-05, "loss": 0.0001, "step": 4822 }, { "epoch": 1.99, "learning_rate": 1.8138917563781155e-05, "loss": 0.0, "step": 4824 }, { "epoch": 1.99, "learning_rate": 1.8124170476330926e-05, "loss": 0.0, "step": 4826 }, { "epoch": 1.99, "learning_rate": 1.8109423388880696e-05, "loss": 0.0001, "step": 4828 }, { "epoch": 1.99, "learning_rate": 1.809467630143047e-05, "loss": 0.0001, "step": 4830 }, { "epoch": 1.99, "learning_rate": 1.807992921398024e-05, "loss": 0.0, "step": 4832 }, { "epoch": 1.99, "learning_rate": 1.806518212653001e-05, "loss": 0.0, "step": 4834 }, { "epoch": 1.99, "learning_rate": 1.8050435039079784e-05, "loss": 0.0, "step": 4836 }, { "epoch": 1.99, "learning_rate": 1.8035687951629555e-05, "loss": 0.0001, "step": 4838 }, { "epoch": 1.99, "learning_rate": 1.8020940864179325e-05, "loss": 0.0001, "step": 4840 }, { "epoch": 2.0, "learning_rate": 1.8006193776729095e-05, "loss": 0.0, "step": 4842 }, { "epoch": 2.0, "learning_rate": 1.799144668927887e-05, "loss": 0.0001, "step": 4844 }, { "epoch": 2.0, "learning_rate": 1.797669960182864e-05, "loss": 0.0001, "step": 4846 }, { "epoch": 2.0, "learning_rate": 1.796195251437841e-05, "loss": 0.0001, "step": 4848 }, { "epoch": 2.0, "learning_rate": 1.7947205426928184e-05, "loss": 0.0, "step": 4850 }, { "epoch": 2.0, "learning_rate": 1.7932458339477954e-05, "loss": 0.0, "step": 4852 }, { "epoch": 2.0, "learning_rate": 1.7917711252027724e-05, "loss": 0.0001, "step": 4854 }, { "epoch": 2.0, "learning_rate": 1.7902964164577495e-05, "loss": 0.0, "step": 4856 }, { "epoch": 2.0, "learning_rate": 1.788821707712727e-05, "loss": 0.0, "step": 4858 }, { "epoch": 2.0, "learning_rate": 1.787346998967704e-05, "loss": 0.0001, "step": 4860 }, { "epoch": 2.0, "learning_rate": 1.785872290222681e-05, "loss": 0.0001, "step": 4862 }, { "epoch": 2.0, "learning_rate": 1.784397581477658e-05, "loss": 0.0, "step": 4864 }, { "epoch": 2.0, "learning_rate": 1.7829228727326353e-05, "loss": 0.0, "step": 4866 }, { "epoch": 2.01, "learning_rate": 1.7814481639876127e-05, "loss": 0.0001, "step": 4868 }, { "epoch": 2.01, "learning_rate": 1.7799734552425897e-05, "loss": 0.0001, "step": 4870 }, { "epoch": 2.01, "learning_rate": 1.7784987464975668e-05, "loss": 0.0001, "step": 4872 }, { "epoch": 2.01, "learning_rate": 1.777024037752544e-05, "loss": 0.0, "step": 4874 }, { "epoch": 2.01, "learning_rate": 1.7755493290075212e-05, "loss": 0.0001, "step": 4876 }, { "epoch": 2.01, "learning_rate": 1.7740746202624982e-05, "loss": 0.0001, "step": 4878 }, { "epoch": 2.01, "learning_rate": 1.7725999115174756e-05, "loss": 0.0, "step": 4880 }, { "epoch": 2.01, "learning_rate": 1.7711252027724526e-05, "loss": 0.0001, "step": 4882 }, { "epoch": 2.01, "learning_rate": 1.7696504940274297e-05, "loss": 0.0, "step": 4884 }, { "epoch": 2.01, "learning_rate": 1.7681757852824067e-05, "loss": 0.0001, "step": 4886 }, { "epoch": 2.01, "learning_rate": 1.766701076537384e-05, "loss": 0.0, "step": 4888 }, { "epoch": 2.01, "learning_rate": 1.765226367792361e-05, "loss": 0.0, "step": 4890 }, { "epoch": 2.02, "learning_rate": 1.763751659047338e-05, "loss": 0.0, "step": 4892 }, { "epoch": 2.02, "learning_rate": 1.7622769503023155e-05, "loss": 0.0, "step": 4894 }, { "epoch": 2.02, "learning_rate": 1.7608022415572926e-05, "loss": 0.0, "step": 4896 }, { "epoch": 2.02, "learning_rate": 1.7593275328122696e-05, "loss": 0.0, "step": 4898 }, { "epoch": 2.02, "learning_rate": 1.7578528240672467e-05, "loss": 0.0, "step": 4900 }, { "epoch": 2.02, "learning_rate": 1.756378115322224e-05, "loss": 0.0001, "step": 4902 }, { "epoch": 2.02, "learning_rate": 1.754903406577201e-05, "loss": 0.0, "step": 4904 }, { "epoch": 2.02, "learning_rate": 1.753428697832178e-05, "loss": 0.0, "step": 4906 }, { "epoch": 2.02, "learning_rate": 1.751953989087155e-05, "loss": 0.0, "step": 4908 }, { "epoch": 2.02, "learning_rate": 1.7504792803421325e-05, "loss": 0.0, "step": 4910 }, { "epoch": 2.02, "learning_rate": 1.7490045715971095e-05, "loss": 0.0, "step": 4912 }, { "epoch": 2.02, "learning_rate": 1.7475298628520866e-05, "loss": 0.0, "step": 4914 }, { "epoch": 2.03, "learning_rate": 1.746055154107064e-05, "loss": 0.0169, "step": 4916 }, { "epoch": 2.03, "learning_rate": 1.744580445362041e-05, "loss": 0.0001, "step": 4918 }, { "epoch": 2.03, "learning_rate": 1.743105736617018e-05, "loss": 0.0014, "step": 4920 }, { "epoch": 2.03, "learning_rate": 1.7416310278719954e-05, "loss": 0.0, "step": 4922 }, { "epoch": 2.03, "learning_rate": 1.7401563191269728e-05, "loss": 0.0, "step": 4924 }, { "epoch": 2.03, "learning_rate": 1.7386816103819498e-05, "loss": 0.0, "step": 4926 }, { "epoch": 2.03, "learning_rate": 1.737206901636927e-05, "loss": 0.0, "step": 4928 }, { "epoch": 2.03, "learning_rate": 1.735732192891904e-05, "loss": 0.0, "step": 4930 }, { "epoch": 2.03, "learning_rate": 1.7342574841468813e-05, "loss": 0.0001, "step": 4932 }, { "epoch": 2.03, "learning_rate": 1.7327827754018583e-05, "loss": 0.0001, "step": 4934 }, { "epoch": 2.03, "learning_rate": 1.7313080666568353e-05, "loss": 0.0089, "step": 4936 }, { "epoch": 2.03, "learning_rate": 1.7298333579118127e-05, "loss": 0.0001, "step": 4938 }, { "epoch": 2.04, "learning_rate": 1.7283586491667898e-05, "loss": 0.0026, "step": 4940 }, { "epoch": 2.04, "learning_rate": 1.7268839404217668e-05, "loss": 0.0004, "step": 4942 }, { "epoch": 2.04, "learning_rate": 1.7254092316767438e-05, "loss": 0.0001, "step": 4944 }, { "epoch": 2.04, "learning_rate": 1.7239345229317212e-05, "loss": 0.0002, "step": 4946 }, { "epoch": 2.04, "learning_rate": 1.7224598141866982e-05, "loss": 0.0007, "step": 4948 }, { "epoch": 2.04, "learning_rate": 1.7209851054416753e-05, "loss": 0.0003, "step": 4950 }, { "epoch": 2.04, "learning_rate": 1.7195103966966523e-05, "loss": 0.0008, "step": 4952 }, { "epoch": 2.04, "learning_rate": 1.7180356879516297e-05, "loss": 0.0001, "step": 4954 }, { "epoch": 2.04, "learning_rate": 1.7165609792066067e-05, "loss": 0.0001, "step": 4956 }, { "epoch": 2.04, "learning_rate": 1.7150862704615838e-05, "loss": 0.0001, "step": 4958 }, { "epoch": 2.04, "learning_rate": 1.713611561716561e-05, "loss": 0.0002, "step": 4960 }, { "epoch": 2.04, "learning_rate": 1.7121368529715382e-05, "loss": 0.0001, "step": 4962 }, { "epoch": 2.05, "learning_rate": 1.7106621442265152e-05, "loss": 0.0002, "step": 4964 }, { "epoch": 2.05, "learning_rate": 1.7091874354814922e-05, "loss": 0.0001, "step": 4966 }, { "epoch": 2.05, "learning_rate": 1.7077127267364696e-05, "loss": 0.0001, "step": 4968 }, { "epoch": 2.05, "learning_rate": 1.7062380179914467e-05, "loss": 0.0001, "step": 4970 }, { "epoch": 2.05, "learning_rate": 1.7047633092464237e-05, "loss": 0.0001, "step": 4972 }, { "epoch": 2.05, "learning_rate": 1.703288600501401e-05, "loss": 0.0001, "step": 4974 }, { "epoch": 2.05, "learning_rate": 1.701813891756378e-05, "loss": 0.0001, "step": 4976 }, { "epoch": 2.05, "learning_rate": 1.7003391830113555e-05, "loss": 0.0001, "step": 4978 }, { "epoch": 2.05, "learning_rate": 1.6988644742663325e-05, "loss": 0.0001, "step": 4980 }, { "epoch": 2.05, "learning_rate": 1.69738976552131e-05, "loss": 0.0002, "step": 4982 }, { "epoch": 2.05, "learning_rate": 1.695915056776287e-05, "loss": 0.0001, "step": 4984 }, { "epoch": 2.05, "learning_rate": 1.694440348031264e-05, "loss": 0.0001, "step": 4986 }, { "epoch": 2.06, "learning_rate": 1.692965639286241e-05, "loss": 0.0001, "step": 4988 }, { "epoch": 2.06, "learning_rate": 1.6914909305412184e-05, "loss": 0.0002, "step": 4990 }, { "epoch": 2.06, "learning_rate": 1.6900162217961954e-05, "loss": 0.0, "step": 4992 }, { "epoch": 2.06, "learning_rate": 1.6885415130511725e-05, "loss": 0.0001, "step": 4994 }, { "epoch": 2.06, "learning_rate": 1.68706680430615e-05, "loss": 0.0001, "step": 4996 }, { "epoch": 2.06, "learning_rate": 1.685592095561127e-05, "loss": 0.0001, "step": 4998 }, { "epoch": 2.06, "learning_rate": 1.684117386816104e-05, "loss": 0.0001, "step": 5000 }, { "epoch": 2.06, "learning_rate": 1.682642678071081e-05, "loss": 0.0001, "step": 5002 }, { "epoch": 2.06, "learning_rate": 1.6811679693260583e-05, "loss": 0.0002, "step": 5004 }, { "epoch": 2.06, "learning_rate": 1.6796932605810354e-05, "loss": 0.0001, "step": 5006 }, { "epoch": 2.06, "learning_rate": 1.6782185518360124e-05, "loss": 0.0, "step": 5008 }, { "epoch": 2.06, "learning_rate": 1.6767438430909894e-05, "loss": 0.0001, "step": 5010 }, { "epoch": 2.07, "learning_rate": 1.6752691343459668e-05, "loss": 0.0, "step": 5012 }, { "epoch": 2.07, "learning_rate": 1.673794425600944e-05, "loss": 0.0001, "step": 5014 }, { "epoch": 2.07, "learning_rate": 1.672319716855921e-05, "loss": 0.0001, "step": 5016 }, { "epoch": 2.07, "learning_rate": 1.6708450081108983e-05, "loss": 0.0001, "step": 5018 }, { "epoch": 2.07, "learning_rate": 1.6693702993658753e-05, "loss": 0.0, "step": 5020 }, { "epoch": 2.07, "learning_rate": 1.6678955906208523e-05, "loss": 0.0001, "step": 5022 }, { "epoch": 2.07, "learning_rate": 1.6664208818758294e-05, "loss": 0.0001, "step": 5024 }, { "epoch": 2.07, "learning_rate": 1.6649461731308067e-05, "loss": 0.0, "step": 5026 }, { "epoch": 2.07, "learning_rate": 1.6634714643857838e-05, "loss": 0.0001, "step": 5028 }, { "epoch": 2.07, "learning_rate": 1.6619967556407608e-05, "loss": 0.0001, "step": 5030 }, { "epoch": 2.07, "learning_rate": 1.6605220468957382e-05, "loss": 0.0001, "step": 5032 }, { "epoch": 2.07, "learning_rate": 1.6590473381507156e-05, "loss": 0.0001, "step": 5034 }, { "epoch": 2.07, "learning_rate": 1.6575726294056926e-05, "loss": 0.0001, "step": 5036 }, { "epoch": 2.08, "learning_rate": 1.6560979206606696e-05, "loss": 0.0, "step": 5038 }, { "epoch": 2.08, "learning_rate": 1.654623211915647e-05, "loss": 0.0, "step": 5040 }, { "epoch": 2.08, "learning_rate": 1.653148503170624e-05, "loss": 0.0001, "step": 5042 }, { "epoch": 2.08, "learning_rate": 1.651673794425601e-05, "loss": 0.0001, "step": 5044 }, { "epoch": 2.08, "learning_rate": 1.650199085680578e-05, "loss": 0.0001, "step": 5046 }, { "epoch": 2.08, "learning_rate": 1.6487243769355555e-05, "loss": 0.0001, "step": 5048 }, { "epoch": 2.08, "learning_rate": 1.6472496681905325e-05, "loss": 0.0, "step": 5050 }, { "epoch": 2.08, "learning_rate": 1.6457749594455096e-05, "loss": 0.0001, "step": 5052 }, { "epoch": 2.08, "learning_rate": 1.6443002507004866e-05, "loss": 0.0, "step": 5054 }, { "epoch": 2.08, "learning_rate": 1.642825541955464e-05, "loss": 0.0002, "step": 5056 }, { "epoch": 2.08, "learning_rate": 1.641350833210441e-05, "loss": 0.0001, "step": 5058 }, { "epoch": 2.08, "learning_rate": 1.639876124465418e-05, "loss": 0.0001, "step": 5060 }, { "epoch": 2.09, "learning_rate": 1.6384014157203954e-05, "loss": 0.0001, "step": 5062 }, { "epoch": 2.09, "learning_rate": 1.6369267069753725e-05, "loss": 0.0001, "step": 5064 }, { "epoch": 2.09, "learning_rate": 1.6354519982303495e-05, "loss": 0.0, "step": 5066 }, { "epoch": 2.09, "learning_rate": 1.6339772894853265e-05, "loss": 0.0, "step": 5068 }, { "epoch": 2.09, "learning_rate": 1.632502580740304e-05, "loss": 0.0001, "step": 5070 }, { "epoch": 2.09, "learning_rate": 1.631027871995281e-05, "loss": 0.0, "step": 5072 }, { "epoch": 2.09, "learning_rate": 1.629553163250258e-05, "loss": 0.0001, "step": 5074 }, { "epoch": 2.09, "learning_rate": 1.6280784545052354e-05, "loss": 0.0001, "step": 5076 }, { "epoch": 2.09, "learning_rate": 1.6266037457602124e-05, "loss": 0.0, "step": 5078 }, { "epoch": 2.09, "learning_rate": 1.6251290370151894e-05, "loss": 0.0, "step": 5080 }, { "epoch": 2.09, "learning_rate": 1.6236543282701665e-05, "loss": 0.0, "step": 5082 }, { "epoch": 2.09, "learning_rate": 1.622179619525144e-05, "loss": 0.0, "step": 5084 }, { "epoch": 2.1, "learning_rate": 1.620704910780121e-05, "loss": 0.0001, "step": 5086 }, { "epoch": 2.1, "learning_rate": 1.619230202035098e-05, "loss": 0.0001, "step": 5088 }, { "epoch": 2.1, "learning_rate": 1.6177554932900753e-05, "loss": 0.0, "step": 5090 }, { "epoch": 2.1, "learning_rate": 1.6162807845450527e-05, "loss": 0.0001, "step": 5092 }, { "epoch": 2.1, "learning_rate": 1.6148060758000297e-05, "loss": 0.0, "step": 5094 }, { "epoch": 2.1, "learning_rate": 1.6133313670550067e-05, "loss": 0.0, "step": 5096 }, { "epoch": 2.1, "learning_rate": 1.6118566583099838e-05, "loss": 0.0, "step": 5098 }, { "epoch": 2.1, "learning_rate": 1.610381949564961e-05, "loss": 0.0, "step": 5100 }, { "epoch": 2.1, "learning_rate": 1.6089072408199382e-05, "loss": 0.0001, "step": 5102 }, { "epoch": 2.1, "learning_rate": 1.6074325320749152e-05, "loss": 0.0001, "step": 5104 }, { "epoch": 2.1, "learning_rate": 1.6059578233298926e-05, "loss": 0.0001, "step": 5106 }, { "epoch": 2.1, "learning_rate": 1.6044831145848696e-05, "loss": 0.0001, "step": 5108 }, { "epoch": 2.11, "learning_rate": 1.6030084058398467e-05, "loss": 0.0001, "step": 5110 }, { "epoch": 2.11, "learning_rate": 1.6015336970948237e-05, "loss": 0.0, "step": 5112 }, { "epoch": 2.11, "learning_rate": 1.600058988349801e-05, "loss": 0.0, "step": 5114 }, { "epoch": 2.11, "learning_rate": 1.598584279604778e-05, "loss": 0.0001, "step": 5116 }, { "epoch": 2.11, "learning_rate": 1.597109570859755e-05, "loss": 0.0, "step": 5118 }, { "epoch": 2.11, "learning_rate": 1.5956348621147325e-05, "loss": 0.0, "step": 5120 }, { "epoch": 2.11, "learning_rate": 1.5941601533697096e-05, "loss": 0.0001, "step": 5122 }, { "epoch": 2.11, "learning_rate": 1.5926854446246866e-05, "loss": 0.0001, "step": 5124 }, { "epoch": 2.11, "learning_rate": 1.5912107358796636e-05, "loss": 0.0001, "step": 5126 }, { "epoch": 2.11, "learning_rate": 1.589736027134641e-05, "loss": 0.0001, "step": 5128 }, { "epoch": 2.11, "learning_rate": 1.588261318389618e-05, "loss": 0.0001, "step": 5130 }, { "epoch": 2.11, "learning_rate": 1.586786609644595e-05, "loss": 0.0, "step": 5132 }, { "epoch": 2.12, "learning_rate": 1.5853119008995725e-05, "loss": 0.0001, "step": 5134 }, { "epoch": 2.12, "learning_rate": 1.5838371921545495e-05, "loss": 0.0, "step": 5136 }, { "epoch": 2.12, "learning_rate": 1.5823624834095265e-05, "loss": 0.0, "step": 5138 }, { "epoch": 2.12, "learning_rate": 1.5808877746645036e-05, "loss": 0.0, "step": 5140 }, { "epoch": 2.12, "learning_rate": 1.579413065919481e-05, "loss": 0.0001, "step": 5142 }, { "epoch": 2.12, "learning_rate": 1.577938357174458e-05, "loss": 0.0, "step": 5144 }, { "epoch": 2.12, "learning_rate": 1.5764636484294354e-05, "loss": 0.0002, "step": 5146 }, { "epoch": 2.12, "learning_rate": 1.5749889396844124e-05, "loss": 0.0001, "step": 5148 }, { "epoch": 2.12, "learning_rate": 1.5735142309393898e-05, "loss": 0.0, "step": 5150 }, { "epoch": 2.12, "learning_rate": 1.5720395221943668e-05, "loss": 0.0001, "step": 5152 }, { "epoch": 2.12, "learning_rate": 1.570564813449344e-05, "loss": 0.0001, "step": 5154 }, { "epoch": 2.12, "learning_rate": 1.569090104704321e-05, "loss": 0.0001, "step": 5156 }, { "epoch": 2.13, "learning_rate": 1.5676153959592983e-05, "loss": 0.0, "step": 5158 }, { "epoch": 2.13, "learning_rate": 1.5661406872142753e-05, "loss": 0.0, "step": 5160 }, { "epoch": 2.13, "learning_rate": 1.5646659784692523e-05, "loss": 0.0, "step": 5162 }, { "epoch": 2.13, "learning_rate": 1.5631912697242297e-05, "loss": 0.0, "step": 5164 }, { "epoch": 2.13, "learning_rate": 1.5617165609792068e-05, "loss": 0.0, "step": 5166 }, { "epoch": 2.13, "learning_rate": 1.5602418522341838e-05, "loss": 0.0001, "step": 5168 }, { "epoch": 2.13, "learning_rate": 1.5587671434891608e-05, "loss": 0.0001, "step": 5170 }, { "epoch": 2.13, "learning_rate": 1.5572924347441382e-05, "loss": 0.0, "step": 5172 }, { "epoch": 2.13, "learning_rate": 1.5558177259991152e-05, "loss": 0.0001, "step": 5174 }, { "epoch": 2.13, "learning_rate": 1.5543430172540923e-05, "loss": 0.0001, "step": 5176 }, { "epoch": 2.13, "learning_rate": 1.5528683085090696e-05, "loss": 0.0, "step": 5178 }, { "epoch": 2.13, "learning_rate": 1.5513935997640467e-05, "loss": 0.0001, "step": 5180 }, { "epoch": 2.14, "learning_rate": 1.5499188910190237e-05, "loss": 0.0, "step": 5182 }, { "epoch": 2.14, "learning_rate": 1.5484441822740008e-05, "loss": 0.0001, "step": 5184 }, { "epoch": 2.14, "learning_rate": 1.546969473528978e-05, "loss": 0.0001, "step": 5186 }, { "epoch": 2.14, "learning_rate": 1.5454947647839552e-05, "loss": 0.0, "step": 5188 }, { "epoch": 2.14, "learning_rate": 1.5440200560389322e-05, "loss": 0.0, "step": 5190 }, { "epoch": 2.14, "learning_rate": 1.5425453472939096e-05, "loss": 0.0, "step": 5192 }, { "epoch": 2.14, "learning_rate": 1.5410706385488866e-05, "loss": 0.0, "step": 5194 }, { "epoch": 2.14, "learning_rate": 1.5395959298038637e-05, "loss": 0.0, "step": 5196 }, { "epoch": 2.14, "learning_rate": 1.5381212210588407e-05, "loss": 0.0, "step": 5198 }, { "epoch": 2.14, "learning_rate": 1.536646512313818e-05, "loss": 0.0, "step": 5200 }, { "epoch": 2.14, "learning_rate": 1.5351718035687954e-05, "loss": 0.0, "step": 5202 }, { "epoch": 2.14, "learning_rate": 1.5336970948237725e-05, "loss": 0.0, "step": 5204 }, { "epoch": 2.15, "learning_rate": 1.5322223860787495e-05, "loss": 0.0, "step": 5206 }, { "epoch": 2.15, "learning_rate": 1.530747677333727e-05, "loss": 0.0, "step": 5208 }, { "epoch": 2.15, "learning_rate": 1.529272968588704e-05, "loss": 0.0, "step": 5210 }, { "epoch": 2.15, "learning_rate": 1.527798259843681e-05, "loss": 0.0, "step": 5212 }, { "epoch": 2.15, "learning_rate": 1.526323551098658e-05, "loss": 0.0, "step": 5214 }, { "epoch": 2.15, "learning_rate": 1.5248488423536352e-05, "loss": 0.0001, "step": 5216 }, { "epoch": 2.15, "learning_rate": 1.5233741336086124e-05, "loss": 0.0001, "step": 5218 }, { "epoch": 2.15, "learning_rate": 1.5218994248635896e-05, "loss": 0.0, "step": 5220 }, { "epoch": 2.15, "learning_rate": 1.5204247161185667e-05, "loss": 0.0, "step": 5222 }, { "epoch": 2.15, "learning_rate": 1.5189500073735439e-05, "loss": 0.0, "step": 5224 }, { "epoch": 2.15, "learning_rate": 1.5174752986285209e-05, "loss": 0.0, "step": 5226 }, { "epoch": 2.15, "learning_rate": 1.5160005898834981e-05, "loss": 0.0, "step": 5228 }, { "epoch": 2.15, "learning_rate": 1.5145258811384751e-05, "loss": 0.0, "step": 5230 }, { "epoch": 2.16, "learning_rate": 1.5130511723934523e-05, "loss": 0.0, "step": 5232 }, { "epoch": 2.16, "learning_rate": 1.5115764636484296e-05, "loss": 0.0001, "step": 5234 }, { "epoch": 2.16, "learning_rate": 1.5101017549034066e-05, "loss": 0.0, "step": 5236 }, { "epoch": 2.16, "learning_rate": 1.5086270461583838e-05, "loss": 0.0, "step": 5238 }, { "epoch": 2.16, "learning_rate": 1.5071523374133608e-05, "loss": 0.0, "step": 5240 }, { "epoch": 2.16, "learning_rate": 1.505677628668338e-05, "loss": 0.0001, "step": 5242 }, { "epoch": 2.16, "learning_rate": 1.504202919923315e-05, "loss": 0.0, "step": 5244 }, { "epoch": 2.16, "learning_rate": 1.5027282111782923e-05, "loss": 0.0, "step": 5246 }, { "epoch": 2.16, "learning_rate": 1.5012535024332693e-05, "loss": 0.0, "step": 5248 }, { "epoch": 2.16, "learning_rate": 1.4997787936882465e-05, "loss": 0.0, "step": 5250 }, { "epoch": 2.16, "learning_rate": 1.4983040849432237e-05, "loss": 0.0, "step": 5252 }, { "epoch": 2.16, "learning_rate": 1.4968293761982008e-05, "loss": 0.0, "step": 5254 }, { "epoch": 2.17, "learning_rate": 1.4953546674531781e-05, "loss": 0.0, "step": 5256 }, { "epoch": 2.17, "learning_rate": 1.4938799587081553e-05, "loss": 0.0001, "step": 5258 }, { "epoch": 2.17, "learning_rate": 1.4924052499631324e-05, "loss": 0.0, "step": 5260 }, { "epoch": 2.17, "learning_rate": 1.4909305412181096e-05, "loss": 0.0, "step": 5262 }, { "epoch": 2.17, "learning_rate": 1.4894558324730868e-05, "loss": 0.0, "step": 5264 }, { "epoch": 2.17, "learning_rate": 1.4879811237280638e-05, "loss": 0.0001, "step": 5266 }, { "epoch": 2.17, "learning_rate": 1.486506414983041e-05, "loss": 0.0, "step": 5268 }, { "epoch": 2.17, "learning_rate": 1.485031706238018e-05, "loss": 0.0, "step": 5270 }, { "epoch": 2.17, "learning_rate": 1.4835569974929953e-05, "loss": 0.0, "step": 5272 }, { "epoch": 2.17, "learning_rate": 1.4820822887479723e-05, "loss": 0.0, "step": 5274 }, { "epoch": 2.17, "learning_rate": 1.4806075800029495e-05, "loss": 0.0, "step": 5276 }, { "epoch": 2.17, "learning_rate": 1.4791328712579267e-05, "loss": 0.0, "step": 5278 }, { "epoch": 2.18, "learning_rate": 1.4776581625129038e-05, "loss": 0.0, "step": 5280 }, { "epoch": 2.18, "learning_rate": 1.476183453767881e-05, "loss": 0.0001, "step": 5282 }, { "epoch": 2.18, "learning_rate": 1.474708745022858e-05, "loss": 0.0, "step": 5284 }, { "epoch": 2.18, "learning_rate": 1.4732340362778352e-05, "loss": 0.0, "step": 5286 }, { "epoch": 2.18, "learning_rate": 1.4717593275328123e-05, "loss": 0.0001, "step": 5288 }, { "epoch": 2.18, "learning_rate": 1.4702846187877895e-05, "loss": 0.0, "step": 5290 }, { "epoch": 2.18, "learning_rate": 1.4688099100427665e-05, "loss": 0.0, "step": 5292 }, { "epoch": 2.18, "learning_rate": 1.4673352012977437e-05, "loss": 0.0, "step": 5294 }, { "epoch": 2.18, "learning_rate": 1.4658604925527209e-05, "loss": 0.0, "step": 5296 }, { "epoch": 2.18, "learning_rate": 1.464385783807698e-05, "loss": 0.0, "step": 5298 }, { "epoch": 2.18, "learning_rate": 1.4629110750626752e-05, "loss": 0.0, "step": 5300 }, { "epoch": 2.18, "learning_rate": 1.4614363663176522e-05, "loss": 0.0, "step": 5302 }, { "epoch": 2.19, "learning_rate": 1.4599616575726294e-05, "loss": 0.0, "step": 5304 }, { "epoch": 2.19, "learning_rate": 1.4584869488276064e-05, "loss": 0.0, "step": 5306 }, { "epoch": 2.19, "learning_rate": 1.4570122400825836e-05, "loss": 0.0001, "step": 5308 }, { "epoch": 2.19, "learning_rate": 1.4555375313375608e-05, "loss": 0.0, "step": 5310 }, { "epoch": 2.19, "learning_rate": 1.4540628225925382e-05, "loss": 0.0, "step": 5312 }, { "epoch": 2.19, "learning_rate": 1.4525881138475153e-05, "loss": 0.0, "step": 5314 }, { "epoch": 2.19, "learning_rate": 1.4511134051024925e-05, "loss": 0.0, "step": 5316 }, { "epoch": 2.19, "learning_rate": 1.4496386963574695e-05, "loss": 0.0, "step": 5318 }, { "epoch": 2.19, "learning_rate": 1.4481639876124467e-05, "loss": 0.0001, "step": 5320 }, { "epoch": 2.19, "learning_rate": 1.4466892788674239e-05, "loss": 0.0, "step": 5322 }, { "epoch": 2.19, "learning_rate": 1.445214570122401e-05, "loss": 0.0, "step": 5324 }, { "epoch": 2.19, "learning_rate": 1.4437398613773782e-05, "loss": 0.0, "step": 5326 }, { "epoch": 2.2, "learning_rate": 1.4422651526323552e-05, "loss": 0.0001, "step": 5328 }, { "epoch": 2.2, "learning_rate": 1.4407904438873324e-05, "loss": 0.0001, "step": 5330 }, { "epoch": 2.2, "learning_rate": 1.4393157351423094e-05, "loss": 0.0, "step": 5332 }, { "epoch": 2.2, "learning_rate": 1.4378410263972866e-05, "loss": 0.0, "step": 5334 }, { "epoch": 2.2, "learning_rate": 1.4363663176522638e-05, "loss": 0.0, "step": 5336 }, { "epoch": 2.2, "learning_rate": 1.4348916089072409e-05, "loss": 0.0, "step": 5338 }, { "epoch": 2.2, "learning_rate": 1.4334169001622181e-05, "loss": 0.0, "step": 5340 }, { "epoch": 2.2, "learning_rate": 1.4319421914171951e-05, "loss": 0.0001, "step": 5342 }, { "epoch": 2.2, "learning_rate": 1.4304674826721723e-05, "loss": 0.0, "step": 5344 }, { "epoch": 2.2, "learning_rate": 1.4289927739271494e-05, "loss": 0.0, "step": 5346 }, { "epoch": 2.2, "learning_rate": 1.4275180651821266e-05, "loss": 0.0, "step": 5348 }, { "epoch": 2.2, "learning_rate": 1.4260433564371036e-05, "loss": 0.0, "step": 5350 }, { "epoch": 2.21, "learning_rate": 1.4245686476920808e-05, "loss": 0.0, "step": 5352 }, { "epoch": 2.21, "learning_rate": 1.423093938947058e-05, "loss": 0.0, "step": 5354 }, { "epoch": 2.21, "learning_rate": 1.421619230202035e-05, "loss": 0.0, "step": 5356 }, { "epoch": 2.21, "learning_rate": 1.4201445214570123e-05, "loss": 0.0003, "step": 5358 }, { "epoch": 2.21, "learning_rate": 1.4186698127119893e-05, "loss": 0.0, "step": 5360 }, { "epoch": 2.21, "learning_rate": 1.4171951039669665e-05, "loss": 0.0001, "step": 5362 }, { "epoch": 2.21, "learning_rate": 1.4157203952219435e-05, "loss": 0.0, "step": 5364 }, { "epoch": 2.21, "learning_rate": 1.4142456864769207e-05, "loss": 0.0, "step": 5366 }, { "epoch": 2.21, "learning_rate": 1.4127709777318981e-05, "loss": 0.0, "step": 5368 }, { "epoch": 2.21, "learning_rate": 1.4112962689868753e-05, "loss": 0.0001, "step": 5370 }, { "epoch": 2.21, "learning_rate": 1.4098215602418524e-05, "loss": 0.0001, "step": 5372 }, { "epoch": 2.21, "learning_rate": 1.4083468514968296e-05, "loss": 0.0001, "step": 5374 }, { "epoch": 2.22, "learning_rate": 1.4068721427518066e-05, "loss": 0.0, "step": 5376 }, { "epoch": 2.22, "learning_rate": 1.4053974340067838e-05, "loss": 0.0, "step": 5378 }, { "epoch": 2.22, "learning_rate": 1.403922725261761e-05, "loss": 0.0, "step": 5380 }, { "epoch": 2.22, "learning_rate": 1.402448016516738e-05, "loss": 0.0, "step": 5382 }, { "epoch": 2.22, "learning_rate": 1.4009733077717153e-05, "loss": 0.0003, "step": 5384 }, { "epoch": 2.22, "learning_rate": 1.3994985990266923e-05, "loss": 0.0, "step": 5386 }, { "epoch": 2.22, "learning_rate": 1.3980238902816695e-05, "loss": 0.0001, "step": 5388 }, { "epoch": 2.22, "learning_rate": 1.3965491815366465e-05, "loss": 0.0, "step": 5390 }, { "epoch": 2.22, "learning_rate": 1.3950744727916237e-05, "loss": 0.0, "step": 5392 }, { "epoch": 2.22, "learning_rate": 1.3935997640466008e-05, "loss": 0.0, "step": 5394 }, { "epoch": 2.22, "learning_rate": 1.392125055301578e-05, "loss": 0.0, "step": 5396 }, { "epoch": 2.22, "learning_rate": 1.3906503465565552e-05, "loss": 0.0001, "step": 5398 }, { "epoch": 2.22, "learning_rate": 1.3891756378115322e-05, "loss": 0.0, "step": 5400 }, { "epoch": 2.23, "learning_rate": 1.3877009290665094e-05, "loss": 0.0001, "step": 5402 }, { "epoch": 2.23, "learning_rate": 1.3862262203214865e-05, "loss": 0.0, "step": 5404 }, { "epoch": 2.23, "learning_rate": 1.3847515115764637e-05, "loss": 0.0, "step": 5406 }, { "epoch": 2.23, "learning_rate": 1.3832768028314407e-05, "loss": 0.0, "step": 5408 }, { "epoch": 2.23, "learning_rate": 1.381802094086418e-05, "loss": 0.0, "step": 5410 }, { "epoch": 2.23, "learning_rate": 1.3803273853413951e-05, "loss": 0.0, "step": 5412 }, { "epoch": 2.23, "learning_rate": 1.3788526765963722e-05, "loss": 0.0, "step": 5414 }, { "epoch": 2.23, "learning_rate": 1.3773779678513494e-05, "loss": 0.0, "step": 5416 }, { "epoch": 2.23, "learning_rate": 1.3759032591063264e-05, "loss": 0.0, "step": 5418 }, { "epoch": 2.23, "learning_rate": 1.3744285503613036e-05, "loss": 0.0, "step": 5420 }, { "epoch": 2.23, "learning_rate": 1.3729538416162807e-05, "loss": 0.0001, "step": 5422 }, { "epoch": 2.23, "learning_rate": 1.3714791328712582e-05, "loss": 0.0, "step": 5424 }, { "epoch": 2.24, "learning_rate": 1.3700044241262352e-05, "loss": 0.0001, "step": 5426 }, { "epoch": 2.24, "learning_rate": 1.3685297153812124e-05, "loss": 0.0, "step": 5428 }, { "epoch": 2.24, "learning_rate": 1.3670550066361895e-05, "loss": 0.0001, "step": 5430 }, { "epoch": 2.24, "learning_rate": 1.3655802978911667e-05, "loss": 0.0, "step": 5432 }, { "epoch": 2.24, "learning_rate": 1.3641055891461437e-05, "loss": 0.0, "step": 5434 }, { "epoch": 2.24, "learning_rate": 1.362630880401121e-05, "loss": 0.0, "step": 5436 }, { "epoch": 2.24, "learning_rate": 1.361156171656098e-05, "loss": 0.0, "step": 5438 }, { "epoch": 2.24, "learning_rate": 1.3596814629110752e-05, "loss": 0.0, "step": 5440 }, { "epoch": 2.24, "learning_rate": 1.3582067541660524e-05, "loss": 0.0, "step": 5442 }, { "epoch": 2.24, "learning_rate": 1.3567320454210294e-05, "loss": 0.0, "step": 5444 }, { "epoch": 2.24, "learning_rate": 1.3552573366760066e-05, "loss": 0.0, "step": 5446 }, { "epoch": 2.24, "learning_rate": 1.3537826279309837e-05, "loss": 0.0, "step": 5448 }, { "epoch": 2.25, "learning_rate": 1.3523079191859609e-05, "loss": 0.0002, "step": 5450 }, { "epoch": 2.25, "learning_rate": 1.3508332104409379e-05, "loss": 0.0, "step": 5452 }, { "epoch": 2.25, "learning_rate": 1.3493585016959151e-05, "loss": 0.0, "step": 5454 }, { "epoch": 2.25, "learning_rate": 1.3478837929508923e-05, "loss": 0.0001, "step": 5456 }, { "epoch": 2.25, "learning_rate": 1.3464090842058693e-05, "loss": 0.0, "step": 5458 }, { "epoch": 2.25, "learning_rate": 1.3449343754608466e-05, "loss": 0.0, "step": 5460 }, { "epoch": 2.25, "learning_rate": 1.3434596667158236e-05, "loss": 0.0, "step": 5462 }, { "epoch": 2.25, "learning_rate": 1.3419849579708008e-05, "loss": 0.0, "step": 5464 }, { "epoch": 2.25, "learning_rate": 1.3405102492257778e-05, "loss": 0.0, "step": 5466 }, { "epoch": 2.25, "learning_rate": 1.339035540480755e-05, "loss": 0.0, "step": 5468 }, { "epoch": 2.25, "learning_rate": 1.337560831735732e-05, "loss": 0.0, "step": 5470 }, { "epoch": 2.25, "learning_rate": 1.3360861229907093e-05, "loss": 0.0, "step": 5472 }, { "epoch": 2.26, "learning_rate": 1.3346114142456865e-05, "loss": 0.0, "step": 5474 }, { "epoch": 2.26, "learning_rate": 1.3331367055006635e-05, "loss": 0.0, "step": 5476 }, { "epoch": 2.26, "learning_rate": 1.3316619967556409e-05, "loss": 0.0, "step": 5478 }, { "epoch": 2.26, "learning_rate": 1.3301872880106181e-05, "loss": 0.0001, "step": 5480 }, { "epoch": 2.26, "learning_rate": 1.3287125792655953e-05, "loss": 0.0, "step": 5482 }, { "epoch": 2.26, "learning_rate": 1.3272378705205723e-05, "loss": 0.0, "step": 5484 }, { "epoch": 2.26, "learning_rate": 1.3257631617755496e-05, "loss": 0.0, "step": 5486 }, { "epoch": 2.26, "learning_rate": 1.3242884530305266e-05, "loss": 0.0001, "step": 5488 }, { "epoch": 2.26, "learning_rate": 1.3228137442855038e-05, "loss": 0.0, "step": 5490 }, { "epoch": 2.26, "learning_rate": 1.3213390355404808e-05, "loss": 0.0, "step": 5492 }, { "epoch": 2.26, "learning_rate": 1.319864326795458e-05, "loss": 0.0, "step": 5494 }, { "epoch": 2.26, "learning_rate": 1.318389618050435e-05, "loss": 0.0003, "step": 5496 }, { "epoch": 2.27, "learning_rate": 1.3169149093054123e-05, "loss": 0.0, "step": 5498 }, { "epoch": 2.27, "learning_rate": 1.3154402005603895e-05, "loss": 0.0, "step": 5500 }, { "epoch": 2.27, "learning_rate": 1.3139654918153665e-05, "loss": 0.0, "step": 5502 }, { "epoch": 2.27, "learning_rate": 1.3124907830703437e-05, "loss": 0.0, "step": 5504 }, { "epoch": 2.27, "learning_rate": 1.3110160743253208e-05, "loss": 0.0, "step": 5506 }, { "epoch": 2.27, "learning_rate": 1.309541365580298e-05, "loss": 0.0, "step": 5508 }, { "epoch": 2.27, "learning_rate": 1.308066656835275e-05, "loss": 0.0, "step": 5510 }, { "epoch": 2.27, "learning_rate": 1.3065919480902522e-05, "loss": 0.0, "step": 5512 }, { "epoch": 2.27, "learning_rate": 1.3051172393452294e-05, "loss": 0.0001, "step": 5514 }, { "epoch": 2.27, "learning_rate": 1.3036425306002065e-05, "loss": 0.0, "step": 5516 }, { "epoch": 2.27, "learning_rate": 1.3021678218551837e-05, "loss": 0.0, "step": 5518 }, { "epoch": 2.27, "learning_rate": 1.3006931131101607e-05, "loss": 0.0001, "step": 5520 }, { "epoch": 2.28, "learning_rate": 1.2992184043651379e-05, "loss": 0.0, "step": 5522 }, { "epoch": 2.28, "learning_rate": 1.297743695620115e-05, "loss": 0.0, "step": 5524 }, { "epoch": 2.28, "learning_rate": 1.2962689868750921e-05, "loss": 0.0, "step": 5526 }, { "epoch": 2.28, "learning_rate": 1.2947942781300692e-05, "loss": 0.0, "step": 5528 }, { "epoch": 2.28, "learning_rate": 1.2933195693850464e-05, "loss": 0.0, "step": 5530 }, { "epoch": 2.28, "learning_rate": 1.2918448606400236e-05, "loss": 0.0, "step": 5532 }, { "epoch": 2.28, "learning_rate": 1.290370151895001e-05, "loss": 0.0, "step": 5534 }, { "epoch": 2.28, "learning_rate": 1.288895443149978e-05, "loss": 0.0, "step": 5536 }, { "epoch": 2.28, "learning_rate": 1.2874207344049552e-05, "loss": 0.0002, "step": 5538 }, { "epoch": 2.28, "learning_rate": 1.2859460256599323e-05, "loss": 0.0, "step": 5540 }, { "epoch": 2.28, "learning_rate": 1.2844713169149095e-05, "loss": 0.0001, "step": 5542 }, { "epoch": 2.28, "learning_rate": 1.2829966081698867e-05, "loss": 0.0, "step": 5544 }, { "epoch": 2.29, "learning_rate": 1.2815218994248637e-05, "loss": 0.0, "step": 5546 }, { "epoch": 2.29, "learning_rate": 1.2800471906798409e-05, "loss": 0.0, "step": 5548 }, { "epoch": 2.29, "learning_rate": 1.278572481934818e-05, "loss": 0.0, "step": 5550 }, { "epoch": 2.29, "learning_rate": 1.2770977731897951e-05, "loss": 0.0, "step": 5552 }, { "epoch": 2.29, "learning_rate": 1.2756230644447722e-05, "loss": 0.0, "step": 5554 }, { "epoch": 2.29, "learning_rate": 1.2741483556997494e-05, "loss": 0.0001, "step": 5556 }, { "epoch": 2.29, "learning_rate": 1.2726736469547266e-05, "loss": 0.0, "step": 5558 }, { "epoch": 2.29, "learning_rate": 1.2711989382097036e-05, "loss": 0.0, "step": 5560 }, { "epoch": 2.29, "learning_rate": 1.2697242294646808e-05, "loss": 0.0, "step": 5562 }, { "epoch": 2.29, "learning_rate": 1.2682495207196579e-05, "loss": 0.0, "step": 5564 }, { "epoch": 2.29, "learning_rate": 1.266774811974635e-05, "loss": 0.0, "step": 5566 }, { "epoch": 2.29, "learning_rate": 1.2653001032296121e-05, "loss": 0.0, "step": 5568 }, { "epoch": 2.3, "learning_rate": 1.2638253944845893e-05, "loss": 0.0, "step": 5570 }, { "epoch": 2.3, "learning_rate": 1.2623506857395664e-05, "loss": 0.0, "step": 5572 }, { "epoch": 2.3, "learning_rate": 1.2608759769945436e-05, "loss": 0.0, "step": 5574 }, { "epoch": 2.3, "learning_rate": 1.2594012682495208e-05, "loss": 0.0, "step": 5576 }, { "epoch": 2.3, "learning_rate": 1.2579265595044978e-05, "loss": 0.0, "step": 5578 }, { "epoch": 2.3, "learning_rate": 1.256451850759475e-05, "loss": 0.0, "step": 5580 }, { "epoch": 2.3, "learning_rate": 1.254977142014452e-05, "loss": 0.0, "step": 5582 }, { "epoch": 2.3, "learning_rate": 1.2535024332694293e-05, "loss": 0.0, "step": 5584 }, { "epoch": 2.3, "learning_rate": 1.2520277245244063e-05, "loss": 0.0, "step": 5586 }, { "epoch": 2.3, "learning_rate": 1.2505530157793835e-05, "loss": 0.0, "step": 5588 }, { "epoch": 2.3, "learning_rate": 1.2490783070343607e-05, "loss": 0.0, "step": 5590 }, { "epoch": 2.3, "learning_rate": 1.2476035982893379e-05, "loss": 0.0, "step": 5592 }, { "epoch": 2.3, "learning_rate": 1.246128889544315e-05, "loss": 0.0001, "step": 5594 }, { "epoch": 2.31, "learning_rate": 1.2446541807992922e-05, "loss": 0.0, "step": 5596 }, { "epoch": 2.31, "learning_rate": 1.2431794720542694e-05, "loss": 0.0, "step": 5598 }, { "epoch": 2.31, "learning_rate": 1.2417047633092464e-05, "loss": 0.0002, "step": 5600 }, { "epoch": 2.31, "learning_rate": 1.2402300545642236e-05, "loss": 0.0, "step": 5602 }, { "epoch": 2.31, "learning_rate": 1.2387553458192008e-05, "loss": 0.0, "step": 5604 }, { "epoch": 2.31, "learning_rate": 1.237280637074178e-05, "loss": 0.0, "step": 5606 }, { "epoch": 2.31, "learning_rate": 1.235805928329155e-05, "loss": 0.0, "step": 5608 }, { "epoch": 2.31, "learning_rate": 1.2343312195841323e-05, "loss": 0.0, "step": 5610 }, { "epoch": 2.31, "learning_rate": 1.2328565108391093e-05, "loss": 0.0, "step": 5612 }, { "epoch": 2.31, "learning_rate": 1.2313818020940865e-05, "loss": 0.0, "step": 5614 }, { "epoch": 2.31, "learning_rate": 1.2299070933490635e-05, "loss": 0.0, "step": 5616 }, { "epoch": 2.31, "learning_rate": 1.2284323846040407e-05, "loss": 0.0, "step": 5618 }, { "epoch": 2.32, "learning_rate": 1.226957675859018e-05, "loss": 0.0, "step": 5620 }, { "epoch": 2.32, "learning_rate": 1.225482967113995e-05, "loss": 0.0, "step": 5622 }, { "epoch": 2.32, "learning_rate": 1.2240082583689722e-05, "loss": 0.0032, "step": 5624 }, { "epoch": 2.32, "learning_rate": 1.2225335496239492e-05, "loss": 0.0, "step": 5626 }, { "epoch": 2.32, "learning_rate": 1.2210588408789264e-05, "loss": 0.0, "step": 5628 }, { "epoch": 2.32, "learning_rate": 1.2195841321339035e-05, "loss": 0.0002, "step": 5630 }, { "epoch": 2.32, "learning_rate": 1.2181094233888808e-05, "loss": 0.0002, "step": 5632 }, { "epoch": 2.32, "learning_rate": 1.2166347146438579e-05, "loss": 0.0, "step": 5634 }, { "epoch": 2.32, "learning_rate": 1.2151600058988351e-05, "loss": 0.0001, "step": 5636 }, { "epoch": 2.32, "learning_rate": 1.2136852971538121e-05, "loss": 0.0002, "step": 5638 }, { "epoch": 2.32, "learning_rate": 1.2122105884087893e-05, "loss": 0.0001, "step": 5640 }, { "epoch": 2.32, "learning_rate": 1.2107358796637665e-05, "loss": 0.0, "step": 5642 }, { "epoch": 2.33, "learning_rate": 1.2092611709187436e-05, "loss": 0.0, "step": 5644 }, { "epoch": 2.33, "learning_rate": 1.2077864621737208e-05, "loss": 0.0001, "step": 5646 }, { "epoch": 2.33, "learning_rate": 1.2063117534286978e-05, "loss": 0.0001, "step": 5648 }, { "epoch": 2.33, "learning_rate": 1.204837044683675e-05, "loss": 0.0001, "step": 5650 }, { "epoch": 2.33, "learning_rate": 1.203362335938652e-05, "loss": 0.0, "step": 5652 }, { "epoch": 2.33, "learning_rate": 1.2018876271936293e-05, "loss": 0.0, "step": 5654 }, { "epoch": 2.33, "learning_rate": 1.2004129184486065e-05, "loss": 0.0, "step": 5656 }, { "epoch": 2.33, "learning_rate": 1.1989382097035835e-05, "loss": 0.0, "step": 5658 }, { "epoch": 2.33, "learning_rate": 1.1974635009585609e-05, "loss": 0.0, "step": 5660 }, { "epoch": 2.33, "learning_rate": 1.195988792213538e-05, "loss": 0.0, "step": 5662 }, { "epoch": 2.33, "learning_rate": 1.1945140834685151e-05, "loss": 0.0, "step": 5664 }, { "epoch": 2.33, "learning_rate": 1.1930393747234922e-05, "loss": 0.0, "step": 5666 }, { "epoch": 2.34, "learning_rate": 1.1915646659784694e-05, "loss": 0.0, "step": 5668 }, { "epoch": 2.34, "learning_rate": 1.1900899572334464e-05, "loss": 0.0, "step": 5670 }, { "epoch": 2.34, "learning_rate": 1.1886152484884236e-05, "loss": 0.0, "step": 5672 }, { "epoch": 2.34, "learning_rate": 1.1871405397434007e-05, "loss": 0.0, "step": 5674 }, { "epoch": 2.34, "learning_rate": 1.1856658309983779e-05, "loss": 0.0, "step": 5676 }, { "epoch": 2.34, "learning_rate": 1.184191122253355e-05, "loss": 0.0, "step": 5678 }, { "epoch": 2.34, "learning_rate": 1.1827164135083321e-05, "loss": 0.0001, "step": 5680 }, { "epoch": 2.34, "learning_rate": 1.1812417047633093e-05, "loss": 0.0, "step": 5682 }, { "epoch": 2.34, "learning_rate": 1.1797669960182863e-05, "loss": 0.0, "step": 5684 }, { "epoch": 2.34, "learning_rate": 1.1782922872732635e-05, "loss": 0.0, "step": 5686 }, { "epoch": 2.34, "learning_rate": 1.1768175785282408e-05, "loss": 0.0, "step": 5688 }, { "epoch": 2.34, "learning_rate": 1.175342869783218e-05, "loss": 0.0, "step": 5690 }, { "epoch": 2.35, "learning_rate": 1.173868161038195e-05, "loss": 0.0, "step": 5692 }, { "epoch": 2.35, "learning_rate": 1.1723934522931722e-05, "loss": 0.0, "step": 5694 }, { "epoch": 2.35, "learning_rate": 1.1709187435481492e-05, "loss": 0.0, "step": 5696 }, { "epoch": 2.35, "learning_rate": 1.1694440348031264e-05, "loss": 0.0, "step": 5698 }, { "epoch": 2.35, "learning_rate": 1.1679693260581037e-05, "loss": 0.0, "step": 5700 }, { "epoch": 2.35, "learning_rate": 1.1664946173130807e-05, "loss": 0.0, "step": 5702 }, { "epoch": 2.35, "learning_rate": 1.1650199085680579e-05, "loss": 0.0, "step": 5704 }, { "epoch": 2.35, "learning_rate": 1.163545199823035e-05, "loss": 0.0, "step": 5706 }, { "epoch": 2.35, "learning_rate": 1.1620704910780121e-05, "loss": 0.0, "step": 5708 }, { "epoch": 2.35, "learning_rate": 1.1605957823329892e-05, "loss": 0.0, "step": 5710 }, { "epoch": 2.35, "learning_rate": 1.1591210735879664e-05, "loss": 0.0, "step": 5712 }, { "epoch": 2.35, "learning_rate": 1.1576463648429436e-05, "loss": 0.0, "step": 5714 }, { "epoch": 2.36, "learning_rate": 1.1561716560979208e-05, "loss": 0.0, "step": 5716 }, { "epoch": 2.36, "learning_rate": 1.1546969473528978e-05, "loss": 0.0, "step": 5718 }, { "epoch": 2.36, "learning_rate": 1.153222238607875e-05, "loss": 0.0, "step": 5720 }, { "epoch": 2.36, "learning_rate": 1.1517475298628522e-05, "loss": 0.0, "step": 5722 }, { "epoch": 2.36, "learning_rate": 1.1502728211178293e-05, "loss": 0.0, "step": 5724 }, { "epoch": 2.36, "learning_rate": 1.1487981123728065e-05, "loss": 0.0001, "step": 5726 }, { "epoch": 2.36, "learning_rate": 1.1473234036277835e-05, "loss": 0.0, "step": 5728 }, { "epoch": 2.36, "learning_rate": 1.1458486948827607e-05, "loss": 0.0, "step": 5730 }, { "epoch": 2.36, "learning_rate": 1.1443739861377378e-05, "loss": 0.0, "step": 5732 }, { "epoch": 2.36, "learning_rate": 1.142899277392715e-05, "loss": 0.0, "step": 5734 }, { "epoch": 2.36, "learning_rate": 1.1414245686476922e-05, "loss": 0.0, "step": 5736 }, { "epoch": 2.36, "learning_rate": 1.1399498599026692e-05, "loss": 0.0, "step": 5738 }, { "epoch": 2.37, "learning_rate": 1.1384751511576464e-05, "loss": 0.0, "step": 5740 }, { "epoch": 2.37, "learning_rate": 1.1370004424126236e-05, "loss": 0.0, "step": 5742 }, { "epoch": 2.37, "learning_rate": 1.1355257336676008e-05, "loss": 0.0, "step": 5744 }, { "epoch": 2.37, "learning_rate": 1.1340510249225779e-05, "loss": 0.0, "step": 5746 }, { "epoch": 2.37, "learning_rate": 1.132576316177555e-05, "loss": 0.0, "step": 5748 }, { "epoch": 2.37, "learning_rate": 1.1311016074325321e-05, "loss": 0.0, "step": 5750 }, { "epoch": 2.37, "learning_rate": 1.1296268986875093e-05, "loss": 0.0001, "step": 5752 }, { "epoch": 2.37, "learning_rate": 1.1281521899424864e-05, "loss": 0.0001, "step": 5754 }, { "epoch": 2.37, "learning_rate": 1.1266774811974636e-05, "loss": 0.0, "step": 5756 }, { "epoch": 2.37, "learning_rate": 1.1252027724524408e-05, "loss": 0.0, "step": 5758 }, { "epoch": 2.37, "learning_rate": 1.1237280637074178e-05, "loss": 0.0, "step": 5760 }, { "epoch": 2.37, "learning_rate": 1.122253354962395e-05, "loss": 0.0, "step": 5762 }, { "epoch": 2.37, "learning_rate": 1.120778646217372e-05, "loss": 0.0, "step": 5764 }, { "epoch": 2.38, "learning_rate": 1.1193039374723492e-05, "loss": 0.0, "step": 5766 }, { "epoch": 2.38, "learning_rate": 1.1178292287273263e-05, "loss": 0.0, "step": 5768 }, { "epoch": 2.38, "learning_rate": 1.1163545199823037e-05, "loss": 0.0, "step": 5770 }, { "epoch": 2.38, "learning_rate": 1.1148798112372807e-05, "loss": 0.0, "step": 5772 }, { "epoch": 2.38, "learning_rate": 1.1134051024922579e-05, "loss": 0.0, "step": 5774 }, { "epoch": 2.38, "learning_rate": 1.111930393747235e-05, "loss": 0.0, "step": 5776 }, { "epoch": 2.38, "learning_rate": 1.1104556850022121e-05, "loss": 0.0, "step": 5778 }, { "epoch": 2.38, "learning_rate": 1.1089809762571894e-05, "loss": 0.0001, "step": 5780 }, { "epoch": 2.38, "learning_rate": 1.1075062675121664e-05, "loss": 0.0, "step": 5782 }, { "epoch": 2.38, "learning_rate": 1.1060315587671436e-05, "loss": 0.0001, "step": 5784 }, { "epoch": 2.38, "learning_rate": 1.1045568500221206e-05, "loss": 0.0, "step": 5786 }, { "epoch": 2.38, "learning_rate": 1.1030821412770978e-05, "loss": 0.0001, "step": 5788 }, { "epoch": 2.39, "learning_rate": 1.1016074325320749e-05, "loss": 0.0, "step": 5790 }, { "epoch": 2.39, "learning_rate": 1.100132723787052e-05, "loss": 0.0, "step": 5792 }, { "epoch": 2.39, "learning_rate": 1.0986580150420291e-05, "loss": 0.0, "step": 5794 }, { "epoch": 2.39, "learning_rate": 1.0971833062970063e-05, "loss": 0.0001, "step": 5796 }, { "epoch": 2.39, "learning_rate": 1.0957085975519835e-05, "loss": 0.0, "step": 5798 }, { "epoch": 2.39, "learning_rate": 1.0942338888069607e-05, "loss": 0.0, "step": 5800 }, { "epoch": 2.39, "learning_rate": 1.092759180061938e-05, "loss": 0.0, "step": 5802 }, { "epoch": 2.39, "learning_rate": 1.091284471316915e-05, "loss": 0.0, "step": 5804 }, { "epoch": 2.39, "learning_rate": 1.0898097625718922e-05, "loss": 0.0, "step": 5806 }, { "epoch": 2.39, "learning_rate": 1.0883350538268692e-05, "loss": 0.0, "step": 5808 }, { "epoch": 2.39, "learning_rate": 1.0868603450818464e-05, "loss": 0.0, "step": 5810 }, { "epoch": 2.39, "learning_rate": 1.0853856363368235e-05, "loss": 0.0, "step": 5812 }, { "epoch": 2.4, "learning_rate": 1.0839109275918007e-05, "loss": 0.0, "step": 5814 }, { "epoch": 2.4, "learning_rate": 1.0824362188467777e-05, "loss": 0.0, "step": 5816 }, { "epoch": 2.4, "learning_rate": 1.0809615101017549e-05, "loss": 0.0, "step": 5818 }, { "epoch": 2.4, "learning_rate": 1.0794868013567321e-05, "loss": 0.0, "step": 5820 }, { "epoch": 2.4, "learning_rate": 1.0780120926117092e-05, "loss": 0.0, "step": 5822 }, { "epoch": 2.4, "learning_rate": 1.0765373838666864e-05, "loss": 0.0, "step": 5824 }, { "epoch": 2.4, "learning_rate": 1.0750626751216636e-05, "loss": 0.0, "step": 5826 }, { "epoch": 2.4, "learning_rate": 1.0735879663766408e-05, "loss": 0.0, "step": 5828 }, { "epoch": 2.4, "learning_rate": 1.0721132576316178e-05, "loss": 0.0, "step": 5830 }, { "epoch": 2.4, "learning_rate": 1.070638548886595e-05, "loss": 0.0, "step": 5832 }, { "epoch": 2.4, "learning_rate": 1.069163840141572e-05, "loss": 0.0, "step": 5834 }, { "epoch": 2.4, "learning_rate": 1.0676891313965493e-05, "loss": 0.0, "step": 5836 }, { "epoch": 2.41, "learning_rate": 1.0662144226515265e-05, "loss": 0.0001, "step": 5838 }, { "epoch": 2.41, "learning_rate": 1.0647397139065035e-05, "loss": 0.0, "step": 5840 }, { "epoch": 2.41, "learning_rate": 1.0632650051614807e-05, "loss": 0.0, "step": 5842 }, { "epoch": 2.41, "learning_rate": 1.0617902964164577e-05, "loss": 0.0, "step": 5844 }, { "epoch": 2.41, "learning_rate": 1.060315587671435e-05, "loss": 0.0, "step": 5846 }, { "epoch": 2.41, "learning_rate": 1.058840878926412e-05, "loss": 0.0, "step": 5848 }, { "epoch": 2.41, "learning_rate": 1.0573661701813892e-05, "loss": 0.0, "step": 5850 }, { "epoch": 2.41, "learning_rate": 1.0558914614363662e-05, "loss": 0.0, "step": 5852 }, { "epoch": 2.41, "learning_rate": 1.0544167526913436e-05, "loss": 0.0, "step": 5854 }, { "epoch": 2.41, "learning_rate": 1.0529420439463206e-05, "loss": 0.0, "step": 5856 }, { "epoch": 2.41, "learning_rate": 1.0514673352012978e-05, "loss": 0.0, "step": 5858 }, { "epoch": 2.41, "learning_rate": 1.049992626456275e-05, "loss": 0.0001, "step": 5860 }, { "epoch": 2.42, "learning_rate": 1.0485179177112521e-05, "loss": 0.0, "step": 5862 }, { "epoch": 2.42, "learning_rate": 1.0470432089662293e-05, "loss": 0.0, "step": 5864 }, { "epoch": 2.42, "learning_rate": 1.0455685002212063e-05, "loss": 0.0, "step": 5866 }, { "epoch": 2.42, "learning_rate": 1.0440937914761835e-05, "loss": 0.0, "step": 5868 }, { "epoch": 2.42, "learning_rate": 1.0426190827311606e-05, "loss": 0.0, "step": 5870 }, { "epoch": 2.42, "learning_rate": 1.0411443739861378e-05, "loss": 0.0, "step": 5872 }, { "epoch": 2.42, "learning_rate": 1.0396696652411148e-05, "loss": 0.0001, "step": 5874 }, { "epoch": 2.42, "learning_rate": 1.038194956496092e-05, "loss": 0.0, "step": 5876 }, { "epoch": 2.42, "learning_rate": 1.0367202477510692e-05, "loss": 0.0, "step": 5878 }, { "epoch": 2.42, "learning_rate": 1.0352455390060463e-05, "loss": 0.0, "step": 5880 }, { "epoch": 2.42, "learning_rate": 1.0337708302610236e-05, "loss": 0.0, "step": 5882 }, { "epoch": 2.42, "learning_rate": 1.0322961215160007e-05, "loss": 0.0, "step": 5884 }, { "epoch": 2.43, "learning_rate": 1.0308214127709779e-05, "loss": 0.0, "step": 5886 }, { "epoch": 2.43, "learning_rate": 1.029346704025955e-05, "loss": 0.0, "step": 5888 }, { "epoch": 2.43, "learning_rate": 1.0278719952809321e-05, "loss": 0.0002, "step": 5890 }, { "epoch": 2.43, "learning_rate": 1.0263972865359092e-05, "loss": 0.0, "step": 5892 }, { "epoch": 2.43, "learning_rate": 1.0249225777908864e-05, "loss": 0.0, "step": 5894 }, { "epoch": 2.43, "learning_rate": 1.0234478690458634e-05, "loss": 0.0, "step": 5896 }, { "epoch": 2.43, "learning_rate": 1.0219731603008406e-05, "loss": 0.0001, "step": 5898 }, { "epoch": 2.43, "learning_rate": 1.0204984515558178e-05, "loss": 0.0, "step": 5900 }, { "epoch": 2.43, "learning_rate": 1.0190237428107949e-05, "loss": 0.0, "step": 5902 }, { "epoch": 2.43, "learning_rate": 1.017549034065772e-05, "loss": 0.0, "step": 5904 }, { "epoch": 2.43, "learning_rate": 1.0160743253207491e-05, "loss": 0.0, "step": 5906 }, { "epoch": 2.43, "learning_rate": 1.0145996165757263e-05, "loss": 0.0, "step": 5908 }, { "epoch": 2.44, "learning_rate": 1.0131249078307035e-05, "loss": 0.0, "step": 5910 }, { "epoch": 2.44, "learning_rate": 1.0116501990856807e-05, "loss": 0.0001, "step": 5912 }, { "epoch": 2.44, "learning_rate": 1.0101754903406577e-05, "loss": 0.0, "step": 5914 }, { "epoch": 2.44, "learning_rate": 1.008700781595635e-05, "loss": 0.0, "step": 5916 }, { "epoch": 2.44, "learning_rate": 1.007226072850612e-05, "loss": 0.0, "step": 5918 }, { "epoch": 2.44, "learning_rate": 1.0057513641055892e-05, "loss": 0.0, "step": 5920 }, { "epoch": 2.44, "learning_rate": 1.0042766553605664e-05, "loss": 0.0001, "step": 5922 }, { "epoch": 2.44, "learning_rate": 1.0028019466155434e-05, "loss": 0.0, "step": 5924 }, { "epoch": 2.44, "learning_rate": 1.0013272378705206e-05, "loss": 0.0, "step": 5926 }, { "epoch": 2.44, "learning_rate": 9.998525291254977e-06, "loss": 0.0, "step": 5928 }, { "epoch": 2.44, "learning_rate": 9.983778203804749e-06, "loss": 0.0, "step": 5930 }, { "epoch": 2.44, "learning_rate": 9.96903111635452e-06, "loss": 0.0, "step": 5932 }, { "epoch": 2.44, "learning_rate": 9.954284028904291e-06, "loss": 0.0, "step": 5934 }, { "epoch": 2.45, "learning_rate": 9.939536941454063e-06, "loss": 0.0, "step": 5936 }, { "epoch": 2.45, "learning_rate": 9.924789854003835e-06, "loss": 0.0, "step": 5938 }, { "epoch": 2.45, "learning_rate": 9.910042766553606e-06, "loss": 0.0, "step": 5940 }, { "epoch": 2.45, "learning_rate": 9.895295679103378e-06, "loss": 0.0, "step": 5942 }, { "epoch": 2.45, "learning_rate": 9.88054859165315e-06, "loss": 0.0, "step": 5944 }, { "epoch": 2.45, "learning_rate": 9.86580150420292e-06, "loss": 0.0, "step": 5946 }, { "epoch": 2.45, "learning_rate": 9.851054416752692e-06, "loss": 0.0, "step": 5948 }, { "epoch": 2.45, "learning_rate": 9.836307329302463e-06, "loss": 0.0, "step": 5950 }, { "epoch": 2.45, "learning_rate": 9.821560241852235e-06, "loss": 0.0, "step": 5952 }, { "epoch": 2.45, "learning_rate": 9.806813154402005e-06, "loss": 0.0, "step": 5954 }, { "epoch": 2.45, "learning_rate": 9.792066066951777e-06, "loss": 0.0, "step": 5956 }, { "epoch": 2.45, "learning_rate": 9.77731897950155e-06, "loss": 0.0001, "step": 5958 }, { "epoch": 2.46, "learning_rate": 9.76257189205132e-06, "loss": 0.0, "step": 5960 }, { "epoch": 2.46, "learning_rate": 9.747824804601092e-06, "loss": 0.0001, "step": 5962 }, { "epoch": 2.46, "learning_rate": 9.733077717150864e-06, "loss": 0.0, "step": 5964 }, { "epoch": 2.46, "learning_rate": 9.718330629700636e-06, "loss": 0.0, "step": 5966 }, { "epoch": 2.46, "learning_rate": 9.703583542250406e-06, "loss": 0.0, "step": 5968 }, { "epoch": 2.46, "learning_rate": 9.688836454800178e-06, "loss": 0.0, "step": 5970 }, { "epoch": 2.46, "learning_rate": 9.674089367349949e-06, "loss": 0.0, "step": 5972 }, { "epoch": 2.46, "learning_rate": 9.65934227989972e-06, "loss": 0.0, "step": 5974 }, { "epoch": 2.46, "learning_rate": 9.644595192449491e-06, "loss": 0.0, "step": 5976 }, { "epoch": 2.46, "learning_rate": 9.629848104999263e-06, "loss": 0.0, "step": 5978 }, { "epoch": 2.46, "learning_rate": 9.615101017549035e-06, "loss": 0.0, "step": 5980 }, { "epoch": 2.46, "learning_rate": 9.600353930098806e-06, "loss": 0.0, "step": 5982 }, { "epoch": 2.47, "learning_rate": 9.585606842648578e-06, "loss": 0.0, "step": 5984 }, { "epoch": 2.47, "learning_rate": 9.570859755198348e-06, "loss": 0.0, "step": 5986 }, { "epoch": 2.47, "learning_rate": 9.55611266774812e-06, "loss": 0.0, "step": 5988 }, { "epoch": 2.47, "learning_rate": 9.54136558029789e-06, "loss": 0.0, "step": 5990 }, { "epoch": 2.47, "learning_rate": 9.526618492847664e-06, "loss": 0.0, "step": 5992 }, { "epoch": 2.47, "learning_rate": 9.511871405397434e-06, "loss": 0.0, "step": 5994 }, { "epoch": 2.47, "learning_rate": 9.497124317947207e-06, "loss": 0.0001, "step": 5996 }, { "epoch": 2.47, "learning_rate": 9.482377230496977e-06, "loss": 0.0, "step": 5998 }, { "epoch": 2.47, "learning_rate": 9.467630143046749e-06, "loss": 0.0, "step": 6000 }, { "epoch": 2.47, "learning_rate": 9.452883055596521e-06, "loss": 0.0, "step": 6002 }, { "epoch": 2.47, "learning_rate": 9.438135968146291e-06, "loss": 0.0, "step": 6004 }, { "epoch": 2.47, "learning_rate": 9.423388880696063e-06, "loss": 0.0, "step": 6006 }, { "epoch": 2.48, "learning_rate": 9.408641793245834e-06, "loss": 0.0, "step": 6008 }, { "epoch": 2.48, "learning_rate": 9.393894705795606e-06, "loss": 0.0, "step": 6010 }, { "epoch": 2.48, "learning_rate": 9.379147618345376e-06, "loss": 0.0, "step": 6012 }, { "epoch": 2.48, "learning_rate": 9.364400530895148e-06, "loss": 0.0, "step": 6014 }, { "epoch": 2.48, "learning_rate": 9.34965344344492e-06, "loss": 0.0, "step": 6016 }, { "epoch": 2.48, "learning_rate": 9.33490635599469e-06, "loss": 0.0, "step": 6018 }, { "epoch": 2.48, "learning_rate": 9.320159268544463e-06, "loss": 0.0001, "step": 6020 }, { "epoch": 2.48, "learning_rate": 9.305412181094235e-06, "loss": 0.0, "step": 6022 }, { "epoch": 2.48, "learning_rate": 9.290665093644007e-06, "loss": 0.0, "step": 6024 }, { "epoch": 2.48, "learning_rate": 9.275918006193777e-06, "loss": 0.0, "step": 6026 }, { "epoch": 2.48, "learning_rate": 9.26117091874355e-06, "loss": 0.0, "step": 6028 }, { "epoch": 2.48, "learning_rate": 9.24642383129332e-06, "loss": 0.0, "step": 6030 }, { "epoch": 2.49, "learning_rate": 9.231676743843092e-06, "loss": 0.0, "step": 6032 }, { "epoch": 2.49, "learning_rate": 9.216929656392862e-06, "loss": 0.0002, "step": 6034 }, { "epoch": 2.49, "learning_rate": 9.202182568942634e-06, "loss": 0.0, "step": 6036 }, { "epoch": 2.49, "learning_rate": 9.187435481492406e-06, "loss": 0.0, "step": 6038 }, { "epoch": 2.49, "learning_rate": 9.172688394042177e-06, "loss": 0.0, "step": 6040 }, { "epoch": 2.49, "learning_rate": 9.157941306591949e-06, "loss": 0.0, "step": 6042 }, { "epoch": 2.49, "learning_rate": 9.143194219141719e-06, "loss": 0.0, "step": 6044 }, { "epoch": 2.49, "learning_rate": 9.128447131691491e-06, "loss": 0.0, "step": 6046 }, { "epoch": 2.49, "learning_rate": 9.113700044241263e-06, "loss": 0.0, "step": 6048 }, { "epoch": 2.49, "learning_rate": 9.098952956791035e-06, "loss": 0.0001, "step": 6050 }, { "epoch": 2.49, "learning_rate": 9.084205869340806e-06, "loss": 0.0, "step": 6052 }, { "epoch": 2.49, "learning_rate": 9.069458781890578e-06, "loss": 0.0, "step": 6054 }, { "epoch": 2.5, "learning_rate": 9.054711694440348e-06, "loss": 0.0, "step": 6056 }, { "epoch": 2.5, "learning_rate": 9.03996460699012e-06, "loss": 0.0, "step": 6058 }, { "epoch": 2.5, "learning_rate": 9.025217519539892e-06, "loss": 0.0, "step": 6060 }, { "epoch": 2.5, "learning_rate": 9.010470432089663e-06, "loss": 0.0, "step": 6062 }, { "epoch": 2.5, "learning_rate": 8.995723344639435e-06, "loss": 0.0, "step": 6064 }, { "epoch": 2.5, "learning_rate": 8.980976257189205e-06, "loss": 0.0, "step": 6066 }, { "epoch": 2.5, "learning_rate": 8.966229169738977e-06, "loss": 0.0, "step": 6068 }, { "epoch": 2.5, "learning_rate": 8.951482082288747e-06, "loss": 0.0, "step": 6070 }, { "epoch": 2.5, "learning_rate": 8.93673499483852e-06, "loss": 0.0, "step": 6072 }, { "epoch": 2.5, "learning_rate": 8.92198790738829e-06, "loss": 0.0, "step": 6074 }, { "epoch": 2.5, "learning_rate": 8.907240819938064e-06, "loss": 0.0, "step": 6076 }, { "epoch": 2.5, "learning_rate": 8.892493732487834e-06, "loss": 0.0, "step": 6078 }, { "epoch": 2.51, "learning_rate": 8.877746645037606e-06, "loss": 0.0, "step": 6080 }, { "epoch": 2.51, "learning_rate": 8.862999557587378e-06, "loss": 0.0, "step": 6082 }, { "epoch": 2.51, "learning_rate": 8.848252470137148e-06, "loss": 0.0, "step": 6084 }, { "epoch": 2.51, "learning_rate": 8.83350538268692e-06, "loss": 0.0, "step": 6086 }, { "epoch": 2.51, "learning_rate": 8.81875829523669e-06, "loss": 0.0, "step": 6088 }, { "epoch": 2.51, "learning_rate": 8.804011207786463e-06, "loss": 0.0, "step": 6090 }, { "epoch": 2.51, "learning_rate": 8.789264120336233e-06, "loss": 0.0, "step": 6092 }, { "epoch": 2.51, "learning_rate": 8.774517032886005e-06, "loss": 0.0, "step": 6094 }, { "epoch": 2.51, "learning_rate": 8.759769945435776e-06, "loss": 0.0, "step": 6096 }, { "epoch": 2.51, "learning_rate": 8.745022857985548e-06, "loss": 0.0, "step": 6098 }, { "epoch": 2.51, "learning_rate": 8.73027577053532e-06, "loss": 0.0, "step": 6100 }, { "epoch": 2.51, "learning_rate": 8.71552868308509e-06, "loss": 0.0, "step": 6102 }, { "epoch": 2.52, "learning_rate": 8.700781595634864e-06, "loss": 0.0, "step": 6104 }, { "epoch": 2.52, "learning_rate": 8.686034508184634e-06, "loss": 0.0, "step": 6106 }, { "epoch": 2.52, "learning_rate": 8.671287420734406e-06, "loss": 0.0, "step": 6108 }, { "epoch": 2.52, "learning_rate": 8.656540333284177e-06, "loss": 0.0, "step": 6110 }, { "epoch": 2.52, "learning_rate": 8.641793245833949e-06, "loss": 0.0, "step": 6112 }, { "epoch": 2.52, "learning_rate": 8.627046158383719e-06, "loss": 0.0, "step": 6114 }, { "epoch": 2.52, "learning_rate": 8.612299070933491e-06, "loss": 0.0, "step": 6116 }, { "epoch": 2.52, "learning_rate": 8.597551983483262e-06, "loss": 0.0, "step": 6118 }, { "epoch": 2.52, "learning_rate": 8.582804896033034e-06, "loss": 0.0, "step": 6120 }, { "epoch": 2.52, "learning_rate": 8.568057808582806e-06, "loss": 0.0001, "step": 6122 }, { "epoch": 2.52, "learning_rate": 8.553310721132576e-06, "loss": 0.0, "step": 6124 }, { "epoch": 2.52, "learning_rate": 8.538563633682348e-06, "loss": 0.0, "step": 6126 }, { "epoch": 2.52, "learning_rate": 8.523816546232118e-06, "loss": 0.0, "step": 6128 }, { "epoch": 2.53, "learning_rate": 8.50906945878189e-06, "loss": 0.0, "step": 6130 }, { "epoch": 2.53, "learning_rate": 8.494322371331663e-06, "loss": 0.0, "step": 6132 }, { "epoch": 2.53, "learning_rate": 8.479575283881435e-06, "loss": 0.0, "step": 6134 }, { "epoch": 2.53, "learning_rate": 8.464828196431205e-06, "loss": 0.0, "step": 6136 }, { "epoch": 2.53, "learning_rate": 8.450081108980977e-06, "loss": 0.0, "step": 6138 }, { "epoch": 2.53, "learning_rate": 8.43533402153075e-06, "loss": 0.0, "step": 6140 }, { "epoch": 2.53, "learning_rate": 8.42058693408052e-06, "loss": 0.0, "step": 6142 }, { "epoch": 2.53, "learning_rate": 8.405839846630292e-06, "loss": 0.0, "step": 6144 }, { "epoch": 2.53, "learning_rate": 8.391092759180062e-06, "loss": 0.0002, "step": 6146 }, { "epoch": 2.53, "learning_rate": 8.376345671729834e-06, "loss": 0.0, "step": 6148 }, { "epoch": 2.53, "learning_rate": 8.361598584279604e-06, "loss": 0.0, "step": 6150 }, { "epoch": 2.53, "learning_rate": 8.346851496829376e-06, "loss": 0.0, "step": 6152 }, { "epoch": 2.54, "learning_rate": 8.332104409379147e-06, "loss": 0.0, "step": 6154 }, { "epoch": 2.54, "learning_rate": 8.317357321928919e-06, "loss": 0.0, "step": 6156 }, { "epoch": 2.54, "learning_rate": 8.302610234478691e-06, "loss": 0.0, "step": 6158 }, { "epoch": 2.54, "learning_rate": 8.287863147028463e-06, "loss": 0.0, "step": 6160 }, { "epoch": 2.54, "learning_rate": 8.273116059578235e-06, "loss": 0.0, "step": 6162 }, { "epoch": 2.54, "learning_rate": 8.258368972128005e-06, "loss": 0.0, "step": 6164 }, { "epoch": 2.54, "learning_rate": 8.243621884677777e-06, "loss": 0.0, "step": 6166 }, { "epoch": 2.54, "learning_rate": 8.228874797227548e-06, "loss": 0.0, "step": 6168 }, { "epoch": 2.54, "learning_rate": 8.21412770977732e-06, "loss": 0.0, "step": 6170 }, { "epoch": 2.54, "learning_rate": 8.19938062232709e-06, "loss": 0.0, "step": 6172 }, { "epoch": 2.54, "learning_rate": 8.184633534876862e-06, "loss": 0.0, "step": 6174 }, { "epoch": 2.54, "learning_rate": 8.169886447426633e-06, "loss": 0.0, "step": 6176 }, { "epoch": 2.55, "learning_rate": 8.155139359976405e-06, "loss": 0.0, "step": 6178 }, { "epoch": 2.55, "learning_rate": 8.140392272526177e-06, "loss": 0.0, "step": 6180 }, { "epoch": 2.55, "learning_rate": 8.125645185075947e-06, "loss": 0.0, "step": 6182 }, { "epoch": 2.55, "learning_rate": 8.11089809762572e-06, "loss": 0.0, "step": 6184 }, { "epoch": 2.55, "learning_rate": 8.09615101017549e-06, "loss": 0.0, "step": 6186 }, { "epoch": 2.55, "learning_rate": 8.081403922725263e-06, "loss": 0.0, "step": 6188 }, { "epoch": 2.55, "learning_rate": 8.066656835275034e-06, "loss": 0.0, "step": 6190 }, { "epoch": 2.55, "learning_rate": 8.051909747824806e-06, "loss": 0.0, "step": 6192 }, { "epoch": 2.55, "learning_rate": 8.037162660374576e-06, "loss": 0.0, "step": 6194 }, { "epoch": 2.55, "learning_rate": 8.022415572924348e-06, "loss": 0.0, "step": 6196 }, { "epoch": 2.55, "learning_rate": 8.007668485474119e-06, "loss": 0.0, "step": 6198 }, { "epoch": 2.55, "learning_rate": 7.99292139802389e-06, "loss": 0.0, "step": 6200 }, { "epoch": 2.56, "learning_rate": 7.978174310573663e-06, "loss": 0.0, "step": 6202 }, { "epoch": 2.56, "learning_rate": 7.963427223123433e-06, "loss": 0.0, "step": 6204 }, { "epoch": 2.56, "learning_rate": 7.948680135673205e-06, "loss": 0.0, "step": 6206 }, { "epoch": 2.56, "learning_rate": 7.933933048222975e-06, "loss": 0.0002, "step": 6208 }, { "epoch": 2.56, "learning_rate": 7.919185960772748e-06, "loss": 0.0001, "step": 6210 }, { "epoch": 2.56, "learning_rate": 7.904438873322518e-06, "loss": 0.0, "step": 6212 }, { "epoch": 2.56, "learning_rate": 7.88969178587229e-06, "loss": 0.0, "step": 6214 }, { "epoch": 2.56, "learning_rate": 7.874944698422062e-06, "loss": 0.0, "step": 6216 }, { "epoch": 2.56, "learning_rate": 7.860197610971834e-06, "loss": 0.0, "step": 6218 }, { "epoch": 2.56, "learning_rate": 7.845450523521604e-06, "loss": 0.0, "step": 6220 }, { "epoch": 2.56, "learning_rate": 7.830703436071377e-06, "loss": 0.0, "step": 6222 }, { "epoch": 2.56, "learning_rate": 7.815956348621149e-06, "loss": 0.0, "step": 6224 }, { "epoch": 2.57, "learning_rate": 7.801209261170919e-06, "loss": 0.0, "step": 6226 }, { "epoch": 2.57, "learning_rate": 7.786462173720691e-06, "loss": 0.0, "step": 6228 }, { "epoch": 2.57, "learning_rate": 7.771715086270461e-06, "loss": 0.0, "step": 6230 }, { "epoch": 2.57, "learning_rate": 7.756967998820233e-06, "loss": 0.0, "step": 6232 }, { "epoch": 2.57, "learning_rate": 7.742220911370004e-06, "loss": 0.0, "step": 6234 }, { "epoch": 2.57, "learning_rate": 7.727473823919776e-06, "loss": 0.0, "step": 6236 }, { "epoch": 2.57, "learning_rate": 7.712726736469548e-06, "loss": 0.0, "step": 6238 }, { "epoch": 2.57, "learning_rate": 7.697979649019318e-06, "loss": 0.0, "step": 6240 }, { "epoch": 2.57, "learning_rate": 7.68323256156909e-06, "loss": 0.0, "step": 6242 }, { "epoch": 2.57, "learning_rate": 7.668485474118862e-06, "loss": 0.0, "step": 6244 }, { "epoch": 2.57, "learning_rate": 7.653738386668634e-06, "loss": 0.0, "step": 6246 }, { "epoch": 2.57, "learning_rate": 7.638991299218405e-06, "loss": 0.0, "step": 6248 }, { "epoch": 2.58, "learning_rate": 7.624244211768176e-06, "loss": 0.0, "step": 6250 }, { "epoch": 2.58, "learning_rate": 7.609497124317948e-06, "loss": 0.0, "step": 6252 }, { "epoch": 2.58, "learning_rate": 7.594750036867719e-06, "loss": 0.0, "step": 6254 }, { "epoch": 2.58, "learning_rate": 7.5800029494174905e-06, "loss": 0.0, "step": 6256 }, { "epoch": 2.58, "learning_rate": 7.565255861967262e-06, "loss": 0.0, "step": 6258 }, { "epoch": 2.58, "learning_rate": 7.550508774517033e-06, "loss": 0.0, "step": 6260 }, { "epoch": 2.58, "learning_rate": 7.535761687066804e-06, "loss": 0.0, "step": 6262 }, { "epoch": 2.58, "learning_rate": 7.521014599616575e-06, "loss": 0.0, "step": 6264 }, { "epoch": 2.58, "learning_rate": 7.506267512166347e-06, "loss": 0.0001, "step": 6266 }, { "epoch": 2.58, "learning_rate": 7.491520424716119e-06, "loss": 0.0, "step": 6268 }, { "epoch": 2.58, "learning_rate": 7.476773337265891e-06, "loss": 0.0, "step": 6270 }, { "epoch": 2.58, "learning_rate": 7.462026249815662e-06, "loss": 0.0, "step": 6272 }, { "epoch": 2.59, "learning_rate": 7.447279162365434e-06, "loss": 0.0, "step": 6274 }, { "epoch": 2.59, "learning_rate": 7.432532074915205e-06, "loss": 0.0, "step": 6276 }, { "epoch": 2.59, "learning_rate": 7.417784987464976e-06, "loss": 0.0, "step": 6278 }, { "epoch": 2.59, "learning_rate": 7.403037900014748e-06, "loss": 0.0, "step": 6280 }, { "epoch": 2.59, "learning_rate": 7.388290812564519e-06, "loss": 0.0, "step": 6282 }, { "epoch": 2.59, "learning_rate": 7.37354372511429e-06, "loss": 0.0, "step": 6284 }, { "epoch": 2.59, "learning_rate": 7.358796637664061e-06, "loss": 0.0, "step": 6286 }, { "epoch": 2.59, "learning_rate": 7.3440495502138325e-06, "loss": 0.0001, "step": 6288 }, { "epoch": 2.59, "learning_rate": 7.3293024627636045e-06, "loss": 0.0, "step": 6290 }, { "epoch": 2.59, "learning_rate": 7.314555375313376e-06, "loss": 0.0, "step": 6292 }, { "epoch": 2.59, "learning_rate": 7.299808287863147e-06, "loss": 0.0, "step": 6294 }, { "epoch": 2.59, "learning_rate": 7.285061200412918e-06, "loss": 0.0, "step": 6296 }, { "epoch": 2.59, "learning_rate": 7.270314112962691e-06, "loss": 0.0, "step": 6298 }, { "epoch": 2.6, "learning_rate": 7.255567025512462e-06, "loss": 0.0, "step": 6300 }, { "epoch": 2.6, "learning_rate": 7.2408199380622335e-06, "loss": 0.0, "step": 6302 }, { "epoch": 2.6, "learning_rate": 7.226072850612005e-06, "loss": 0.0, "step": 6304 }, { "epoch": 2.6, "learning_rate": 7.211325763161776e-06, "loss": 0.0, "step": 6306 }, { "epoch": 2.6, "learning_rate": 7.196578675711547e-06, "loss": 0.0, "step": 6308 }, { "epoch": 2.6, "learning_rate": 7.181831588261319e-06, "loss": 0.0004, "step": 6310 }, { "epoch": 2.6, "learning_rate": 7.1670845008110904e-06, "loss": 0.0, "step": 6312 }, { "epoch": 2.6, "learning_rate": 7.152337413360862e-06, "loss": 0.0, "step": 6314 }, { "epoch": 2.6, "learning_rate": 7.137590325910633e-06, "loss": 0.0, "step": 6316 }, { "epoch": 2.6, "learning_rate": 7.122843238460404e-06, "loss": 0.0, "step": 6318 }, { "epoch": 2.6, "learning_rate": 7.108096151010175e-06, "loss": 0.0001, "step": 6320 }, { "epoch": 2.6, "learning_rate": 7.0933490635599465e-06, "loss": 0.0, "step": 6322 }, { "epoch": 2.61, "learning_rate": 7.078601976109718e-06, "loss": 0.0, "step": 6324 }, { "epoch": 2.61, "learning_rate": 7.063854888659491e-06, "loss": 0.0003, "step": 6326 }, { "epoch": 2.61, "learning_rate": 7.049107801209262e-06, "loss": 0.0, "step": 6328 }, { "epoch": 2.61, "learning_rate": 7.034360713759033e-06, "loss": 0.0, "step": 6330 }, { "epoch": 2.61, "learning_rate": 7.019613626308805e-06, "loss": 0.0, "step": 6332 }, { "epoch": 2.61, "learning_rate": 7.004866538858576e-06, "loss": 0.0, "step": 6334 }, { "epoch": 2.61, "learning_rate": 6.9901194514083475e-06, "loss": 0.0, "step": 6336 }, { "epoch": 2.61, "learning_rate": 6.975372363958119e-06, "loss": 0.0, "step": 6338 }, { "epoch": 2.61, "learning_rate": 6.96062527650789e-06, "loss": 0.0, "step": 6340 }, { "epoch": 2.61, "learning_rate": 6.945878189057661e-06, "loss": 0.0, "step": 6342 }, { "epoch": 2.61, "learning_rate": 6.931131101607432e-06, "loss": 0.0, "step": 6344 }, { "epoch": 2.61, "learning_rate": 6.916384014157204e-06, "loss": 0.0, "step": 6346 }, { "epoch": 2.62, "learning_rate": 6.901636926706976e-06, "loss": 0.0001, "step": 6348 }, { "epoch": 2.62, "learning_rate": 6.886889839256747e-06, "loss": 0.0, "step": 6350 }, { "epoch": 2.62, "learning_rate": 6.872142751806518e-06, "loss": 0.0, "step": 6352 }, { "epoch": 2.62, "learning_rate": 6.857395664356291e-06, "loss": 0.0, "step": 6354 }, { "epoch": 2.62, "learning_rate": 6.842648576906062e-06, "loss": 0.0, "step": 6356 }, { "epoch": 2.62, "learning_rate": 6.827901489455833e-06, "loss": 0.0001, "step": 6358 }, { "epoch": 2.62, "learning_rate": 6.813154402005605e-06, "loss": 0.0, "step": 6360 }, { "epoch": 2.62, "learning_rate": 6.798407314555376e-06, "loss": 0.0, "step": 6362 }, { "epoch": 2.62, "learning_rate": 6.783660227105147e-06, "loss": 0.0, "step": 6364 }, { "epoch": 2.62, "learning_rate": 6.768913139654918e-06, "loss": 0.0, "step": 6366 }, { "epoch": 2.62, "learning_rate": 6.7541660522046895e-06, "loss": 0.0, "step": 6368 }, { "epoch": 2.62, "learning_rate": 6.7394189647544615e-06, "loss": 0.0, "step": 6370 }, { "epoch": 2.63, "learning_rate": 6.724671877304233e-06, "loss": 0.0, "step": 6372 }, { "epoch": 2.63, "learning_rate": 6.709924789854004e-06, "loss": 0.0001, "step": 6374 }, { "epoch": 2.63, "learning_rate": 6.695177702403775e-06, "loss": 0.0, "step": 6376 }, { "epoch": 2.63, "learning_rate": 6.680430614953546e-06, "loss": 0.0, "step": 6378 }, { "epoch": 2.63, "learning_rate": 6.665683527503318e-06, "loss": 0.0, "step": 6380 }, { "epoch": 2.63, "learning_rate": 6.6509364400530905e-06, "loss": 0.0001, "step": 6382 }, { "epoch": 2.63, "learning_rate": 6.636189352602862e-06, "loss": 0.0, "step": 6384 }, { "epoch": 2.63, "learning_rate": 6.621442265152633e-06, "loss": 0.0, "step": 6386 }, { "epoch": 2.63, "learning_rate": 6.606695177702404e-06, "loss": 0.0, "step": 6388 }, { "epoch": 2.63, "learning_rate": 6.591948090252175e-06, "loss": 0.0, "step": 6390 }, { "epoch": 2.63, "learning_rate": 6.5772010028019474e-06, "loss": 0.0, "step": 6392 }, { "epoch": 2.63, "learning_rate": 6.562453915351719e-06, "loss": 0.0, "step": 6394 }, { "epoch": 2.64, "learning_rate": 6.54770682790149e-06, "loss": 0.0, "step": 6396 }, { "epoch": 2.64, "learning_rate": 6.532959740451261e-06, "loss": 0.0, "step": 6398 }, { "epoch": 2.64, "learning_rate": 6.518212653001032e-06, "loss": 0.0, "step": 6400 }, { "epoch": 2.64, "learning_rate": 6.5034655655508035e-06, "loss": 0.0, "step": 6402 }, { "epoch": 2.64, "learning_rate": 6.488718478100575e-06, "loss": 0.0, "step": 6404 }, { "epoch": 2.64, "learning_rate": 6.473971390650346e-06, "loss": 0.0, "step": 6406 }, { "epoch": 2.64, "learning_rate": 6.459224303200118e-06, "loss": 0.0, "step": 6408 }, { "epoch": 2.64, "learning_rate": 6.44447721574989e-06, "loss": 0.0, "step": 6410 }, { "epoch": 2.64, "learning_rate": 6.429730128299661e-06, "loss": 0.0, "step": 6412 }, { "epoch": 2.64, "learning_rate": 6.414983040849433e-06, "loss": 0.0, "step": 6414 }, { "epoch": 2.64, "learning_rate": 6.4002359533992045e-06, "loss": 0.0, "step": 6416 }, { "epoch": 2.64, "learning_rate": 6.385488865948976e-06, "loss": 0.0, "step": 6418 }, { "epoch": 2.65, "learning_rate": 6.370741778498747e-06, "loss": 0.0, "step": 6420 }, { "epoch": 2.65, "learning_rate": 6.355994691048518e-06, "loss": 0.0, "step": 6422 }, { "epoch": 2.65, "learning_rate": 6.341247603598289e-06, "loss": 0.0, "step": 6424 }, { "epoch": 2.65, "learning_rate": 6.326500516148061e-06, "loss": 0.0, "step": 6426 }, { "epoch": 2.65, "learning_rate": 6.311753428697832e-06, "loss": 0.0, "step": 6428 }, { "epoch": 2.65, "learning_rate": 6.297006341247604e-06, "loss": 0.0, "step": 6430 }, { "epoch": 2.65, "learning_rate": 6.282259253797375e-06, "loss": 0.0, "step": 6432 }, { "epoch": 2.65, "learning_rate": 6.267512166347146e-06, "loss": 0.0, "step": 6434 }, { "epoch": 2.65, "learning_rate": 6.2527650788969175e-06, "loss": 0.0, "step": 6436 }, { "epoch": 2.65, "learning_rate": 6.2380179914466896e-06, "loss": 0.0, "step": 6438 }, { "epoch": 2.65, "learning_rate": 6.223270903996461e-06, "loss": 0.0, "step": 6440 }, { "epoch": 2.65, "learning_rate": 6.208523816546232e-06, "loss": 0.0, "step": 6442 }, { "epoch": 2.66, "learning_rate": 6.193776729096004e-06, "loss": 0.0, "step": 6444 }, { "epoch": 2.66, "learning_rate": 6.179029641645775e-06, "loss": 0.0, "step": 6446 }, { "epoch": 2.66, "learning_rate": 6.1642825541955465e-06, "loss": 0.0, "step": 6448 }, { "epoch": 2.66, "learning_rate": 6.149535466745318e-06, "loss": 0.0, "step": 6450 }, { "epoch": 2.66, "learning_rate": 6.13478837929509e-06, "loss": 0.0, "step": 6452 }, { "epoch": 2.66, "learning_rate": 6.120041291844861e-06, "loss": 0.0, "step": 6454 }, { "epoch": 2.66, "learning_rate": 6.105294204394632e-06, "loss": 0.0, "step": 6456 }, { "epoch": 2.66, "learning_rate": 6.090547116944404e-06, "loss": 0.0002, "step": 6458 }, { "epoch": 2.66, "learning_rate": 6.0758000294941755e-06, "loss": 0.0, "step": 6460 }, { "epoch": 2.66, "learning_rate": 6.061052942043947e-06, "loss": 0.0, "step": 6462 }, { "epoch": 2.66, "learning_rate": 6.046305854593718e-06, "loss": 0.0, "step": 6464 }, { "epoch": 2.66, "learning_rate": 6.031558767143489e-06, "loss": 0.0, "step": 6466 }, { "epoch": 2.67, "learning_rate": 6.01681167969326e-06, "loss": 0.0, "step": 6468 }, { "epoch": 2.67, "learning_rate": 6.002064592243032e-06, "loss": 0.0, "step": 6470 }, { "epoch": 2.67, "learning_rate": 5.9873175047928044e-06, "loss": 0.0, "step": 6472 }, { "epoch": 2.67, "learning_rate": 5.972570417342576e-06, "loss": 0.0, "step": 6474 }, { "epoch": 2.67, "learning_rate": 5.957823329892347e-06, "loss": 0.0, "step": 6476 }, { "epoch": 2.67, "learning_rate": 5.943076242442118e-06, "loss": 0.0, "step": 6478 }, { "epoch": 2.67, "learning_rate": 5.928329154991889e-06, "loss": 0.0, "step": 6480 }, { "epoch": 2.67, "learning_rate": 5.9135820675416605e-06, "loss": 0.0, "step": 6482 }, { "epoch": 2.67, "learning_rate": 5.898834980091432e-06, "loss": 0.0, "step": 6484 }, { "epoch": 2.67, "learning_rate": 5.884087892641204e-06, "loss": 0.0, "step": 6486 }, { "epoch": 2.67, "learning_rate": 5.869340805190975e-06, "loss": 0.0, "step": 6488 }, { "epoch": 2.67, "learning_rate": 5.854593717740746e-06, "loss": 0.0, "step": 6490 }, { "epoch": 2.67, "learning_rate": 5.839846630290518e-06, "loss": 0.0, "step": 6492 }, { "epoch": 2.68, "learning_rate": 5.8250995428402895e-06, "loss": 0.0, "step": 6494 }, { "epoch": 2.68, "learning_rate": 5.810352455390061e-06, "loss": 0.0, "step": 6496 }, { "epoch": 2.68, "learning_rate": 5.795605367939832e-06, "loss": 0.0, "step": 6498 }, { "epoch": 2.68, "learning_rate": 5.780858280489604e-06, "loss": 0.0001, "step": 6500 }, { "epoch": 2.68, "learning_rate": 5.766111193039375e-06, "loss": 0.0, "step": 6502 }, { "epoch": 2.68, "learning_rate": 5.751364105589146e-06, "loss": 0.0, "step": 6504 }, { "epoch": 2.68, "learning_rate": 5.736617018138918e-06, "loss": 0.0, "step": 6506 }, { "epoch": 2.68, "learning_rate": 5.721869930688689e-06, "loss": 0.0, "step": 6508 }, { "epoch": 2.68, "learning_rate": 5.707122843238461e-06, "loss": 0.0, "step": 6510 }, { "epoch": 2.68, "learning_rate": 5.692375755788232e-06, "loss": 0.0, "step": 6512 }, { "epoch": 2.68, "learning_rate": 5.677628668338004e-06, "loss": 0.0, "step": 6514 }, { "epoch": 2.68, "learning_rate": 5.662881580887775e-06, "loss": 0.0, "step": 6516 }, { "epoch": 2.69, "learning_rate": 5.6481344934375466e-06, "loss": 0.0, "step": 6518 }, { "epoch": 2.69, "learning_rate": 5.633387405987318e-06, "loss": 0.0, "step": 6520 }, { "epoch": 2.69, "learning_rate": 5.618640318537089e-06, "loss": 0.0, "step": 6522 }, { "epoch": 2.69, "learning_rate": 5.60389323108686e-06, "loss": 0.0, "step": 6524 }, { "epoch": 2.69, "learning_rate": 5.589146143636631e-06, "loss": 0.0, "step": 6526 }, { "epoch": 2.69, "learning_rate": 5.5743990561864035e-06, "loss": 0.0, "step": 6528 }, { "epoch": 2.69, "learning_rate": 5.559651968736175e-06, "loss": 0.0, "step": 6530 }, { "epoch": 2.69, "learning_rate": 5.544904881285947e-06, "loss": 0.0, "step": 6532 }, { "epoch": 2.69, "learning_rate": 5.530157793835718e-06, "loss": 0.0, "step": 6534 }, { "epoch": 2.69, "learning_rate": 5.515410706385489e-06, "loss": 0.0001, "step": 6536 }, { "epoch": 2.69, "learning_rate": 5.50066361893526e-06, "loss": 0.0, "step": 6538 }, { "epoch": 2.69, "learning_rate": 5.485916531485032e-06, "loss": 0.0, "step": 6540 }, { "epoch": 2.7, "learning_rate": 5.471169444034804e-06, "loss": 0.0, "step": 6542 }, { "epoch": 2.7, "learning_rate": 5.456422356584575e-06, "loss": 0.0, "step": 6544 }, { "epoch": 2.7, "learning_rate": 5.441675269134346e-06, "loss": 0.0, "step": 6546 }, { "epoch": 2.7, "learning_rate": 5.426928181684117e-06, "loss": 0.0, "step": 6548 }, { "epoch": 2.7, "learning_rate": 5.4121810942338885e-06, "loss": 0.0, "step": 6550 }, { "epoch": 2.7, "learning_rate": 5.397434006783661e-06, "loss": 0.0, "step": 6552 }, { "epoch": 2.7, "learning_rate": 5.382686919333432e-06, "loss": 0.0, "step": 6554 }, { "epoch": 2.7, "learning_rate": 5.367939831883204e-06, "loss": 0.0, "step": 6556 }, { "epoch": 2.7, "learning_rate": 5.353192744432975e-06, "loss": 0.0, "step": 6558 }, { "epoch": 2.7, "learning_rate": 5.338445656982746e-06, "loss": 0.0, "step": 6560 }, { "epoch": 2.7, "learning_rate": 5.3236985695325175e-06, "loss": 0.0, "step": 6562 }, { "epoch": 2.7, "learning_rate": 5.308951482082289e-06, "loss": 0.0, "step": 6564 }, { "epoch": 2.71, "learning_rate": 5.29420439463206e-06, "loss": 0.0001, "step": 6566 }, { "epoch": 2.71, "learning_rate": 5.279457307181831e-06, "loss": 0.0, "step": 6568 }, { "epoch": 2.71, "learning_rate": 5.264710219731603e-06, "loss": 0.0, "step": 6570 }, { "epoch": 2.71, "learning_rate": 5.249963132281375e-06, "loss": 0.0, "step": 6572 }, { "epoch": 2.71, "learning_rate": 5.2352160448311465e-06, "loss": 0.0, "step": 6574 }, { "epoch": 2.71, "learning_rate": 5.220468957380918e-06, "loss": 0.0, "step": 6576 }, { "epoch": 2.71, "learning_rate": 5.205721869930689e-06, "loss": 0.0, "step": 6578 }, { "epoch": 2.71, "learning_rate": 5.19097478248046e-06, "loss": 0.0, "step": 6580 }, { "epoch": 2.71, "learning_rate": 5.176227695030231e-06, "loss": 0.0, "step": 6582 }, { "epoch": 2.71, "learning_rate": 5.161480607580003e-06, "loss": 0.0, "step": 6584 }, { "epoch": 2.71, "learning_rate": 5.146733520129775e-06, "loss": 0.0, "step": 6586 }, { "epoch": 2.71, "learning_rate": 5.131986432679546e-06, "loss": 0.0, "step": 6588 }, { "epoch": 2.72, "learning_rate": 5.117239345229317e-06, "loss": 0.0, "step": 6590 }, { "epoch": 2.72, "learning_rate": 5.102492257779089e-06, "loss": 0.0, "step": 6592 }, { "epoch": 2.72, "learning_rate": 5.08774517032886e-06, "loss": 0.0, "step": 6594 }, { "epoch": 2.72, "learning_rate": 5.0729980828786315e-06, "loss": 0.0, "step": 6596 }, { "epoch": 2.72, "learning_rate": 5.0582509954284036e-06, "loss": 0.0, "step": 6598 }, { "epoch": 2.72, "learning_rate": 5.043503907978175e-06, "loss": 0.0, "step": 6600 }, { "epoch": 2.72, "learning_rate": 5.028756820527946e-06, "loss": 0.0, "step": 6602 }, { "epoch": 2.72, "learning_rate": 5.014009733077717e-06, "loss": 0.0, "step": 6604 }, { "epoch": 2.72, "learning_rate": 4.999262645627488e-06, "loss": 0.0, "step": 6606 }, { "epoch": 2.72, "learning_rate": 4.98451555817726e-06, "loss": 0.0, "step": 6608 }, { "epoch": 2.72, "learning_rate": 4.969768470727032e-06, "loss": 0.0, "step": 6610 }, { "epoch": 2.72, "learning_rate": 4.955021383276803e-06, "loss": 0.0, "step": 6612 }, { "epoch": 2.73, "learning_rate": 4.940274295826575e-06, "loss": 0.0, "step": 6614 }, { "epoch": 2.73, "learning_rate": 4.925527208376346e-06, "loss": 0.0, "step": 6616 }, { "epoch": 2.73, "learning_rate": 4.910780120926117e-06, "loss": 0.0, "step": 6618 }, { "epoch": 2.73, "learning_rate": 4.896033033475889e-06, "loss": 0.0, "step": 6620 }, { "epoch": 2.73, "learning_rate": 4.88128594602566e-06, "loss": 0.0002, "step": 6622 }, { "epoch": 2.73, "learning_rate": 4.866538858575432e-06, "loss": 0.0, "step": 6624 }, { "epoch": 2.73, "learning_rate": 4.851791771125203e-06, "loss": 0.0, "step": 6626 }, { "epoch": 2.73, "learning_rate": 4.837044683674974e-06, "loss": 0.0, "step": 6628 }, { "epoch": 2.73, "learning_rate": 4.8222975962247455e-06, "loss": 0.0, "step": 6630 }, { "epoch": 2.73, "learning_rate": 4.807550508774518e-06, "loss": 0.0, "step": 6632 }, { "epoch": 2.73, "learning_rate": 4.792803421324289e-06, "loss": 0.0, "step": 6634 }, { "epoch": 2.73, "learning_rate": 4.77805633387406e-06, "loss": 0.0, "step": 6636 }, { "epoch": 2.74, "learning_rate": 4.763309246423832e-06, "loss": 0.0, "step": 6638 }, { "epoch": 2.74, "learning_rate": 4.748562158973603e-06, "loss": 0.0, "step": 6640 }, { "epoch": 2.74, "learning_rate": 4.7338150715233745e-06, "loss": 0.0, "step": 6642 }, { "epoch": 2.74, "learning_rate": 4.719067984073146e-06, "loss": 0.0, "step": 6644 }, { "epoch": 2.74, "learning_rate": 4.704320896622917e-06, "loss": 0.0, "step": 6646 }, { "epoch": 2.74, "learning_rate": 4.689573809172688e-06, "loss": 0.0, "step": 6648 }, { "epoch": 2.74, "learning_rate": 4.67482672172246e-06, "loss": 0.0, "step": 6650 }, { "epoch": 2.74, "learning_rate": 4.660079634272231e-06, "loss": 0.0, "step": 6652 }, { "epoch": 2.74, "learning_rate": 4.6453325468220035e-06, "loss": 0.0, "step": 6654 }, { "epoch": 2.74, "learning_rate": 4.630585459371775e-06, "loss": 0.0, "step": 6656 }, { "epoch": 2.74, "learning_rate": 4.615838371921546e-06, "loss": 0.0, "step": 6658 }, { "epoch": 2.74, "learning_rate": 4.601091284471317e-06, "loss": 0.0, "step": 6660 }, { "epoch": 2.74, "learning_rate": 4.586344197021088e-06, "loss": 0.0, "step": 6662 }, { "epoch": 2.75, "learning_rate": 4.5715971095708595e-06, "loss": 0.0, "step": 6664 }, { "epoch": 2.75, "learning_rate": 4.556850022120632e-06, "loss": 0.0, "step": 6666 }, { "epoch": 2.75, "learning_rate": 4.542102934670403e-06, "loss": 0.0, "step": 6668 }, { "epoch": 2.75, "learning_rate": 4.527355847220174e-06, "loss": 0.0, "step": 6670 }, { "epoch": 2.75, "learning_rate": 4.512608759769946e-06, "loss": 0.0, "step": 6672 }, { "epoch": 2.75, "learning_rate": 4.497861672319717e-06, "loss": 0.0, "step": 6674 }, { "epoch": 2.75, "learning_rate": 4.4831145848694885e-06, "loss": 0.0, "step": 6676 }, { "epoch": 2.75, "learning_rate": 4.46836749741926e-06, "loss": 0.0, "step": 6678 }, { "epoch": 2.75, "learning_rate": 4.453620409969032e-06, "loss": 0.0, "step": 6680 }, { "epoch": 2.75, "learning_rate": 4.438873322518803e-06, "loss": 0.0, "step": 6682 }, { "epoch": 2.75, "learning_rate": 4.424126235068574e-06, "loss": 0.0, "step": 6684 }, { "epoch": 2.75, "learning_rate": 4.409379147618345e-06, "loss": 0.0, "step": 6686 }, { "epoch": 2.76, "learning_rate": 4.394632060168117e-06, "loss": 0.0, "step": 6688 }, { "epoch": 2.76, "learning_rate": 4.379884972717888e-06, "loss": 0.0, "step": 6690 }, { "epoch": 2.76, "learning_rate": 4.36513788526766e-06, "loss": 0.0, "step": 6692 }, { "epoch": 2.76, "learning_rate": 4.350390797817432e-06, "loss": 0.0001, "step": 6694 }, { "epoch": 2.76, "learning_rate": 4.335643710367203e-06, "loss": 0.0, "step": 6696 }, { "epoch": 2.76, "learning_rate": 4.320896622916974e-06, "loss": 0.0, "step": 6698 }, { "epoch": 2.76, "learning_rate": 4.306149535466746e-06, "loss": 0.0, "step": 6700 }, { "epoch": 2.76, "learning_rate": 4.291402448016517e-06, "loss": 0.0, "step": 6702 }, { "epoch": 2.76, "learning_rate": 4.276655360566288e-06, "loss": 0.0, "step": 6704 }, { "epoch": 2.76, "learning_rate": 4.261908273116059e-06, "loss": 0.0, "step": 6706 }, { "epoch": 2.76, "learning_rate": 4.247161185665831e-06, "loss": 0.0001, "step": 6708 }, { "epoch": 2.76, "learning_rate": 4.2324140982156025e-06, "loss": 0.0, "step": 6710 }, { "epoch": 2.77, "learning_rate": 4.217667010765375e-06, "loss": 0.0, "step": 6712 }, { "epoch": 2.77, "learning_rate": 4.202919923315146e-06, "loss": 0.0, "step": 6714 }, { "epoch": 2.77, "learning_rate": 4.188172835864917e-06, "loss": 0.0, "step": 6716 }, { "epoch": 2.77, "learning_rate": 4.173425748414688e-06, "loss": 0.0, "step": 6718 }, { "epoch": 2.77, "learning_rate": 4.1586786609644594e-06, "loss": 0.0, "step": 6720 }, { "epoch": 2.77, "learning_rate": 4.1439315735142315e-06, "loss": 0.0, "step": 6722 }, { "epoch": 2.77, "learning_rate": 4.129184486064003e-06, "loss": 0.0, "step": 6724 }, { "epoch": 2.77, "learning_rate": 4.114437398613774e-06, "loss": 0.0, "step": 6726 }, { "epoch": 2.77, "learning_rate": 4.099690311163545e-06, "loss": 0.0, "step": 6728 }, { "epoch": 2.77, "learning_rate": 4.084943223713316e-06, "loss": 0.0, "step": 6730 }, { "epoch": 2.77, "learning_rate": 4.070196136263088e-06, "loss": 0.0, "step": 6732 }, { "epoch": 2.77, "learning_rate": 4.05544904881286e-06, "loss": 0.0, "step": 6734 }, { "epoch": 2.78, "learning_rate": 4.040701961362632e-06, "loss": 0.0, "step": 6736 }, { "epoch": 2.78, "learning_rate": 4.025954873912403e-06, "loss": 0.0, "step": 6738 }, { "epoch": 2.78, "learning_rate": 4.011207786462174e-06, "loss": 0.0, "step": 6740 }, { "epoch": 2.78, "learning_rate": 3.996460699011945e-06, "loss": 0.0, "step": 6742 }, { "epoch": 2.78, "learning_rate": 3.9817136115617165e-06, "loss": 0.0, "step": 6744 }, { "epoch": 2.78, "learning_rate": 3.966966524111488e-06, "loss": 0.0, "step": 6746 }, { "epoch": 2.78, "learning_rate": 3.952219436661259e-06, "loss": 0.0, "step": 6748 }, { "epoch": 2.78, "learning_rate": 3.937472349211031e-06, "loss": 0.0, "step": 6750 }, { "epoch": 2.78, "learning_rate": 3.922725261760802e-06, "loss": 0.0, "step": 6752 }, { "epoch": 2.78, "learning_rate": 3.907978174310574e-06, "loss": 0.0, "step": 6754 }, { "epoch": 2.78, "learning_rate": 3.8932310868603455e-06, "loss": 0.0, "step": 6756 }, { "epoch": 2.78, "learning_rate": 3.878483999410117e-06, "loss": 0.0, "step": 6758 }, { "epoch": 2.79, "learning_rate": 3.863736911959888e-06, "loss": 0.0, "step": 6760 }, { "epoch": 2.79, "learning_rate": 3.848989824509659e-06, "loss": 0.0, "step": 6762 }, { "epoch": 2.79, "learning_rate": 3.834242737059431e-06, "loss": 0.0, "step": 6764 }, { "epoch": 2.79, "learning_rate": 3.819495649609202e-06, "loss": 0.0, "step": 6766 }, { "epoch": 2.79, "learning_rate": 3.804748562158974e-06, "loss": 0.0, "step": 6768 }, { "epoch": 2.79, "learning_rate": 3.7900014747087453e-06, "loss": 0.0, "step": 6770 }, { "epoch": 2.79, "learning_rate": 3.7752543872585165e-06, "loss": 0.0, "step": 6772 }, { "epoch": 2.79, "learning_rate": 3.7605072998082877e-06, "loss": 0.0, "step": 6774 }, { "epoch": 2.79, "learning_rate": 3.7457602123580593e-06, "loss": 0.0, "step": 6776 }, { "epoch": 2.79, "learning_rate": 3.731013124907831e-06, "loss": 0.0, "step": 6778 }, { "epoch": 2.79, "learning_rate": 3.7162660374576026e-06, "loss": 0.0, "step": 6780 }, { "epoch": 2.79, "learning_rate": 3.701518950007374e-06, "loss": 0.0, "step": 6782 }, { "epoch": 2.8, "learning_rate": 3.686771862557145e-06, "loss": 0.0, "step": 6784 }, { "epoch": 2.8, "learning_rate": 3.6720247751069162e-06, "loss": 0.0, "step": 6786 }, { "epoch": 2.8, "learning_rate": 3.657277687656688e-06, "loss": 0.0, "step": 6788 }, { "epoch": 2.8, "learning_rate": 3.642530600206459e-06, "loss": 0.0, "step": 6790 }, { "epoch": 2.8, "learning_rate": 3.627783512756231e-06, "loss": 0.0, "step": 6792 }, { "epoch": 2.8, "learning_rate": 3.6130364253060024e-06, "loss": 0.0, "step": 6794 }, { "epoch": 2.8, "learning_rate": 3.5982893378557736e-06, "loss": 0.0, "step": 6796 }, { "epoch": 2.8, "learning_rate": 3.5835422504055452e-06, "loss": 0.0, "step": 6798 }, { "epoch": 2.8, "learning_rate": 3.5687951629553164e-06, "loss": 0.0001, "step": 6800 }, { "epoch": 2.8, "learning_rate": 3.5540480755050876e-06, "loss": 0.0, "step": 6802 }, { "epoch": 2.8, "learning_rate": 3.539300988054859e-06, "loss": 0.0, "step": 6804 }, { "epoch": 2.8, "learning_rate": 3.524553900604631e-06, "loss": 0.0001, "step": 6806 }, { "epoch": 2.81, "learning_rate": 3.5098068131544026e-06, "loss": 0.0, "step": 6808 }, { "epoch": 2.81, "learning_rate": 3.4950597257041738e-06, "loss": 0.0, "step": 6810 }, { "epoch": 2.81, "learning_rate": 3.480312638253945e-06, "loss": 0.0, "step": 6812 }, { "epoch": 2.81, "learning_rate": 3.465565550803716e-06, "loss": 0.0, "step": 6814 }, { "epoch": 2.81, "learning_rate": 3.450818463353488e-06, "loss": 0.0, "step": 6816 }, { "epoch": 2.81, "learning_rate": 3.436071375903259e-06, "loss": 0.0, "step": 6818 }, { "epoch": 2.81, "learning_rate": 3.421324288453031e-06, "loss": 0.0, "step": 6820 }, { "epoch": 2.81, "learning_rate": 3.4065772010028023e-06, "loss": 0.0, "step": 6822 }, { "epoch": 2.81, "learning_rate": 3.3918301135525735e-06, "loss": 0.0, "step": 6824 }, { "epoch": 2.81, "learning_rate": 3.3770830261023447e-06, "loss": 0.0, "step": 6826 }, { "epoch": 2.81, "learning_rate": 3.3623359386521164e-06, "loss": 0.0, "step": 6828 }, { "epoch": 2.81, "learning_rate": 3.3475888512018876e-06, "loss": 0.0, "step": 6830 }, { "epoch": 2.81, "learning_rate": 3.332841763751659e-06, "loss": 0.0, "step": 6832 }, { "epoch": 2.82, "learning_rate": 3.318094676301431e-06, "loss": 0.0, "step": 6834 }, { "epoch": 2.82, "learning_rate": 3.303347588851202e-06, "loss": 0.0, "step": 6836 }, { "epoch": 2.82, "learning_rate": 3.2886005014009737e-06, "loss": 0.0, "step": 6838 }, { "epoch": 2.82, "learning_rate": 3.273853413950745e-06, "loss": 0.0, "step": 6840 }, { "epoch": 2.82, "learning_rate": 3.259106326500516e-06, "loss": 0.0, "step": 6842 }, { "epoch": 2.82, "learning_rate": 3.2443592390502874e-06, "loss": 0.0, "step": 6844 }, { "epoch": 2.82, "learning_rate": 3.229612151600059e-06, "loss": 0.0, "step": 6846 }, { "epoch": 2.82, "learning_rate": 3.2148650641498306e-06, "loss": 0.016, "step": 6848 }, { "epoch": 2.82, "learning_rate": 3.2001179766996023e-06, "loss": 0.0, "step": 6850 }, { "epoch": 2.82, "learning_rate": 3.1853708892493735e-06, "loss": 0.0, "step": 6852 }, { "epoch": 2.82, "learning_rate": 3.1706238017991447e-06, "loss": 0.0001, "step": 6854 }, { "epoch": 2.82, "learning_rate": 3.155876714348916e-06, "loss": 0.0, "step": 6856 }, { "epoch": 2.83, "learning_rate": 3.1411296268986875e-06, "loss": 0.0, "step": 6858 }, { "epoch": 2.83, "learning_rate": 3.1263825394484588e-06, "loss": 0.0, "step": 6860 }, { "epoch": 2.83, "learning_rate": 3.1116354519982304e-06, "loss": 0.0, "step": 6862 }, { "epoch": 2.83, "learning_rate": 3.096888364548002e-06, "loss": 0.0, "step": 6864 }, { "epoch": 2.83, "learning_rate": 3.0821412770977732e-06, "loss": 0.0, "step": 6866 }, { "epoch": 2.83, "learning_rate": 3.067394189647545e-06, "loss": 0.0, "step": 6868 }, { "epoch": 2.83, "learning_rate": 3.052647102197316e-06, "loss": 0.0, "step": 6870 }, { "epoch": 2.83, "learning_rate": 3.0379000147470877e-06, "loss": 0.0, "step": 6872 }, { "epoch": 2.83, "learning_rate": 3.023152927296859e-06, "loss": 0.0, "step": 6874 }, { "epoch": 2.83, "learning_rate": 3.00840583984663e-06, "loss": 0.0, "step": 6876 }, { "epoch": 2.83, "learning_rate": 2.9936587523964022e-06, "loss": 0.0, "step": 6878 }, { "epoch": 2.83, "learning_rate": 2.9789116649461734e-06, "loss": 0.0, "step": 6880 }, { "epoch": 2.84, "learning_rate": 2.9641645774959446e-06, "loss": 0.0, "step": 6882 }, { "epoch": 2.84, "learning_rate": 2.949417490045716e-06, "loss": 0.0, "step": 6884 }, { "epoch": 2.84, "learning_rate": 2.9346704025954875e-06, "loss": 0.0, "step": 6886 }, { "epoch": 2.84, "learning_rate": 2.919923315145259e-06, "loss": 0.0001, "step": 6888 }, { "epoch": 2.84, "learning_rate": 2.9051762276950303e-06, "loss": 0.0, "step": 6890 }, { "epoch": 2.84, "learning_rate": 2.890429140244802e-06, "loss": 0.0, "step": 6892 }, { "epoch": 2.84, "learning_rate": 2.875682052794573e-06, "loss": 0.0001, "step": 6894 }, { "epoch": 2.84, "learning_rate": 2.8609349653443444e-06, "loss": 0.0, "step": 6896 }, { "epoch": 2.84, "learning_rate": 2.846187877894116e-06, "loss": 0.0, "step": 6898 }, { "epoch": 2.84, "learning_rate": 2.8314407904438877e-06, "loss": 0.0, "step": 6900 }, { "epoch": 2.84, "learning_rate": 2.816693702993659e-06, "loss": 0.0, "step": 6902 }, { "epoch": 2.84, "learning_rate": 2.80194661554343e-06, "loss": 0.0, "step": 6904 }, { "epoch": 2.85, "learning_rate": 2.7871995280932017e-06, "loss": 0.0, "step": 6906 }, { "epoch": 2.85, "learning_rate": 2.7724524406429734e-06, "loss": 0.0, "step": 6908 }, { "epoch": 2.85, "learning_rate": 2.7577053531927446e-06, "loss": 0.0, "step": 6910 }, { "epoch": 2.85, "learning_rate": 2.742958265742516e-06, "loss": 0.0, "step": 6912 }, { "epoch": 2.85, "learning_rate": 2.7282111782922874e-06, "loss": 0.0, "step": 6914 }, { "epoch": 2.85, "learning_rate": 2.7134640908420587e-06, "loss": 0.0, "step": 6916 }, { "epoch": 2.85, "learning_rate": 2.6987170033918303e-06, "loss": 0.0, "step": 6918 }, { "epoch": 2.85, "learning_rate": 2.683969915941602e-06, "loss": 0.0, "step": 6920 }, { "epoch": 2.85, "learning_rate": 2.669222828491373e-06, "loss": 0.0, "step": 6922 }, { "epoch": 2.85, "learning_rate": 2.6544757410411444e-06, "loss": 0.0, "step": 6924 }, { "epoch": 2.85, "learning_rate": 2.6397286535909156e-06, "loss": 0.0, "step": 6926 }, { "epoch": 2.85, "learning_rate": 2.6249815661406876e-06, "loss": 0.0, "step": 6928 }, { "epoch": 2.86, "learning_rate": 2.610234478690459e-06, "loss": 0.0, "step": 6930 }, { "epoch": 2.86, "learning_rate": 2.59548739124023e-06, "loss": 0.0, "step": 6932 }, { "epoch": 2.86, "learning_rate": 2.5807403037900017e-06, "loss": 0.0, "step": 6934 }, { "epoch": 2.86, "learning_rate": 2.565993216339773e-06, "loss": 0.0, "step": 6936 }, { "epoch": 2.86, "learning_rate": 2.5512461288895445e-06, "loss": 0.0, "step": 6938 }, { "epoch": 2.86, "learning_rate": 2.5364990414393158e-06, "loss": 0.0, "step": 6940 }, { "epoch": 2.86, "learning_rate": 2.5217519539890874e-06, "loss": 0.0, "step": 6942 }, { "epoch": 2.86, "learning_rate": 2.5070048665388586e-06, "loss": 0.0, "step": 6944 }, { "epoch": 2.86, "learning_rate": 2.49225777908863e-06, "loss": 0.0, "step": 6946 }, { "epoch": 2.86, "learning_rate": 2.4775106916384015e-06, "loss": 0.0, "step": 6948 }, { "epoch": 2.86, "learning_rate": 2.462763604188173e-06, "loss": 0.0, "step": 6950 }, { "epoch": 2.86, "learning_rate": 2.4480165167379443e-06, "loss": 0.0, "step": 6952 }, { "epoch": 2.87, "learning_rate": 2.433269429287716e-06, "loss": 0.0002, "step": 6954 }, { "epoch": 2.87, "learning_rate": 2.418522341837487e-06, "loss": 0.0, "step": 6956 }, { "epoch": 2.87, "learning_rate": 2.403775254387259e-06, "loss": 0.0, "step": 6958 }, { "epoch": 2.87, "learning_rate": 2.38902816693703e-06, "loss": 0.0, "step": 6960 }, { "epoch": 2.87, "learning_rate": 2.3742810794868016e-06, "loss": 0.0, "step": 6962 }, { "epoch": 2.87, "learning_rate": 2.359533992036573e-06, "loss": 0.0, "step": 6964 }, { "epoch": 2.87, "learning_rate": 2.344786904586344e-06, "loss": 0.0, "step": 6966 }, { "epoch": 2.87, "learning_rate": 2.3300398171361157e-06, "loss": 0.0, "step": 6968 }, { "epoch": 2.87, "learning_rate": 2.3152927296858873e-06, "loss": 0.0, "step": 6970 }, { "epoch": 2.87, "learning_rate": 2.3005456422356586e-06, "loss": 0.0, "step": 6972 }, { "epoch": 2.87, "learning_rate": 2.2857985547854298e-06, "loss": 0.0, "step": 6974 }, { "epoch": 2.87, "learning_rate": 2.2710514673352014e-06, "loss": 0.0001, "step": 6976 }, { "epoch": 2.88, "learning_rate": 2.256304379884973e-06, "loss": 0.0, "step": 6978 }, { "epoch": 2.88, "learning_rate": 2.2415572924347443e-06, "loss": 0.0, "step": 6980 }, { "epoch": 2.88, "learning_rate": 2.226810204984516e-06, "loss": 0.0, "step": 6982 }, { "epoch": 2.88, "learning_rate": 2.212063117534287e-06, "loss": 0.0, "step": 6984 }, { "epoch": 2.88, "learning_rate": 2.1973160300840583e-06, "loss": 0.0, "step": 6986 }, { "epoch": 2.88, "learning_rate": 2.18256894263383e-06, "loss": 0.0, "step": 6988 }, { "epoch": 2.88, "learning_rate": 2.1678218551836016e-06, "loss": 0.0, "step": 6990 }, { "epoch": 2.88, "learning_rate": 2.153074767733373e-06, "loss": 0.0, "step": 6992 }, { "epoch": 2.88, "learning_rate": 2.138327680283144e-06, "loss": 0.0, "step": 6994 }, { "epoch": 2.88, "learning_rate": 2.1235805928329157e-06, "loss": 0.0, "step": 6996 }, { "epoch": 2.88, "learning_rate": 2.1088335053826873e-06, "loss": 0.0, "step": 6998 }, { "epoch": 2.88, "learning_rate": 2.0940864179324585e-06, "loss": 0.0002, "step": 7000 }, { "epoch": 2.89, "learning_rate": 2.0793393304822297e-06, "loss": 0.0, "step": 7002 }, { "epoch": 2.89, "learning_rate": 2.0645922430320014e-06, "loss": 0.0, "step": 7004 }, { "epoch": 2.89, "learning_rate": 2.0498451555817726e-06, "loss": 0.0, "step": 7006 }, { "epoch": 2.89, "learning_rate": 2.035098068131544e-06, "loss": 0.0, "step": 7008 }, { "epoch": 2.89, "learning_rate": 2.020350980681316e-06, "loss": 0.0, "step": 7010 }, { "epoch": 2.89, "learning_rate": 2.005603893231087e-06, "loss": 0.0, "step": 7012 }, { "epoch": 2.89, "learning_rate": 1.9908568057808583e-06, "loss": 0.0, "step": 7014 }, { "epoch": 2.89, "learning_rate": 1.9761097183306295e-06, "loss": 0.0, "step": 7016 }, { "epoch": 2.89, "learning_rate": 1.961362630880401e-06, "loss": 0.0, "step": 7018 }, { "epoch": 2.89, "learning_rate": 1.9466155434301728e-06, "loss": 0.0, "step": 7020 }, { "epoch": 2.89, "learning_rate": 1.931868455979944e-06, "loss": 0.0, "step": 7022 }, { "epoch": 2.89, "learning_rate": 1.9171213685297156e-06, "loss": 0.0, "step": 7024 }, { "epoch": 2.89, "learning_rate": 1.902374281079487e-06, "loss": 0.0, "step": 7026 }, { "epoch": 2.9, "learning_rate": 1.8876271936292582e-06, "loss": 0.0, "step": 7028 }, { "epoch": 2.9, "learning_rate": 1.8728801061790297e-06, "loss": 0.0001, "step": 7030 }, { "epoch": 2.9, "learning_rate": 1.8581330187288013e-06, "loss": 0.0, "step": 7032 }, { "epoch": 2.9, "learning_rate": 1.8433859312785725e-06, "loss": 0.0, "step": 7034 }, { "epoch": 2.9, "learning_rate": 1.828638843828344e-06, "loss": 0.0, "step": 7036 }, { "epoch": 2.9, "learning_rate": 1.8138917563781156e-06, "loss": 0.0, "step": 7038 }, { "epoch": 2.9, "learning_rate": 1.7991446689278868e-06, "loss": 0.0, "step": 7040 }, { "epoch": 2.9, "learning_rate": 1.7843975814776582e-06, "loss": 0.0, "step": 7042 }, { "epoch": 2.9, "learning_rate": 1.7696504940274294e-06, "loss": 0.0, "step": 7044 }, { "epoch": 2.9, "learning_rate": 1.7549034065772013e-06, "loss": 0.0, "step": 7046 }, { "epoch": 2.9, "learning_rate": 1.7401563191269725e-06, "loss": 0.0, "step": 7048 }, { "epoch": 2.9, "learning_rate": 1.732782775401858e-06, "loss": 0.0157, "step": 7050 }, { "epoch": 2.91, "learning_rate": 1.7180356879516295e-06, "loss": 0.0, "step": 7052 }, { "epoch": 2.91, "learning_rate": 1.7032886005014012e-06, "loss": 0.0, "step": 7054 }, { "epoch": 2.91, "learning_rate": 1.6885415130511724e-06, "loss": 0.0, "step": 7056 }, { "epoch": 2.91, "learning_rate": 1.6737944256009438e-06, "loss": 0.0, "step": 7058 }, { "epoch": 2.91, "learning_rate": 1.6590473381507154e-06, "loss": 0.0, "step": 7060 }, { "epoch": 2.91, "learning_rate": 1.6443002507004869e-06, "loss": 0.0, "step": 7062 }, { "epoch": 2.91, "learning_rate": 1.629553163250258e-06, "loss": 0.0, "step": 7064 }, { "epoch": 2.91, "learning_rate": 1.6148060758000295e-06, "loss": 0.0, "step": 7066 }, { "epoch": 2.91, "learning_rate": 1.6000589883498011e-06, "loss": 0.0001, "step": 7068 }, { "epoch": 2.91, "learning_rate": 1.5853119008995723e-06, "loss": 0.0, "step": 7070 }, { "epoch": 2.91, "learning_rate": 1.5705648134493438e-06, "loss": 0.0, "step": 7072 }, { "epoch": 2.91, "learning_rate": 1.5558177259991152e-06, "loss": 0.0, "step": 7074 }, { "epoch": 2.92, "learning_rate": 1.5410706385488866e-06, "loss": 0.0, "step": 7076 }, { "epoch": 2.92, "learning_rate": 1.526323551098658e-06, "loss": 0.0, "step": 7078 }, { "epoch": 2.92, "learning_rate": 1.5115764636484295e-06, "loss": 0.0, "step": 7080 }, { "epoch": 2.92, "learning_rate": 1.4968293761982011e-06, "loss": 0.0, "step": 7082 }, { "epoch": 2.92, "learning_rate": 1.4820822887479723e-06, "loss": 0.0, "step": 7084 }, { "epoch": 2.92, "learning_rate": 1.4673352012977437e-06, "loss": 0.0, "step": 7086 }, { "epoch": 2.92, "learning_rate": 1.4525881138475152e-06, "loss": 0.0, "step": 7088 }, { "epoch": 2.92, "learning_rate": 1.4378410263972866e-06, "loss": 0.0001, "step": 7090 }, { "epoch": 2.92, "learning_rate": 1.423093938947058e-06, "loss": 0.0, "step": 7092 }, { "epoch": 2.92, "learning_rate": 1.4083468514968294e-06, "loss": 0.0, "step": 7094 }, { "epoch": 2.92, "learning_rate": 1.3935997640466009e-06, "loss": 0.0, "step": 7096 }, { "epoch": 2.92, "learning_rate": 1.3788526765963723e-06, "loss": 0.0, "step": 7098 }, { "epoch": 2.93, "learning_rate": 1.3641055891461437e-06, "loss": 0.0, "step": 7100 }, { "epoch": 2.93, "learning_rate": 1.3493585016959151e-06, "loss": 0.0, "step": 7102 }, { "epoch": 2.93, "learning_rate": 1.3346114142456866e-06, "loss": 0.0, "step": 7104 }, { "epoch": 2.93, "learning_rate": 1.3198643267954578e-06, "loss": 0.0, "step": 7106 }, { "epoch": 2.93, "learning_rate": 1.3051172393452294e-06, "loss": 0.0, "step": 7108 }, { "epoch": 2.93, "learning_rate": 1.2903701518950008e-06, "loss": 0.0001, "step": 7110 }, { "epoch": 2.93, "learning_rate": 1.2756230644447723e-06, "loss": 0.0, "step": 7112 }, { "epoch": 2.93, "learning_rate": 1.2608759769945437e-06, "loss": 0.0, "step": 7114 }, { "epoch": 2.93, "learning_rate": 1.246128889544315e-06, "loss": 0.0, "step": 7116 }, { "epoch": 2.93, "learning_rate": 1.2313818020940865e-06, "loss": 0.0, "step": 7118 }, { "epoch": 2.93, "learning_rate": 1.216634714643858e-06, "loss": 0.0001, "step": 7120 }, { "epoch": 2.93, "learning_rate": 1.2018876271936294e-06, "loss": 0.0002, "step": 7122 }, { "epoch": 2.94, "learning_rate": 1.1871405397434008e-06, "loss": 0.0, "step": 7124 }, { "epoch": 2.94, "learning_rate": 1.172393452293172e-06, "loss": 0.0, "step": 7126 }, { "epoch": 2.94, "learning_rate": 1.1576463648429437e-06, "loss": 0.0, "step": 7128 }, { "epoch": 2.94, "learning_rate": 1.1428992773927149e-06, "loss": 0.0, "step": 7130 }, { "epoch": 2.94, "learning_rate": 1.1281521899424865e-06, "loss": 0.0, "step": 7132 }, { "epoch": 2.94, "learning_rate": 1.113405102492258e-06, "loss": 0.0, "step": 7134 }, { "epoch": 2.94, "learning_rate": 1.0986580150420292e-06, "loss": 0.0, "step": 7136 }, { "epoch": 2.94, "learning_rate": 1.0839109275918008e-06, "loss": 0.0, "step": 7138 }, { "epoch": 2.94, "learning_rate": 1.069163840141572e-06, "loss": 0.0009, "step": 7140 }, { "epoch": 2.94, "learning_rate": 1.0544167526913436e-06, "loss": 0.0, "step": 7142 }, { "epoch": 2.94, "learning_rate": 1.0396696652411149e-06, "loss": 0.0, "step": 7144 }, { "epoch": 2.94, "learning_rate": 1.0249225777908863e-06, "loss": 0.0, "step": 7146 }, { "epoch": 2.95, "learning_rate": 1.010175490340658e-06, "loss": 0.0, "step": 7148 }, { "epoch": 2.95, "learning_rate": 9.954284028904291e-07, "loss": 0.0, "step": 7150 }, { "epoch": 2.95, "learning_rate": 9.806813154402006e-07, "loss": 0.0, "step": 7152 }, { "epoch": 2.95, "learning_rate": 9.65934227989972e-07, "loss": 0.0, "step": 7154 }, { "epoch": 2.95, "learning_rate": 9.511871405397435e-07, "loss": 0.0, "step": 7156 }, { "epoch": 2.95, "learning_rate": 9.364400530895148e-07, "loss": 0.0, "step": 7158 }, { "epoch": 2.95, "learning_rate": 9.216929656392863e-07, "loss": 0.0, "step": 7160 }, { "epoch": 2.95, "learning_rate": 9.069458781890578e-07, "loss": 0.0, "step": 7162 }, { "epoch": 2.95, "learning_rate": 8.921987907388291e-07, "loss": 0.0, "step": 7164 }, { "epoch": 2.95, "learning_rate": 8.774517032886006e-07, "loss": 0.0, "step": 7166 }, { "epoch": 2.95, "learning_rate": 8.62704615838372e-07, "loss": 0.0, "step": 7168 }, { "epoch": 2.95, "learning_rate": 8.479575283881434e-07, "loss": 0.0, "step": 7170 }, { "epoch": 2.96, "learning_rate": 8.332104409379147e-07, "loss": 0.0, "step": 7172 }, { "epoch": 2.96, "learning_rate": 8.184633534876862e-07, "loss": 0.0, "step": 7174 }, { "epoch": 2.96, "learning_rate": 8.037162660374577e-07, "loss": 0.0, "step": 7176 }, { "epoch": 2.96, "learning_rate": 7.88969178587229e-07, "loss": 0.0001, "step": 7178 }, { "epoch": 2.96, "learning_rate": 7.742220911370005e-07, "loss": 0.0, "step": 7180 }, { "epoch": 2.96, "learning_rate": 7.594750036867719e-07, "loss": 0.0, "step": 7182 }, { "epoch": 2.96, "learning_rate": 7.447279162365434e-07, "loss": 0.0, "step": 7184 }, { "epoch": 2.96, "learning_rate": 7.299808287863148e-07, "loss": 0.0, "step": 7186 }, { "epoch": 2.96, "learning_rate": 7.152337413360861e-07, "loss": 0.0, "step": 7188 }, { "epoch": 2.96, "learning_rate": 7.004866538858575e-07, "loss": 0.0, "step": 7190 }, { "epoch": 2.96, "learning_rate": 6.85739566435629e-07, "loss": 0.0, "step": 7192 }, { "epoch": 2.96, "learning_rate": 6.709924789854005e-07, "loss": 0.0, "step": 7194 }, { "epoch": 2.96, "learning_rate": 6.562453915351719e-07, "loss": 0.0001, "step": 7196 }, { "epoch": 2.97, "learning_rate": 6.414983040849432e-07, "loss": 0.0, "step": 7198 }, { "epoch": 2.97, "learning_rate": 6.267512166347147e-07, "loss": 0.0, "step": 7200 }, { "epoch": 2.97, "learning_rate": 6.120041291844861e-07, "loss": 0.0, "step": 7202 }, { "epoch": 2.97, "learning_rate": 5.972570417342575e-07, "loss": 0.0, "step": 7204 }, { "epoch": 2.97, "learning_rate": 5.825099542840289e-07, "loss": 0.0, "step": 7206 }, { "epoch": 2.97, "learning_rate": 5.677628668338004e-07, "loss": 0.0, "step": 7208 }, { "epoch": 2.97, "learning_rate": 5.530157793835718e-07, "loss": 0.0, "step": 7210 }, { "epoch": 2.97, "learning_rate": 5.382686919333432e-07, "loss": 0.0, "step": 7212 }, { "epoch": 2.97, "learning_rate": 5.235216044831146e-07, "loss": 0.0, "step": 7214 }, { "epoch": 2.97, "learning_rate": 5.08774517032886e-07, "loss": 0.0, "step": 7216 }, { "epoch": 2.97, "learning_rate": 4.940274295826574e-07, "loss": 0.0, "step": 7218 }, { "epoch": 2.97, "learning_rate": 4.792803421324289e-07, "loss": 0.0, "step": 7220 }, { "epoch": 2.98, "learning_rate": 4.645332546822003e-07, "loss": 0.0, "step": 7222 }, { "epoch": 2.98, "learning_rate": 4.497861672319717e-07, "loss": 0.0, "step": 7224 }, { "epoch": 2.98, "learning_rate": 4.350390797817431e-07, "loss": 0.0, "step": 7226 }, { "epoch": 2.98, "learning_rate": 4.2029199233151455e-07, "loss": 0.0, "step": 7228 }, { "epoch": 2.98, "learning_rate": 4.055449048812859e-07, "loss": 0.0, "step": 7230 }, { "epoch": 2.98, "learning_rate": 3.9079781743105734e-07, "loss": 0.0, "step": 7232 }, { "epoch": 2.98, "learning_rate": 3.7605072998082877e-07, "loss": 0.0, "step": 7234 }, { "epoch": 2.98, "learning_rate": 3.6130364253060025e-07, "loss": 0.0, "step": 7236 }, { "epoch": 2.98, "learning_rate": 3.4655655508037167e-07, "loss": 0.0, "step": 7238 }, { "epoch": 2.98, "learning_rate": 3.3180946763014304e-07, "loss": 0.0, "step": 7240 }, { "epoch": 2.98, "learning_rate": 3.1706238017991447e-07, "loss": 0.0, "step": 7242 }, { "epoch": 2.98, "learning_rate": 3.023152927296859e-07, "loss": 0.0001, "step": 7244 }, { "epoch": 2.99, "learning_rate": 2.875682052794573e-07, "loss": 0.0, "step": 7246 }, { "epoch": 2.99, "learning_rate": 2.7282111782922874e-07, "loss": 0.0001, "step": 7248 }, { "epoch": 2.99, "learning_rate": 2.5807403037900017e-07, "loss": 0.0, "step": 7250 }, { "epoch": 2.99, "learning_rate": 2.433269429287716e-07, "loss": 0.0, "step": 7252 }, { "epoch": 2.99, "learning_rate": 2.28579855478543e-07, "loss": 0.0, "step": 7254 }, { "epoch": 2.99, "learning_rate": 2.1383276802831444e-07, "loss": 0.0, "step": 7256 }, { "epoch": 2.99, "learning_rate": 1.9908568057808584e-07, "loss": 0.0, "step": 7258 }, { "epoch": 2.99, "learning_rate": 1.8433859312785724e-07, "loss": 0.0002, "step": 7260 }, { "epoch": 2.99, "learning_rate": 1.6959150567762867e-07, "loss": 0.0001, "step": 7262 }, { "epoch": 2.99, "learning_rate": 1.548444182274001e-07, "loss": 0.0, "step": 7264 }, { "epoch": 2.99, "learning_rate": 1.4009733077717152e-07, "loss": 0.0, "step": 7266 }, { "epoch": 2.99, "learning_rate": 1.2535024332694294e-07, "loss": 0.0, "step": 7268 }, { "epoch": 3.0, "learning_rate": 1.1060315587671434e-07, "loss": 0.0, "step": 7270 }, { "epoch": 3.0, "learning_rate": 9.585606842648578e-08, "loss": 0.0, "step": 7272 }, { "epoch": 3.0, "learning_rate": 8.110898097625719e-08, "loss": 0.0, "step": 7274 }, { "epoch": 3.0, "learning_rate": 6.636189352602861e-08, "loss": 0.0, "step": 7276 }, { "epoch": 3.0, "learning_rate": 5.161480607580003e-08, "loss": 0.0, "step": 7278 }, { "epoch": 3.0, "learning_rate": 3.686771862557145e-08, "loss": 0.0, "step": 7280 } ], "max_steps": 7281, "num_train_epochs": 3, "total_flos": 2.563609032226944e+16, "trial_name": null, "trial_params": null }