{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.9998557067962099, "eval_steps": 500, "global_step": 5197, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 8.974358974358975e-07, "loss": 2.9732, "loss_": 1.436, "moe_loss": 0.1675, "moe_loss_longrong": 1.4982, "step": 7 }, { "epoch": 0.0, "learning_rate": 1.794871794871795e-06, "loss": 2.9414, "loss_": 1.3375, "moe_loss": 0.1674, "moe_loss_longrong": 1.4964, "step": 14 }, { "epoch": 0.0, "learning_rate": 2.6923076923076923e-06, "loss": 2.8682, "loss_": 1.1438, "moe_loss": 0.1663, "moe_loss_longrong": 1.4917, "step": 21 }, { "epoch": 0.01, "learning_rate": 3.58974358974359e-06, "loss": 2.806, "loss_": 1.2312, "moe_loss": 0.1655, "moe_loss_longrong": 1.488, "step": 28 }, { "epoch": 0.01, "learning_rate": 4.487179487179488e-06, "loss": 2.7734, "loss_": 1.2285, "moe_loss": 0.1645, "moe_loss_longrong": 1.4882, "step": 35 }, { "epoch": 0.01, "learning_rate": 5.384615384615385e-06, "loss": 2.7314, "loss_": 1.0526, "moe_loss": 0.1635, "moe_loss_longrong": 1.4847, "step": 42 }, { "epoch": 0.01, "learning_rate": 6.282051282051282e-06, "loss": 2.6961, "loss_": 0.9249, "moe_loss": 0.1627, "moe_loss_longrong": 1.4803, "step": 49 }, { "epoch": 0.01, "learning_rate": 7.17948717948718e-06, "loss": 2.6779, "loss_": 0.9535, "moe_loss": 0.1665, "moe_loss_longrong": 1.5082, "step": 56 }, { "epoch": 0.01, "learning_rate": 8.076923076923077e-06, "loss": 2.6983, "loss_": 1.1541, "moe_loss": 0.1617, "moe_loss_longrong": 1.4777, "step": 63 }, { "epoch": 0.01, "learning_rate": 8.974358974358976e-06, "loss": 2.6718, "loss_": 1.0194, "moe_loss": 0.1613, "moe_loss_longrong": 1.4718, "step": 70 }, { "epoch": 0.01, "learning_rate": 9.871794871794872e-06, "loss": 2.6443, "loss_": 0.7302, "moe_loss": 0.1615, "moe_loss_longrong": 1.4723, "step": 77 }, { "epoch": 0.02, "learning_rate": 1.076923076923077e-05, "loss": 2.7002, "loss_": 1.1497, "moe_loss": 0.1615, "moe_loss_longrong": 1.4679, "step": 84 }, { "epoch": 0.02, "learning_rate": 1.1666666666666668e-05, "loss": 2.6528, "loss_": 1.114, "moe_loss": 0.1613, "moe_loss_longrong": 1.4653, "step": 91 }, { "epoch": 0.02, "learning_rate": 1.2564102564102565e-05, "loss": 2.667, "loss_": 1.0337, "moe_loss": 0.1614, "moe_loss_longrong": 1.466, "step": 98 }, { "epoch": 0.02, "learning_rate": 1.3461538461538463e-05, "loss": 2.61, "loss_": 0.8435, "moe_loss": 0.1643, "moe_loss_longrong": 1.4985, "step": 105 }, { "epoch": 0.02, "learning_rate": 1.435897435897436e-05, "loss": 2.6878, "loss_": 1.2116, "moe_loss": 0.1611, "moe_loss_longrong": 1.4619, "step": 112 }, { "epoch": 0.02, "learning_rate": 1.5256410256410257e-05, "loss": 2.6614, "loss_": 1.2295, "moe_loss": 0.1612, "moe_loss_longrong": 1.4628, "step": 119 }, { "epoch": 0.02, "learning_rate": 1.6153846153846154e-05, "loss": 2.6541, "loss_": 1.087, "moe_loss": 0.1612, "moe_loss_longrong": 1.4593, "step": 126 }, { "epoch": 0.03, "learning_rate": 1.7051282051282053e-05, "loss": 2.6268, "loss_": 1.0603, "moe_loss": 0.1611, "moe_loss_longrong": 1.4563, "step": 133 }, { "epoch": 0.03, "learning_rate": 1.794871794871795e-05, "loss": 2.6591, "loss_": 1.1362, "moe_loss": 0.1611, "moe_loss_longrong": 1.4566, "step": 140 }, { "epoch": 0.03, "learning_rate": 1.8846153846153846e-05, "loss": 2.6675, "loss_": 0.9348, "moe_loss": 0.1611, "moe_loss_longrong": 1.4551, "step": 147 }, { "epoch": 0.03, "learning_rate": 1.9743589743589745e-05, "loss": 2.6317, "loss_": 1.1054, "moe_loss": 0.1611, "moe_loss_longrong": 1.4542, "step": 154 }, { "epoch": 0.03, "learning_rate": 1.999995145147809e-05, "loss": 2.658, "loss_": 0.9526, "moe_loss": 0.1611, "moe_loss_longrong": 1.4514, "step": 161 }, { "epoch": 0.03, "learning_rate": 1.9999720361590812e-05, "loss": 2.6381, "loss_": 0.9977, "moe_loss": 0.1615, "moe_loss_longrong": 1.4506, "step": 168 }, { "epoch": 0.03, "learning_rate": 1.9999298966967264e-05, "loss": 2.6193, "loss_": 0.8738, "moe_loss": 0.1612, "moe_loss_longrong": 1.4527, "step": 175 }, { "epoch": 0.04, "learning_rate": 1.9998687275627008e-05, "loss": 2.617, "loss_": 1.0383, "moe_loss": 0.1612, "moe_loss_longrong": 1.4503, "step": 182 }, { "epoch": 0.04, "learning_rate": 1.999788529921114e-05, "loss": 2.6334, "loss_": 0.9878, "moe_loss": 0.1611, "moe_loss_longrong": 1.4484, "step": 189 }, { "epoch": 0.04, "learning_rate": 1.9996893052982083e-05, "loss": 2.6288, "loss_": 0.9578, "moe_loss": 0.161, "moe_loss_longrong": 1.4476, "step": 196 }, { "epoch": 0.04, "learning_rate": 1.9995710555823277e-05, "loss": 2.6573, "loss_": 1.0551, "moe_loss": 0.1608, "moe_loss_longrong": 1.4478, "step": 203 }, { "epoch": 0.04, "learning_rate": 1.9994337830238836e-05, "loss": 2.6195, "loss_": 1.2421, "moe_loss": 0.1609, "moe_loss_longrong": 1.4449, "step": 210 }, { "epoch": 0.04, "learning_rate": 1.9992774902353104e-05, "loss": 2.5979, "loss_": 1.0235, "moe_loss": 0.1627, "moe_loss_longrong": 1.4802, "step": 217 }, { "epoch": 0.04, "learning_rate": 1.9991021801910177e-05, "loss": 2.6143, "loss_": 1.1486, "moe_loss": 0.1608, "moe_loss_longrong": 1.4443, "step": 224 }, { "epoch": 0.04, "learning_rate": 1.9989078562273313e-05, "loss": 2.6047, "loss_": 0.9541, "moe_loss": 0.161, "moe_loss_longrong": 1.4455, "step": 231 }, { "epoch": 0.05, "learning_rate": 1.9986945220424326e-05, "loss": 2.6336, "loss_": 1.1406, "moe_loss": 0.1607, "moe_loss_longrong": 1.4428, "step": 238 }, { "epoch": 0.05, "learning_rate": 1.9984621816962843e-05, "loss": 2.6217, "loss_": 1.0207, "moe_loss": 0.1608, "moe_loss_longrong": 1.4447, "step": 245 }, { "epoch": 0.05, "learning_rate": 1.9982108396105584e-05, "loss": 2.6014, "loss_": 1.3744, "moe_loss": 0.1608, "moe_loss_longrong": 1.4426, "step": 252 }, { "epoch": 0.05, "learning_rate": 1.9979405005685466e-05, "loss": 2.6134, "loss_": 0.9548, "moe_loss": 0.1609, "moe_loss_longrong": 1.4415, "step": 259 }, { "epoch": 0.05, "learning_rate": 1.997651169715073e-05, "loss": 2.6022, "loss_": 1.058, "moe_loss": 0.1607, "moe_loss_longrong": 1.4413, "step": 266 }, { "epoch": 0.05, "learning_rate": 1.9973428525563948e-05, "loss": 2.6219, "loss_": 1.1897, "moe_loss": 0.1607, "moe_loss_longrong": 1.4404, "step": 273 }, { "epoch": 0.05, "learning_rate": 1.9970155549600978e-05, "loss": 2.6232, "loss_": 1.25, "moe_loss": 0.1607, "moe_loss_longrong": 1.4401, "step": 280 }, { "epoch": 0.06, "learning_rate": 1.996669283154984e-05, "loss": 2.5805, "loss_": 1.138, "moe_loss": 0.1607, "moe_loss_longrong": 1.4387, "step": 287 }, { "epoch": 0.06, "learning_rate": 1.996304043730955e-05, "loss": 2.6188, "loss_": 1.2563, "moe_loss": 0.1607, "moe_loss_longrong": 1.4386, "step": 294 }, { "epoch": 0.06, "learning_rate": 1.995919843638883e-05, "loss": 2.5867, "loss_": 0.9975, "moe_loss": 0.1607, "moe_loss_longrong": 1.4387, "step": 301 }, { "epoch": 0.06, "learning_rate": 1.9955166901904838e-05, "loss": 2.5987, "loss_": 1.157, "moe_loss": 0.1607, "moe_loss_longrong": 1.4366, "step": 308 }, { "epoch": 0.06, "learning_rate": 1.9950945910581718e-05, "loss": 2.5875, "loss_": 1.0582, "moe_loss": 0.1606, "moe_loss_longrong": 1.4378, "step": 315 }, { "epoch": 0.06, "learning_rate": 1.9946535542749187e-05, "loss": 2.5848, "loss_": 0.9934, "moe_loss": 0.1624, "moe_loss_longrong": 1.4703, "step": 322 }, { "epoch": 0.06, "learning_rate": 1.9941935882340976e-05, "loss": 2.6086, "loss_": 0.8756, "moe_loss": 0.1607, "moe_loss_longrong": 1.4366, "step": 329 }, { "epoch": 0.06, "learning_rate": 1.9937147016893257e-05, "loss": 2.5968, "loss_": 1.1941, "moe_loss": 0.1608, "moe_loss_longrong": 1.4376, "step": 336 }, { "epoch": 0.07, "learning_rate": 1.9932169037542947e-05, "loss": 2.6158, "loss_": 0.9761, "moe_loss": 0.1606, "moe_loss_longrong": 1.4358, "step": 343 }, { "epoch": 0.07, "learning_rate": 1.9927002039026002e-05, "loss": 2.5944, "loss_": 1.2162, "moe_loss": 0.1605, "moe_loss_longrong": 1.4346, "step": 350 }, { "epoch": 0.07, "learning_rate": 1.9921646119675606e-05, "loss": 2.5806, "loss_": 0.8511, "moe_loss": 0.1606, "moe_loss_longrong": 1.4358, "step": 357 }, { "epoch": 0.07, "learning_rate": 1.9916101381420285e-05, "loss": 2.6285, "loss_": 0.9065, "moe_loss": 0.1605, "moe_loss_longrong": 1.4361, "step": 364 }, { "epoch": 0.07, "learning_rate": 1.991036792978199e-05, "loss": 2.6076, "loss_": 0.7095, "moe_loss": 0.1606, "moe_loss_longrong": 1.4347, "step": 371 }, { "epoch": 0.07, "learning_rate": 1.9904445873874068e-05, "loss": 2.5824, "loss_": 0.571, "moe_loss": 0.1606, "moe_loss_longrong": 1.4343, "step": 378 }, { "epoch": 0.07, "learning_rate": 1.98983353263992e-05, "loss": 2.5803, "loss_": 0.9037, "moe_loss": 0.1606, "moe_loss_longrong": 1.4344, "step": 385 }, { "epoch": 0.08, "learning_rate": 1.9892036403647256e-05, "loss": 2.6071, "loss_": 1.0289, "moe_loss": 0.1606, "moe_loss_longrong": 1.4333, "step": 392 }, { "epoch": 0.08, "learning_rate": 1.9885549225493064e-05, "loss": 2.6155, "loss_": 1.227, "moe_loss": 0.1605, "moe_loss_longrong": 1.4324, "step": 399 }, { "epoch": 0.08, "learning_rate": 1.9878873915394154e-05, "loss": 2.6057, "loss_": 1.276, "moe_loss": 0.1607, "moe_loss_longrong": 1.4326, "step": 406 }, { "epoch": 0.08, "learning_rate": 1.987201060038839e-05, "loss": 2.5446, "loss_": 1.1148, "moe_loss": 0.1606, "moe_loss_longrong": 1.432, "step": 413 }, { "epoch": 0.08, "learning_rate": 1.986495941109156e-05, "loss": 2.5787, "loss_": 0.9601, "moe_loss": 0.1605, "moe_loss_longrong": 1.4317, "step": 420 }, { "epoch": 0.08, "learning_rate": 1.9857720481694887e-05, "loss": 2.6018, "loss_": 0.8145, "moe_loss": 0.1606, "moe_loss_longrong": 1.4318, "step": 427 }, { "epoch": 0.08, "learning_rate": 1.985029394996248e-05, "loss": 2.5863, "loss_": 0.9872, "moe_loss": 0.1618, "moe_loss_longrong": 1.4613, "step": 434 }, { "epoch": 0.08, "learning_rate": 1.9842679957228706e-05, "loss": 2.5837, "loss_": 1.165, "moe_loss": 0.1605, "moe_loss_longrong": 1.43, "step": 441 }, { "epoch": 0.09, "learning_rate": 1.9834878648395507e-05, "loss": 2.6015, "loss_": 0.9562, "moe_loss": 0.1605, "moe_loss_longrong": 1.4307, "step": 448 }, { "epoch": 0.09, "learning_rate": 1.9826890171929634e-05, "loss": 2.5453, "loss_": 0.8231, "moe_loss": 0.1605, "moe_loss_longrong": 1.4306, "step": 455 }, { "epoch": 0.09, "learning_rate": 1.981871467985983e-05, "loss": 2.578, "loss_": 0.9864, "moe_loss": 0.1605, "moe_loss_longrong": 1.4306, "step": 462 }, { "epoch": 0.09, "learning_rate": 1.9810352327773935e-05, "loss": 2.5723, "loss_": 1.1748, "moe_loss": 0.1605, "moe_loss_longrong": 1.4305, "step": 469 }, { "epoch": 0.09, "learning_rate": 1.9801803274815915e-05, "loss": 2.6173, "loss_": 1.0737, "moe_loss": 0.1605, "moe_loss_longrong": 1.4297, "step": 476 }, { "epoch": 0.09, "learning_rate": 1.979306768368285e-05, "loss": 2.5664, "loss_": 1.3735, "moe_loss": 0.1605, "moe_loss_longrong": 1.4291, "step": 483 }, { "epoch": 0.09, "learning_rate": 1.9784145720621827e-05, "loss": 2.5832, "loss_": 1.0223, "moe_loss": 0.1605, "moe_loss_longrong": 1.4296, "step": 490 }, { "epoch": 0.1, "learning_rate": 1.9775037555426772e-05, "loss": 2.5448, "loss_": 1.2395, "moe_loss": 0.1607, "moe_loss_longrong": 1.4291, "step": 497 }, { "epoch": 0.1, "learning_rate": 1.9765743361435234e-05, "loss": 2.5729, "loss_": 1.1156, "moe_loss": 0.1604, "moe_loss_longrong": 1.4273, "step": 504 }, { "epoch": 0.1, "learning_rate": 1.975626331552507e-05, "loss": 2.5526, "loss_": 0.8797, "moe_loss": 0.1606, "moe_loss_longrong": 1.4281, "step": 511 }, { "epoch": 0.1, "learning_rate": 1.974659759811109e-05, "loss": 2.573, "loss_": 1.1636, "moe_loss": 0.1605, "moe_loss_longrong": 1.4274, "step": 518 }, { "epoch": 0.1, "learning_rate": 1.9736746393141617e-05, "loss": 2.59, "loss_": 1.1342, "moe_loss": 0.1606, "moe_loss_longrong": 1.4279, "step": 525 }, { "epoch": 0.1, "learning_rate": 1.9726709888094994e-05, "loss": 2.5921, "loss_": 0.8051, "moe_loss": 0.1605, "moe_loss_longrong": 1.4277, "step": 532 }, { "epoch": 0.1, "learning_rate": 1.9716488273976006e-05, "loss": 2.6023, "loss_": 1.2093, "moe_loss": 0.1605, "moe_loss_longrong": 1.4276, "step": 539 }, { "epoch": 0.11, "learning_rate": 1.970608174531224e-05, "loss": 2.5744, "loss_": 0.9951, "moe_loss": 0.1604, "moe_loss_longrong": 1.4256, "step": 546 }, { "epoch": 0.11, "learning_rate": 1.9695490500150418e-05, "loss": 2.5917, "loss_": 1.0794, "moe_loss": 0.1605, "moe_loss_longrong": 1.427, "step": 553 }, { "epoch": 0.11, "learning_rate": 1.9684714740052584e-05, "loss": 2.5849, "loss_": 0.8469, "moe_loss": 0.1605, "moe_loss_longrong": 1.426, "step": 560 }, { "epoch": 0.11, "learning_rate": 1.9673754670092283e-05, "loss": 2.5705, "loss_": 0.96, "moe_loss": 0.1604, "moe_loss_longrong": 1.4265, "step": 567 }, { "epoch": 0.11, "learning_rate": 1.9662610498850684e-05, "loss": 2.5672, "loss_": 1.038, "moe_loss": 0.1605, "moe_loss_longrong": 1.4258, "step": 574 }, { "epoch": 0.11, "learning_rate": 1.965128243841256e-05, "loss": 2.553, "loss_": 1.1173, "moe_loss": 0.1606, "moe_loss_longrong": 1.4267, "step": 581 }, { "epoch": 0.11, "learning_rate": 1.9639770704362305e-05, "loss": 2.5951, "loss_": 1.1815, "moe_loss": 0.1605, "moe_loss_longrong": 1.4255, "step": 588 }, { "epoch": 0.11, "learning_rate": 1.9628075515779796e-05, "loss": 2.5528, "loss_": 0.916, "moe_loss": 0.1604, "moe_loss_longrong": 1.4247, "step": 595 }, { "epoch": 0.12, "learning_rate": 1.961619709523623e-05, "loss": 2.5537, "loss_": 1.1069, "moe_loss": 0.1606, "moe_loss_longrong": 1.4252, "step": 602 }, { "epoch": 0.12, "learning_rate": 1.9604135668789897e-05, "loss": 2.553, "loss_": 0.8815, "moe_loss": 0.1616, "moe_loss_longrong": 1.4545, "step": 609 }, { "epoch": 0.12, "learning_rate": 1.959189146598188e-05, "loss": 2.557, "loss_": 0.3617, "moe_loss": 0.1605, "moe_loss_longrong": 1.4253, "step": 616 }, { "epoch": 0.12, "learning_rate": 1.9579464719831668e-05, "loss": 2.5735, "loss_": 1.1934, "moe_loss": 0.1604, "moe_loss_longrong": 1.4248, "step": 623 }, { "epoch": 0.12, "learning_rate": 1.9566855666832743e-05, "loss": 2.5679, "loss_": 1.2144, "moe_loss": 0.1604, "moe_loss_longrong": 1.4241, "step": 630 }, { "epoch": 0.12, "learning_rate": 1.9554064546948064e-05, "loss": 2.5541, "loss_": 0.7773, "moe_loss": 0.1604, "moe_loss_longrong": 1.4247, "step": 637 }, { "epoch": 0.12, "learning_rate": 1.9541091603605508e-05, "loss": 2.5396, "loss_": 1.0911, "moe_loss": 0.1605, "moe_loss_longrong": 1.4238, "step": 644 }, { "epoch": 0.13, "learning_rate": 1.9527937083693233e-05, "loss": 2.5328, "loss_": 1.2836, "moe_loss": 0.1604, "moe_loss_longrong": 1.4241, "step": 651 }, { "epoch": 0.13, "learning_rate": 1.951460123755499e-05, "loss": 2.559, "loss_": 0.6191, "moe_loss": 0.1603, "moe_loss_longrong": 1.4249, "step": 658 }, { "epoch": 0.13, "learning_rate": 1.9501084318985335e-05, "loss": 2.5656, "loss_": 0.5936, "moe_loss": 0.1604, "moe_loss_longrong": 1.4238, "step": 665 }, { "epoch": 0.13, "learning_rate": 1.948738658522483e-05, "loss": 2.5408, "loss_": 1.0426, "moe_loss": 0.1605, "moe_loss_longrong": 1.4256, "step": 672 }, { "epoch": 0.13, "learning_rate": 1.9473508296955126e-05, "loss": 2.5346, "loss_": 1.0259, "moe_loss": 0.1613, "moe_loss_longrong": 1.4496, "step": 679 }, { "epoch": 0.13, "learning_rate": 1.9459449718294008e-05, "loss": 2.5744, "loss_": 1.2413, "moe_loss": 0.1606, "moe_loss_longrong": 1.4233, "step": 686 }, { "epoch": 0.13, "learning_rate": 1.9445211116790365e-05, "loss": 2.5513, "loss_": 1.1087, "moe_loss": 0.1604, "moe_loss_longrong": 1.4224, "step": 693 }, { "epoch": 0.13, "learning_rate": 1.9430792763419105e-05, "loss": 2.5552, "loss_": 1.1375, "moe_loss": 0.1603, "moe_loss_longrong": 1.4219, "step": 700 }, { "epoch": 0.14, "learning_rate": 1.9416194932576e-05, "loss": 2.5634, "loss_": 0.8712, "moe_loss": 0.1606, "moe_loss_longrong": 1.4224, "step": 707 }, { "epoch": 0.14, "learning_rate": 1.9401417902072447e-05, "loss": 2.5538, "loss_": 0.9992, "moe_loss": 0.1604, "moe_loss_longrong": 1.4211, "step": 714 }, { "epoch": 0.14, "learning_rate": 1.93864619531302e-05, "loss": 2.5786, "loss_": 1.0579, "moe_loss": 0.1604, "moe_loss_longrong": 1.4223, "step": 721 }, { "epoch": 0.14, "learning_rate": 1.9371327370376018e-05, "loss": 2.5565, "loss_": 1.1717, "moe_loss": 0.1604, "moe_loss_longrong": 1.4223, "step": 728 }, { "epoch": 0.14, "learning_rate": 1.935601444183622e-05, "loss": 2.5491, "loss_": 1.0508, "moe_loss": 0.1605, "moe_loss_longrong": 1.4216, "step": 735 }, { "epoch": 0.14, "learning_rate": 1.934052345893125e-05, "loss": 2.5485, "loss_": 1.1535, "moe_loss": 0.1603, "moe_loss_longrong": 1.4201, "step": 742 }, { "epoch": 0.14, "learning_rate": 1.932485471647009e-05, "loss": 2.5434, "loss_": 0.8935, "moe_loss": 0.1613, "moe_loss_longrong": 1.4462, "step": 749 }, { "epoch": 0.15, "learning_rate": 1.9309008512644668e-05, "loss": 2.5549, "loss_": 0.9146, "moe_loss": 0.1604, "moe_loss_longrong": 1.4212, "step": 756 }, { "epoch": 0.15, "learning_rate": 1.929298514902418e-05, "loss": 2.5655, "loss_": 1.2834, "moe_loss": 0.1604, "moe_loss_longrong": 1.4206, "step": 763 }, { "epoch": 0.15, "learning_rate": 1.927678493054935e-05, "loss": 2.5664, "loss_": 1.1543, "moe_loss": 0.1603, "moe_loss_longrong": 1.4203, "step": 770 }, { "epoch": 0.15, "learning_rate": 1.9260408165526638e-05, "loss": 2.5559, "loss_": 1.1544, "moe_loss": 0.1604, "moe_loss_longrong": 1.4205, "step": 777 }, { "epoch": 0.15, "learning_rate": 1.9243855165622345e-05, "loss": 2.538, "loss_": 0.9985, "moe_loss": 0.1603, "moe_loss_longrong": 1.42, "step": 784 }, { "epoch": 0.15, "learning_rate": 1.9227126245856716e-05, "loss": 2.528, "loss_": 0.766, "moe_loss": 0.1609, "moe_loss_longrong": 1.4442, "step": 791 }, { "epoch": 0.15, "learning_rate": 1.921022172459791e-05, "loss": 2.56, "loss_": 1.0356, "moe_loss": 0.1603, "moe_loss_longrong": 1.4201, "step": 798 }, { "epoch": 0.15, "learning_rate": 1.9193141923555984e-05, "loss": 2.5418, "loss_": 1.0224, "moe_loss": 0.1604, "moe_loss_longrong": 1.4191, "step": 805 }, { "epoch": 0.16, "learning_rate": 1.917588716777672e-05, "loss": 2.5489, "loss_": 1.2415, "moe_loss": 0.1604, "moe_loss_longrong": 1.42, "step": 812 }, { "epoch": 0.16, "learning_rate": 1.9158457785635478e-05, "loss": 2.5647, "loss_": 1.0902, "moe_loss": 0.1603, "moe_loss_longrong": 1.4194, "step": 819 }, { "epoch": 0.16, "learning_rate": 1.914085410883093e-05, "loss": 2.5695, "loss_": 1.0454, "moe_loss": 0.1603, "moe_loss_longrong": 1.4196, "step": 826 }, { "epoch": 0.16, "learning_rate": 1.9123076472378753e-05, "loss": 2.5355, "loss_": 1.1475, "moe_loss": 0.1603, "moe_loss_longrong": 1.4185, "step": 833 }, { "epoch": 0.16, "learning_rate": 1.910512521460525e-05, "loss": 2.5557, "loss_": 0.7606, "moe_loss": 0.1603, "moe_loss_longrong": 1.4187, "step": 840 }, { "epoch": 0.16, "learning_rate": 1.908700067714091e-05, "loss": 2.5223, "loss_": 0.9963, "moe_loss": 0.1611, "moe_loss_longrong": 1.4451, "step": 847 }, { "epoch": 0.16, "learning_rate": 1.906870320491391e-05, "loss": 2.5422, "loss_": 0.6658, "moe_loss": 0.1603, "moe_loss_longrong": 1.4189, "step": 854 }, { "epoch": 0.17, "learning_rate": 1.9050233146143554e-05, "loss": 2.5373, "loss_": 1.022, "moe_loss": 0.1604, "moe_loss_longrong": 1.4191, "step": 861 }, { "epoch": 0.17, "learning_rate": 1.9031590852333637e-05, "loss": 2.5536, "loss_": 0.9191, "moe_loss": 0.1603, "moe_loss_longrong": 1.4191, "step": 868 }, { "epoch": 0.17, "learning_rate": 1.9012776678265756e-05, "loss": 2.5076, "loss_": 0.4788, "moe_loss": 0.1604, "moe_loss_longrong": 1.4179, "step": 875 }, { "epoch": 0.17, "learning_rate": 1.899379098199257e-05, "loss": 2.5061, "loss_": 1.1005, "moe_loss": 0.1604, "moe_loss_longrong": 1.4178, "step": 882 }, { "epoch": 0.17, "learning_rate": 1.897463412483098e-05, "loss": 2.5584, "loss_": 1.0856, "moe_loss": 0.1604, "moe_loss_longrong": 1.4184, "step": 889 }, { "epoch": 0.17, "learning_rate": 1.895530647135524e-05, "loss": 2.5329, "loss_": 0.7922, "moe_loss": 0.1604, "moe_loss_longrong": 1.4175, "step": 896 }, { "epoch": 0.17, "learning_rate": 1.8935808389390032e-05, "loss": 2.524, "loss_": 1.0799, "moe_loss": 0.1603, "moe_loss_longrong": 1.4163, "step": 903 }, { "epoch": 0.18, "learning_rate": 1.8916140250003475e-05, "loss": 2.5423, "loss_": 0.8152, "moe_loss": 0.1603, "moe_loss_longrong": 1.4174, "step": 910 }, { "epoch": 0.18, "learning_rate": 1.8896302427500042e-05, "loss": 2.533, "loss_": 1.0454, "moe_loss": 0.1605, "moe_loss_longrong": 1.4181, "step": 917 }, { "epoch": 0.18, "learning_rate": 1.8876295299413445e-05, "loss": 2.522, "loss_": 1.1185, "moe_loss": 0.1603, "moe_loss_longrong": 1.4169, "step": 924 }, { "epoch": 0.18, "learning_rate": 1.885611924649946e-05, "loss": 2.5539, "loss_": 1.1256, "moe_loss": 0.1603, "moe_loss_longrong": 1.4167, "step": 931 }, { "epoch": 0.18, "learning_rate": 1.883577465272866e-05, "loss": 2.5069, "loss_": 0.743, "moe_loss": 0.1604, "moe_loss_longrong": 1.4177, "step": 938 }, { "epoch": 0.18, "learning_rate": 1.8815261905279133e-05, "loss": 2.5429, "loss_": 1.0463, "moe_loss": 0.1604, "moe_loss_longrong": 1.4162, "step": 945 }, { "epoch": 0.18, "learning_rate": 1.879458139452909e-05, "loss": 2.5381, "loss_": 1.0908, "moe_loss": 0.1603, "moe_loss_longrong": 1.4152, "step": 952 }, { "epoch": 0.18, "learning_rate": 1.877373351404946e-05, "loss": 2.4924, "loss_": 1.1275, "moe_loss": 0.1603, "moe_loss_longrong": 1.4167, "step": 959 }, { "epoch": 0.19, "learning_rate": 1.8752718660596367e-05, "loss": 2.536, "loss_": 0.7467, "moe_loss": 0.1604, "moe_loss_longrong": 1.4165, "step": 966 }, { "epoch": 0.19, "learning_rate": 1.873153723410362e-05, "loss": 2.507, "loss_": 1.0083, "moe_loss": 0.1609, "moe_loss_longrong": 1.4404, "step": 973 }, { "epoch": 0.19, "learning_rate": 1.8710189637675055e-05, "loss": 2.5118, "loss_": 0.874, "moe_loss": 0.1603, "moe_loss_longrong": 1.4161, "step": 980 }, { "epoch": 0.19, "learning_rate": 1.8688676277576916e-05, "loss": 2.5415, "loss_": 1.1152, "moe_loss": 0.1603, "moe_loss_longrong": 1.4157, "step": 987 }, { "epoch": 0.19, "learning_rate": 1.866699756323008e-05, "loss": 2.5225, "loss_": 0.8857, "moe_loss": 0.1603, "moe_loss_longrong": 1.416, "step": 994 }, { "epoch": 0.19, "learning_rate": 1.8645153907202285e-05, "loss": 2.5093, "loss_": 1.0791, "moe_loss": 0.1603, "moe_loss_longrong": 1.4157, "step": 1001 }, { "epoch": 0.19, "learning_rate": 1.862314572520028e-05, "loss": 2.534, "loss_": 1.1649, "moe_loss": 0.1603, "moe_loss_longrong": 1.4167, "step": 1008 }, { "epoch": 0.2, "learning_rate": 1.86009734360619e-05, "loss": 2.559, "loss_": 1.2289, "moe_loss": 0.1604, "moe_loss_longrong": 1.4163, "step": 1015 }, { "epoch": 0.2, "learning_rate": 1.8578637461748105e-05, "loss": 2.5738, "loss_": 0.8422, "moe_loss": 0.1603, "moe_loss_longrong": 1.416, "step": 1022 }, { "epoch": 0.2, "learning_rate": 1.8556138227334957e-05, "loss": 2.5752, "loss_": 1.0332, "moe_loss": 0.1603, "moe_loss_longrong": 1.4157, "step": 1029 }, { "epoch": 0.2, "learning_rate": 1.853347616100552e-05, "loss": 2.5633, "loss_": 1.2742, "moe_loss": 0.1602, "moe_loss_longrong": 1.4152, "step": 1036 }, { "epoch": 0.2, "learning_rate": 1.8510651694041702e-05, "loss": 2.5491, "loss_": 1.1394, "moe_loss": 0.1603, "moe_loss_longrong": 1.4153, "step": 1043 }, { "epoch": 0.2, "learning_rate": 1.848766526081607e-05, "loss": 2.5032, "loss_": 1.0904, "moe_loss": 0.1603, "moe_loss_longrong": 1.4148, "step": 1050 }, { "epoch": 0.2, "learning_rate": 1.846451729878357e-05, "loss": 2.5687, "loss_": 1.0973, "moe_loss": 0.1603, "moe_loss_longrong": 1.4144, "step": 1057 }, { "epoch": 0.2, "learning_rate": 1.84412082484732e-05, "loss": 2.5378, "loss_": 0.7628, "moe_loss": 0.161, "moe_loss_longrong": 1.4372, "step": 1064 }, { "epoch": 0.21, "learning_rate": 1.841773855347963e-05, "loss": 2.5285, "loss_": 1.0832, "moe_loss": 0.1603, "moe_loss_longrong": 1.4148, "step": 1071 }, { "epoch": 0.21, "learning_rate": 1.8394108660454766e-05, "loss": 2.53, "loss_": 0.7952, "moe_loss": 0.1603, "moe_loss_longrong": 1.415, "step": 1078 }, { "epoch": 0.21, "learning_rate": 1.8370319019099236e-05, "loss": 2.5457, "loss_": 0.8855, "moe_loss": 0.1603, "moe_loss_longrong": 1.4146, "step": 1085 }, { "epoch": 0.21, "learning_rate": 1.8346370082153843e-05, "loss": 2.5227, "loss_": 1.1518, "moe_loss": 0.1603, "moe_loss_longrong": 1.4148, "step": 1092 }, { "epoch": 0.21, "learning_rate": 1.8322262305390948e-05, "loss": 2.5268, "loss_": 1.0055, "moe_loss": 0.1603, "moe_loss_longrong": 1.4139, "step": 1099 }, { "epoch": 0.21, "learning_rate": 1.8297996147605787e-05, "loss": 2.5418, "loss_": 1.2226, "moe_loss": 0.1603, "moe_loss_longrong": 1.4139, "step": 1106 }, { "epoch": 0.21, "learning_rate": 1.8273572070607756e-05, "loss": 2.5465, "loss_": 1.0475, "moe_loss": 0.1603, "moe_loss_longrong": 1.414, "step": 1113 }, { "epoch": 0.22, "learning_rate": 1.8248990539211596e-05, "loss": 2.5132, "loss_": 1.2063, "moe_loss": 0.1603, "moe_loss_longrong": 1.4148, "step": 1120 }, { "epoch": 0.22, "learning_rate": 1.822425202122858e-05, "loss": 2.5236, "loss_": 1.3605, "moe_loss": 0.1603, "moe_loss_longrong": 1.4145, "step": 1127 }, { "epoch": 0.22, "learning_rate": 1.819935698745759e-05, "loss": 2.517, "loss_": 0.9965, "moe_loss": 0.1602, "moe_loss_longrong": 1.4137, "step": 1134 }, { "epoch": 0.22, "learning_rate": 1.817430591167615e-05, "loss": 2.5347, "loss_": 1.1165, "moe_loss": 0.1602, "moe_loss_longrong": 1.4133, "step": 1141 }, { "epoch": 0.22, "learning_rate": 1.8149099270631434e-05, "loss": 2.5051, "loss_": 0.9918, "moe_loss": 0.1602, "moe_loss_longrong": 1.4133, "step": 1148 }, { "epoch": 0.22, "learning_rate": 1.8123737544031178e-05, "loss": 2.5228, "loss_": 1.0757, "moe_loss": 0.1603, "moe_loss_longrong": 1.4132, "step": 1155 }, { "epoch": 0.22, "learning_rate": 1.8098221214534543e-05, "loss": 2.5117, "loss_": 0.9441, "moe_loss": 0.1608, "moe_loss_longrong": 1.435, "step": 1162 }, { "epoch": 0.22, "learning_rate": 1.807255076774294e-05, "loss": 2.5292, "loss_": 0.9315, "moe_loss": 0.1608, "moe_loss_longrong": 1.4332, "step": 1169 }, { "epoch": 0.23, "learning_rate": 1.80467266921908e-05, "loss": 2.4974, "loss_": 0.9341, "moe_loss": 0.1603, "moe_loss_longrong": 1.414, "step": 1176 }, { "epoch": 0.23, "learning_rate": 1.802074947933625e-05, "loss": 2.5251, "loss_": 1.0369, "moe_loss": 0.1602, "moe_loss_longrong": 1.4127, "step": 1183 }, { "epoch": 0.23, "learning_rate": 1.799461962355178e-05, "loss": 2.5424, "loss_": 1.2525, "moe_loss": 0.1602, "moe_loss_longrong": 1.4133, "step": 1190 }, { "epoch": 0.23, "learning_rate": 1.7968337622114824e-05, "loss": 2.5123, "loss_": 1.4116, "moe_loss": 0.1603, "moe_loss_longrong": 1.4131, "step": 1197 }, { "epoch": 0.23, "learning_rate": 1.7941903975198305e-05, "loss": 2.5119, "loss_": 0.8823, "moe_loss": 0.1603, "moe_loss_longrong": 1.4132, "step": 1204 }, { "epoch": 0.23, "learning_rate": 1.791531918586112e-05, "loss": 2.5372, "loss_": 0.9075, "moe_loss": 0.1602, "moe_loss_longrong": 1.4125, "step": 1211 }, { "epoch": 0.23, "learning_rate": 1.7888583760038534e-05, "loss": 2.5356, "loss_": 1.065, "moe_loss": 0.1603, "moe_loss_longrong": 1.4133, "step": 1218 }, { "epoch": 0.24, "learning_rate": 1.78616982065326e-05, "loss": 2.5154, "loss_": 0.9056, "moe_loss": 0.1611, "moe_loss_longrong": 1.4338, "step": 1225 }, { "epoch": 0.24, "learning_rate": 1.7834663037002444e-05, "loss": 2.5377, "loss_": 0.8469, "moe_loss": 0.1603, "moe_loss_longrong": 1.4123, "step": 1232 }, { "epoch": 0.24, "learning_rate": 1.7807478765954532e-05, "loss": 2.5363, "loss_": 1.0507, "moe_loss": 0.1602, "moe_loss_longrong": 1.4116, "step": 1239 }, { "epoch": 0.24, "learning_rate": 1.778014591073288e-05, "loss": 2.5131, "loss_": 1.0501, "moe_loss": 0.1603, "moe_loss_longrong": 1.4123, "step": 1246 }, { "epoch": 0.24, "learning_rate": 1.7752664991509224e-05, "loss": 2.5127, "loss_": 1.0583, "moe_loss": 0.1603, "moe_loss_longrong": 1.412, "step": 1253 }, { "epoch": 0.24, "learning_rate": 1.7725036531273087e-05, "loss": 2.5147, "loss_": 1.0225, "moe_loss": 0.1602, "moe_loss_longrong": 1.4119, "step": 1260 }, { "epoch": 0.24, "learning_rate": 1.7697261055821864e-05, "loss": 2.5399, "loss_": 1.1622, "moe_loss": 0.1603, "moe_loss_longrong": 1.4123, "step": 1267 }, { "epoch": 0.25, "learning_rate": 1.7669339093750786e-05, "loss": 2.5042, "loss_": 1.1, "moe_loss": 0.1602, "moe_loss_longrong": 1.4118, "step": 1274 }, { "epoch": 0.25, "learning_rate": 1.7641271176442876e-05, "loss": 2.4867, "loss_": 1.1917, "moe_loss": 0.1602, "moe_loss_longrong": 1.4116, "step": 1281 }, { "epoch": 0.25, "learning_rate": 1.761305783805883e-05, "loss": 2.5159, "loss_": 1.1055, "moe_loss": 0.1603, "moe_loss_longrong": 1.4114, "step": 1288 }, { "epoch": 0.25, "learning_rate": 1.7584699615526857e-05, "loss": 2.5064, "loss_": 0.7231, "moe_loss": 0.1603, "moe_loss_longrong": 1.4119, "step": 1295 }, { "epoch": 0.25, "learning_rate": 1.755619704853246e-05, "loss": 2.5335, "loss_": 0.9998, "moe_loss": 0.1603, "moe_loss_longrong": 1.4112, "step": 1302 }, { "epoch": 0.25, "learning_rate": 1.752755067950814e-05, "loss": 2.5332, "loss_": 1.2823, "moe_loss": 0.1602, "moe_loss_longrong": 1.4114, "step": 1309 }, { "epoch": 0.25, "learning_rate": 1.749876105362313e-05, "loss": 2.5212, "loss_": 1.2273, "moe_loss": 0.1602, "moe_loss_longrong": 1.4109, "step": 1316 }, { "epoch": 0.25, "learning_rate": 1.746982871877296e-05, "loss": 2.5149, "loss_": 1.2552, "moe_loss": 0.1602, "moe_loss_longrong": 1.4113, "step": 1323 }, { "epoch": 0.26, "learning_rate": 1.744075422556906e-05, "loss": 2.4876, "loss_": 0.9236, "moe_loss": 0.1603, "moe_loss_longrong": 1.4115, "step": 1330 }, { "epoch": 0.26, "learning_rate": 1.741153812732828e-05, "loss": 2.5421, "loss_": 1.155, "moe_loss": 0.1602, "moe_loss_longrong": 1.4106, "step": 1337 }, { "epoch": 0.26, "learning_rate": 1.7382180980062365e-05, "loss": 2.5095, "loss_": 0.6978, "moe_loss": 0.1602, "moe_loss_longrong": 1.4109, "step": 1344 }, { "epoch": 0.26, "learning_rate": 1.735268334246734e-05, "loss": 2.5068, "loss_": 1.0142, "moe_loss": 0.1602, "moe_loss_longrong": 1.4105, "step": 1351 }, { "epoch": 0.26, "learning_rate": 1.7323045775912927e-05, "loss": 2.5247, "loss_": 0.9039, "moe_loss": 0.1603, "moe_loss_longrong": 1.4111, "step": 1358 }, { "epoch": 0.26, "learning_rate": 1.7293268844431826e-05, "loss": 2.5308, "loss_": 1.1161, "moe_loss": 0.1602, "moe_loss_longrong": 1.4101, "step": 1365 }, { "epoch": 0.26, "learning_rate": 1.7263353114708993e-05, "loss": 2.516, "loss_": 0.5494, "moe_loss": 0.1603, "moe_loss_longrong": 1.4105, "step": 1372 }, { "epoch": 0.27, "learning_rate": 1.7233299156070852e-05, "loss": 2.5405, "loss_": 1.0823, "moe_loss": 0.1602, "moe_loss_longrong": 1.4091, "step": 1379 }, { "epoch": 0.27, "learning_rate": 1.720310754047446e-05, "loss": 2.5123, "loss_": 1.016, "moe_loss": 0.1602, "moe_loss_longrong": 1.41, "step": 1386 }, { "epoch": 0.27, "learning_rate": 1.717277884249664e-05, "loss": 2.4917, "loss_": 1.1087, "moe_loss": 0.1602, "moe_loss_longrong": 1.4103, "step": 1393 }, { "epoch": 0.27, "learning_rate": 1.7142313639323012e-05, "loss": 2.5184, "loss_": 1.2711, "moe_loss": 0.1602, "moe_loss_longrong": 1.4101, "step": 1400 }, { "epoch": 0.27, "learning_rate": 1.7111712510737035e-05, "loss": 2.5037, "loss_": 1.1198, "moe_loss": 0.1603, "moe_loss_longrong": 1.4102, "step": 1407 }, { "epoch": 0.27, "learning_rate": 1.7080976039108964e-05, "loss": 2.5262, "loss_": 0.9286, "moe_loss": 0.1612, "moe_loss_longrong": 1.4315, "step": 1414 }, { "epoch": 0.27, "learning_rate": 1.7050104809384774e-05, "loss": 2.4892, "loss_": 1.0377, "moe_loss": 0.1609, "moe_loss_longrong": 1.4276, "step": 1421 }, { "epoch": 0.27, "learning_rate": 1.7019099409075014e-05, "loss": 2.5076, "loss_": 0.9405, "moe_loss": 0.1602, "moe_loss_longrong": 1.4092, "step": 1428 }, { "epoch": 0.28, "learning_rate": 1.6987960428243637e-05, "loss": 2.5198, "loss_": 1.3093, "moe_loss": 0.1602, "moe_loss_longrong": 1.4094, "step": 1435 }, { "epoch": 0.28, "learning_rate": 1.6956688459496767e-05, "loss": 2.5508, "loss_": 1.0043, "moe_loss": 0.1602, "moe_loss_longrong": 1.4083, "step": 1442 }, { "epoch": 0.28, "learning_rate": 1.6925284097971427e-05, "loss": 2.5299, "loss_": 1.1324, "moe_loss": 0.1602, "moe_loss_longrong": 1.4091, "step": 1449 }, { "epoch": 0.28, "learning_rate": 1.6893747941324197e-05, "loss": 2.5495, "loss_": 0.7979, "moe_loss": 0.1602, "moe_loss_longrong": 1.4097, "step": 1456 }, { "epoch": 0.28, "learning_rate": 1.6862080589719863e-05, "loss": 2.4692, "loss_": 0.563, "moe_loss": 0.1602, "moe_loss_longrong": 1.4097, "step": 1463 }, { "epoch": 0.28, "learning_rate": 1.6830282645819974e-05, "loss": 2.5107, "loss_": 0.3532, "moe_loss": 0.1604, "moe_loss_longrong": 1.4107, "step": 1470 }, { "epoch": 0.28, "learning_rate": 1.679835471477139e-05, "loss": 2.498, "loss_": 0.9877, "moe_loss": 0.1602, "moe_loss_longrong": 1.4086, "step": 1477 }, { "epoch": 0.29, "learning_rate": 1.6766297404194745e-05, "loss": 2.502, "loss_": 1.1402, "moe_loss": 0.1603, "moe_loss_longrong": 1.4091, "step": 1484 }, { "epoch": 0.29, "learning_rate": 1.673411132417291e-05, "loss": 2.5066, "loss_": 1.167, "moe_loss": 0.1602, "moe_loss_longrong": 1.409, "step": 1491 }, { "epoch": 0.29, "learning_rate": 1.6701797087239354e-05, "loss": 2.5273, "loss_": 0.914, "moe_loss": 0.1607, "moe_loss_longrong": 1.4264, "step": 1498 }, { "epoch": 0.29, "learning_rate": 1.666935530836651e-05, "loss": 2.5022, "loss_": 0.9724, "moe_loss": 0.1602, "moe_loss_longrong": 1.4087, "step": 1505 }, { "epoch": 0.29, "learning_rate": 1.663678660495406e-05, "loss": 2.4806, "loss_": 1.1766, "moe_loss": 0.1602, "moe_loss_longrong": 1.4086, "step": 1512 }, { "epoch": 0.29, "learning_rate": 1.6604091596817193e-05, "loss": 2.5228, "loss_": 0.8955, "moe_loss": 0.1602, "moe_loss_longrong": 1.4086, "step": 1519 }, { "epoch": 0.29, "learning_rate": 1.657127090617479e-05, "loss": 2.5303, "loss_": 0.9496, "moe_loss": 0.1602, "moe_loss_longrong": 1.4079, "step": 1526 }, { "epoch": 0.29, "learning_rate": 1.6538325157637614e-05, "loss": 2.5162, "loss_": 0.8978, "moe_loss": 0.1602, "moe_loss_longrong": 1.4085, "step": 1533 }, { "epoch": 0.3, "learning_rate": 1.650525497819639e-05, "loss": 2.5187, "loss_": 0.787, "moe_loss": 0.1608, "moe_loss_longrong": 1.4256, "step": 1540 }, { "epoch": 0.3, "learning_rate": 1.6472060997209898e-05, "loss": 2.5283, "loss_": 1.1671, "moe_loss": 0.1602, "moe_loss_longrong": 1.4085, "step": 1547 }, { "epoch": 0.3, "learning_rate": 1.6438743846392987e-05, "loss": 2.5049, "loss_": 0.7488, "moe_loss": 0.1602, "moe_loss_longrong": 1.4082, "step": 1554 }, { "epoch": 0.3, "learning_rate": 1.6405304159804534e-05, "loss": 2.4966, "loss_": 1.0698, "moe_loss": 0.1602, "moe_loss_longrong": 1.4078, "step": 1561 }, { "epoch": 0.3, "learning_rate": 1.6371742573835426e-05, "loss": 2.5307, "loss_": 1.1426, "moe_loss": 0.1602, "moe_loss_longrong": 1.4075, "step": 1568 }, { "epoch": 0.3, "learning_rate": 1.6338059727196386e-05, "loss": 2.4884, "loss_": 1.0242, "moe_loss": 0.1603, "moe_loss_longrong": 1.4078, "step": 1575 }, { "epoch": 0.3, "learning_rate": 1.6304256260905872e-05, "loss": 2.5239, "loss_": 1.1671, "moe_loss": 0.1602, "moe_loss_longrong": 1.4079, "step": 1582 }, { "epoch": 0.31, "learning_rate": 1.627033281827785e-05, "loss": 2.5292, "loss_": 0.9625, "moe_loss": 0.1607, "moe_loss_longrong": 1.4262, "step": 1589 }, { "epoch": 0.31, "learning_rate": 1.6236290044909543e-05, "loss": 2.5336, "loss_": 0.8255, "moe_loss": 0.1602, "moe_loss_longrong": 1.4081, "step": 1596 }, { "epoch": 0.31, "learning_rate": 1.6202128588669177e-05, "loss": 2.5205, "loss_": 0.7348, "moe_loss": 0.1603, "moe_loss_longrong": 1.4082, "step": 1603 }, { "epoch": 0.31, "learning_rate": 1.6167849099683623e-05, "loss": 2.4854, "loss_": 0.9767, "moe_loss": 0.1602, "moe_loss_longrong": 1.4075, "step": 1610 }, { "epoch": 0.31, "learning_rate": 1.6133452230326035e-05, "loss": 2.5265, "loss_": 0.9913, "moe_loss": 0.1602, "moe_loss_longrong": 1.4074, "step": 1617 }, { "epoch": 0.31, "learning_rate": 1.609893863520343e-05, "loss": 2.4785, "loss_": 0.9806, "moe_loss": 0.1602, "moe_loss_longrong": 1.4076, "step": 1624 }, { "epoch": 0.31, "learning_rate": 1.6064308971144236e-05, "loss": 2.5053, "loss_": 1.207, "moe_loss": 0.1602, "moe_loss_longrong": 1.4073, "step": 1631 }, { "epoch": 0.32, "learning_rate": 1.60295638971858e-05, "loss": 2.5212, "loss_": 1.2091, "moe_loss": 0.1602, "moe_loss_longrong": 1.4072, "step": 1638 }, { "epoch": 0.32, "learning_rate": 1.599470407456182e-05, "loss": 2.5177, "loss_": 1.0634, "moe_loss": 0.1602, "moe_loss_longrong": 1.4071, "step": 1645 }, { "epoch": 0.32, "learning_rate": 1.5959730166689783e-05, "loss": 2.5219, "loss_": 0.7302, "moe_loss": 0.1602, "moe_loss_longrong": 1.4069, "step": 1652 }, { "epoch": 0.32, "learning_rate": 1.5924642839158334e-05, "loss": 2.5273, "loss_": 1.1267, "moe_loss": 0.1602, "moe_loss_longrong": 1.4065, "step": 1659 }, { "epoch": 0.32, "learning_rate": 1.5889442759714603e-05, "loss": 2.5067, "loss_": 0.8492, "moe_loss": 0.1606, "moe_loss_longrong": 1.4234, "step": 1666 }, { "epoch": 0.32, "learning_rate": 1.5854130598251514e-05, "loss": 2.4997, "loss_": 1.0397, "moe_loss": 0.1602, "moe_loss_longrong": 1.4065, "step": 1673 }, { "epoch": 0.32, "learning_rate": 1.581870702679501e-05, "loss": 2.5277, "loss_": 0.9804, "moe_loss": 0.1602, "moe_loss_longrong": 1.4073, "step": 1680 }, { "epoch": 0.32, "learning_rate": 1.5783172719491288e-05, "loss": 2.5191, "loss_": 1.1463, "moe_loss": 0.1602, "moe_loss_longrong": 1.4064, "step": 1687 }, { "epoch": 0.33, "learning_rate": 1.5747528352593956e-05, "loss": 2.4859, "loss_": 1.0594, "moe_loss": 0.1602, "moe_loss_longrong": 1.4063, "step": 1694 }, { "epoch": 0.33, "learning_rate": 1.5711774604451168e-05, "loss": 2.5146, "loss_": 1.0352, "moe_loss": 0.1602, "moe_loss_longrong": 1.4063, "step": 1701 }, { "epoch": 0.33, "learning_rate": 1.567591215549271e-05, "loss": 2.5086, "loss_": 0.8248, "moe_loss": 0.1602, "moe_loss_longrong": 1.4067, "step": 1708 }, { "epoch": 0.33, "learning_rate": 1.5639941688217063e-05, "loss": 2.4807, "loss_": 0.8445, "moe_loss": 0.1602, "moe_loss_longrong": 1.4063, "step": 1715 }, { "epoch": 0.33, "learning_rate": 1.5603863887178393e-05, "loss": 2.5192, "loss_": 0.9476, "moe_loss": 0.1602, "moe_loss_longrong": 1.4057, "step": 1722 }, { "epoch": 0.33, "learning_rate": 1.5567679438973543e-05, "loss": 2.5131, "loss_": 1.1376, "moe_loss": 0.1602, "moe_loss_longrong": 1.4062, "step": 1729 }, { "epoch": 0.33, "learning_rate": 1.5531389032228955e-05, "loss": 2.4964, "loss_": 1.2426, "moe_loss": 0.1602, "moe_loss_longrong": 1.4064, "step": 1736 }, { "epoch": 0.34, "learning_rate": 1.549499335758757e-05, "loss": 2.5134, "loss_": 0.8763, "moe_loss": 0.1602, "moe_loss_longrong": 1.406, "step": 1743 }, { "epoch": 0.34, "learning_rate": 1.5458493107695688e-05, "loss": 2.4855, "loss_": 1.1827, "moe_loss": 0.1602, "moe_loss_longrong": 1.4061, "step": 1750 }, { "epoch": 0.34, "learning_rate": 1.542188897718977e-05, "loss": 2.4889, "loss_": 1.014, "moe_loss": 0.1602, "moe_loss_longrong": 1.4064, "step": 1757 }, { "epoch": 0.34, "learning_rate": 1.5385181662683244e-05, "loss": 2.5111, "loss_": 1.0933, "moe_loss": 0.1602, "moe_loss_longrong": 1.4058, "step": 1764 }, { "epoch": 0.34, "learning_rate": 1.534837186275322e-05, "loss": 2.5385, "loss_": 0.8571, "moe_loss": 0.1602, "moe_loss_longrong": 1.4062, "step": 1771 }, { "epoch": 0.34, "learning_rate": 1.531146027792722e-05, "loss": 2.5107, "loss_": 0.9431, "moe_loss": 0.1602, "moe_loss_longrong": 1.4058, "step": 1778 }, { "epoch": 0.34, "learning_rate": 1.527444761066982e-05, "loss": 2.5031, "loss_": 1.2333, "moe_loss": 0.1602, "moe_loss_longrong": 1.4056, "step": 1785 }, { "epoch": 0.34, "learning_rate": 1.523733456536931e-05, "loss": 2.4927, "loss_": 1.1367, "moe_loss": 0.1602, "moe_loss_longrong": 1.406, "step": 1792 }, { "epoch": 0.35, "learning_rate": 1.5200121848324276e-05, "loss": 2.5148, "loss_": 1.0457, "moe_loss": 0.1602, "moe_loss_longrong": 1.4052, "step": 1799 }, { "epoch": 0.35, "learning_rate": 1.5162810167730144e-05, "loss": 2.4974, "loss_": 0.8327, "moe_loss": 0.1602, "moe_loss_longrong": 1.4055, "step": 1806 }, { "epoch": 0.35, "learning_rate": 1.5125400233665728e-05, "loss": 2.4938, "loss_": 0.965, "moe_loss": 0.1603, "moe_loss_longrong": 1.4061, "step": 1813 }, { "epoch": 0.35, "learning_rate": 1.50878927580797e-05, "loss": 2.4854, "loss_": 1.0636, "moe_loss": 0.1602, "moe_loss_longrong": 1.4061, "step": 1820 }, { "epoch": 0.35, "learning_rate": 1.5050288454777047e-05, "loss": 2.4829, "loss_": 0.8501, "moe_loss": 0.1607, "moe_loss_longrong": 1.4209, "step": 1827 }, { "epoch": 0.35, "learning_rate": 1.501258803940548e-05, "loss": 2.5151, "loss_": 1.2857, "moe_loss": 0.1602, "moe_loss_longrong": 1.4049, "step": 1834 }, { "epoch": 0.35, "learning_rate": 1.4974792229441826e-05, "loss": 2.5045, "loss_": 1.0645, "moe_loss": 0.1602, "moe_loss_longrong": 1.4054, "step": 1841 }, { "epoch": 0.36, "learning_rate": 1.4936901744178367e-05, "loss": 2.5062, "loss_": 0.5678, "moe_loss": 0.1602, "moe_loss_longrong": 1.4056, "step": 1848 }, { "epoch": 0.36, "learning_rate": 1.489891730470914e-05, "loss": 2.4826, "loss_": 1.2262, "moe_loss": 0.1602, "moe_loss_longrong": 1.4049, "step": 1855 }, { "epoch": 0.36, "learning_rate": 1.4860839633916236e-05, "loss": 2.466, "loss_": 0.7849, "moe_loss": 0.1602, "moe_loss_longrong": 1.4048, "step": 1862 }, { "epoch": 0.36, "learning_rate": 1.4822669456456031e-05, "loss": 2.4872, "loss_": 0.8576, "moe_loss": 0.1602, "moe_loss_longrong": 1.4052, "step": 1869 }, { "epoch": 0.36, "learning_rate": 1.4784407498745394e-05, "loss": 2.4951, "loss_": 0.8778, "moe_loss": 0.1601, "moe_loss_longrong": 1.4052, "step": 1876 }, { "epoch": 0.36, "learning_rate": 1.4746054488947863e-05, "loss": 2.4876, "loss_": 0.8237, "moe_loss": 0.1602, "moe_loss_longrong": 1.405, "step": 1883 }, { "epoch": 0.36, "learning_rate": 1.470761115695979e-05, "loss": 2.4986, "loss_": 0.9971, "moe_loss": 0.1602, "moe_loss_longrong": 1.4045, "step": 1890 }, { "epoch": 0.36, "learning_rate": 1.4669078234396454e-05, "loss": 2.4678, "loss_": 1.1283, "moe_loss": 0.1602, "moe_loss_longrong": 1.4043, "step": 1897 }, { "epoch": 0.37, "learning_rate": 1.4630456454578122e-05, "loss": 2.516, "loss_": 0.9592, "moe_loss": 0.1602, "moe_loss_longrong": 1.4045, "step": 1904 }, { "epoch": 0.37, "learning_rate": 1.4591746552516109e-05, "loss": 2.5208, "loss_": 1.0451, "moe_loss": 0.1607, "moe_loss_longrong": 1.4182, "step": 1911 }, { "epoch": 0.37, "learning_rate": 1.4552949264898795e-05, "loss": 2.498, "loss_": 0.8697, "moe_loss": 0.1601, "moe_loss_longrong": 1.4045, "step": 1918 }, { "epoch": 0.37, "learning_rate": 1.4514065330077575e-05, "loss": 2.5174, "loss_": 0.8274, "moe_loss": 0.1601, "moe_loss_longrong": 1.4044, "step": 1925 }, { "epoch": 0.37, "learning_rate": 1.4475095488052843e-05, "loss": 2.5038, "loss_": 0.7792, "moe_loss": 0.1602, "moe_loss_longrong": 1.4052, "step": 1932 }, { "epoch": 0.37, "learning_rate": 1.4436040480459891e-05, "loss": 2.5116, "loss_": 0.9444, "moe_loss": 0.1602, "moe_loss_longrong": 1.4044, "step": 1939 }, { "epoch": 0.37, "learning_rate": 1.4396901050554794e-05, "loss": 2.4786, "loss_": 1.0648, "moe_loss": 0.1602, "moe_loss_longrong": 1.4047, "step": 1946 }, { "epoch": 0.38, "learning_rate": 1.435767794320027e-05, "loss": 2.4987, "loss_": 1.0158, "moe_loss": 0.1602, "moe_loss_longrong": 1.4046, "step": 1953 }, { "epoch": 0.38, "learning_rate": 1.4318371904851502e-05, "loss": 2.5188, "loss_": 1.1058, "moe_loss": 0.1602, "moe_loss_longrong": 1.4045, "step": 1960 }, { "epoch": 0.38, "learning_rate": 1.4278983683541934e-05, "loss": 2.491, "loss_": 1.1232, "moe_loss": 0.1606, "moe_loss_longrong": 1.4187, "step": 1967 }, { "epoch": 0.38, "learning_rate": 1.4239514028869032e-05, "loss": 2.487, "loss_": 0.9791, "moe_loss": 0.1602, "moe_loss_longrong": 1.4039, "step": 1974 }, { "epoch": 0.38, "learning_rate": 1.4199963691980027e-05, "loss": 2.492, "loss_": 1.0493, "moe_loss": 0.1602, "moe_loss_longrong": 1.4046, "step": 1981 }, { "epoch": 0.38, "learning_rate": 1.4160333425557616e-05, "loss": 2.5256, "loss_": 0.6311, "moe_loss": 0.1602, "moe_loss_longrong": 1.4039, "step": 1988 }, { "epoch": 0.38, "learning_rate": 1.4120623983805617e-05, "loss": 2.502, "loss_": 0.8536, "moe_loss": 0.1602, "moe_loss_longrong": 1.4037, "step": 1995 }, { "epoch": 0.39, "learning_rate": 1.408083612243465e-05, "loss": 2.4939, "loss_": 1.0558, "moe_loss": 0.1605, "moe_loss_longrong": 1.4185, "step": 2002 }, { "epoch": 0.39, "learning_rate": 1.4040970598647742e-05, "loss": 2.4975, "loss_": 0.9278, "moe_loss": 0.1606, "moe_loss_longrong": 1.4179, "step": 2009 }, { "epoch": 0.39, "learning_rate": 1.40010281711259e-05, "loss": 2.4624, "loss_": 0.8695, "moe_loss": 0.1602, "moe_loss_longrong": 1.4036, "step": 2016 }, { "epoch": 0.39, "learning_rate": 1.3961009600013702e-05, "loss": 2.4981, "loss_": 0.9502, "moe_loss": 0.1602, "moe_loss_longrong": 1.4041, "step": 2023 }, { "epoch": 0.39, "learning_rate": 1.39209156469048e-05, "loss": 2.4973, "loss_": 1.0486, "moe_loss": 0.1602, "moe_loss_longrong": 1.4037, "step": 2030 }, { "epoch": 0.39, "learning_rate": 1.3880747074827454e-05, "loss": 2.498, "loss_": 1.0935, "moe_loss": 0.1602, "moe_loss_longrong": 1.4031, "step": 2037 }, { "epoch": 0.39, "learning_rate": 1.384050464822999e-05, "loss": 2.4956, "loss_": 0.978, "moe_loss": 0.1602, "moe_loss_longrong": 1.4045, "step": 2044 }, { "epoch": 0.39, "learning_rate": 1.3800189132966257e-05, "loss": 2.4826, "loss_": 0.9682, "moe_loss": 0.1602, "moe_loss_longrong": 1.4039, "step": 2051 }, { "epoch": 0.4, "learning_rate": 1.3759801296281072e-05, "loss": 2.499, "loss_": 0.8618, "moe_loss": 0.1606, "moe_loss_longrong": 1.4181, "step": 2058 }, { "epoch": 0.4, "learning_rate": 1.371934190679558e-05, "loss": 2.4876, "loss_": 0.7575, "moe_loss": 0.1602, "moe_loss_longrong": 1.4032, "step": 2065 }, { "epoch": 0.4, "learning_rate": 1.3678811734492659e-05, "loss": 2.4821, "loss_": 0.8992, "moe_loss": 0.1602, "moe_loss_longrong": 1.4031, "step": 2072 }, { "epoch": 0.4, "learning_rate": 1.3638211550702256e-05, "loss": 2.4975, "loss_": 0.9085, "moe_loss": 0.1601, "moe_loss_longrong": 1.4031, "step": 2079 }, { "epoch": 0.4, "learning_rate": 1.3597542128086702e-05, "loss": 2.4958, "loss_": 1.1546, "moe_loss": 0.1602, "moe_loss_longrong": 1.403, "step": 2086 }, { "epoch": 0.4, "learning_rate": 1.3556804240626019e-05, "loss": 2.5323, "loss_": 1.0748, "moe_loss": 0.1602, "moe_loss_longrong": 1.4033, "step": 2093 }, { "epoch": 0.4, "learning_rate": 1.3515998663603174e-05, "loss": 2.5085, "loss_": 1.1199, "moe_loss": 0.1602, "moe_loss_longrong": 1.4034, "step": 2100 }, { "epoch": 0.41, "learning_rate": 1.3475126173589343e-05, "loss": 2.4864, "loss_": 0.8556, "moe_loss": 0.1601, "moe_loss_longrong": 1.4029, "step": 2107 }, { "epoch": 0.41, "learning_rate": 1.3434187548429126e-05, "loss": 2.5068, "loss_": 0.946, "moe_loss": 0.1602, "moe_loss_longrong": 1.4031, "step": 2114 }, { "epoch": 0.41, "learning_rate": 1.3393183567225724e-05, "loss": 2.4837, "loss_": 1.1161, "moe_loss": 0.1601, "moe_loss_longrong": 1.4029, "step": 2121 }, { "epoch": 0.41, "learning_rate": 1.3352115010326155e-05, "loss": 2.4825, "loss_": 0.6543, "moe_loss": 0.1602, "moe_loss_longrong": 1.4027, "step": 2128 }, { "epoch": 0.41, "learning_rate": 1.3310982659306352e-05, "loss": 2.5257, "loss_": 1.2189, "moe_loss": 0.1601, "moe_loss_longrong": 1.4026, "step": 2135 }, { "epoch": 0.41, "learning_rate": 1.3269787296956333e-05, "loss": 2.4993, "loss_": 0.9341, "moe_loss": 0.1601, "moe_loss_longrong": 1.4028, "step": 2142 }, { "epoch": 0.41, "learning_rate": 1.3228529707265279e-05, "loss": 2.4981, "loss_": 1.102, "moe_loss": 0.1602, "moe_loss_longrong": 1.4029, "step": 2149 }, { "epoch": 0.41, "learning_rate": 1.3187210675406617e-05, "loss": 2.5076, "loss_": 0.6091, "moe_loss": 0.1602, "moe_loss_longrong": 1.4026, "step": 2156 }, { "epoch": 0.42, "learning_rate": 1.3145830987723081e-05, "loss": 2.4946, "loss_": 0.972, "moe_loss": 0.1601, "moe_loss_longrong": 1.4025, "step": 2163 }, { "epoch": 0.42, "learning_rate": 1.3104391431711748e-05, "loss": 2.471, "loss_": 0.8826, "moe_loss": 0.1602, "moe_loss_longrong": 1.4026, "step": 2170 }, { "epoch": 0.42, "learning_rate": 1.306289279600905e-05, "loss": 2.4847, "loss_": 1.1855, "moe_loss": 0.1601, "moe_loss_longrong": 1.4025, "step": 2177 }, { "epoch": 0.42, "learning_rate": 1.3021335870375763e-05, "loss": 2.505, "loss_": 1.0819, "moe_loss": 0.1601, "moe_loss_longrong": 1.4025, "step": 2184 }, { "epoch": 0.42, "learning_rate": 1.297972144568198e-05, "loss": 2.4909, "loss_": 0.8074, "moe_loss": 0.1602, "moe_loss_longrong": 1.4024, "step": 2191 }, { "epoch": 0.42, "learning_rate": 1.2938050313892062e-05, "loss": 2.4929, "loss_": 1.0944, "moe_loss": 0.1607, "moe_loss_longrong": 1.4177, "step": 2198 }, { "epoch": 0.42, "learning_rate": 1.289632326804956e-05, "loss": 2.4747, "loss_": 0.8172, "moe_loss": 0.1602, "moe_loss_longrong": 1.4027, "step": 2205 }, { "epoch": 0.43, "learning_rate": 1.2854541102262119e-05, "loss": 2.4782, "loss_": 0.8552, "moe_loss": 0.1602, "moe_loss_longrong": 1.4028, "step": 2212 }, { "epoch": 0.43, "learning_rate": 1.2812704611686386e-05, "loss": 2.4825, "loss_": 0.9202, "moe_loss": 0.1601, "moe_loss_longrong": 1.402, "step": 2219 }, { "epoch": 0.43, "learning_rate": 1.2770814592512853e-05, "loss": 2.4951, "loss_": 1.1396, "moe_loss": 0.1601, "moe_loss_longrong": 1.4022, "step": 2226 }, { "epoch": 0.43, "learning_rate": 1.2728871841950719e-05, "loss": 2.4628, "loss_": 0.9138, "moe_loss": 0.1601, "moe_loss_longrong": 1.4025, "step": 2233 }, { "epoch": 0.43, "learning_rate": 1.2686877158212715e-05, "loss": 2.5028, "loss_": 0.8915, "moe_loss": 0.1602, "moe_loss_longrong": 1.402, "step": 2240 }, { "epoch": 0.43, "learning_rate": 1.2644831340499906e-05, "loss": 2.4802, "loss_": 1.3262, "moe_loss": 0.1601, "moe_loss_longrong": 1.4022, "step": 2247 }, { "epoch": 0.43, "learning_rate": 1.2602735188986498e-05, "loss": 2.4888, "loss_": 1.1958, "moe_loss": 0.1601, "moe_loss_longrong": 1.4025, "step": 2254 }, { "epoch": 0.43, "learning_rate": 1.2560589504804592e-05, "loss": 2.4964, "loss_": 1.0784, "moe_loss": 0.1602, "moe_loss_longrong": 1.4019, "step": 2261 }, { "epoch": 0.44, "learning_rate": 1.2518395090028952e-05, "loss": 2.4972, "loss_": 1.164, "moe_loss": 0.1601, "moe_loss_longrong": 1.402, "step": 2268 }, { "epoch": 0.44, "learning_rate": 1.2476152747661727e-05, "loss": 2.5173, "loss_": 1.083, "moe_loss": 0.1601, "moe_loss_longrong": 1.4018, "step": 2275 }, { "epoch": 0.44, "learning_rate": 1.243386328161718e-05, "loss": 2.5094, "loss_": 1.1749, "moe_loss": 0.1602, "moe_loss_longrong": 1.4021, "step": 2282 }, { "epoch": 0.44, "learning_rate": 1.2391527496706389e-05, "loss": 2.5007, "loss_": 1.2048, "moe_loss": 0.1602, "moe_loss_longrong": 1.4019, "step": 2289 }, { "epoch": 0.44, "learning_rate": 1.2349146198621917e-05, "loss": 2.4613, "loss_": 0.9356, "moe_loss": 0.1601, "moe_loss_longrong": 1.4018, "step": 2296 }, { "epoch": 0.44, "learning_rate": 1.23067201939225e-05, "loss": 2.522, "loss_": 1.3161, "moe_loss": 0.1601, "moe_loss_longrong": 1.4019, "step": 2303 }, { "epoch": 0.44, "learning_rate": 1.2264250290017675e-05, "loss": 2.4876, "loss_": 0.9183, "moe_loss": 0.1601, "moe_loss_longrong": 1.4018, "step": 2310 }, { "epoch": 0.45, "learning_rate": 1.222173729515243e-05, "loss": 2.4852, "loss_": 1.0262, "moe_loss": 0.1601, "moe_loss_longrong": 1.4013, "step": 2317 }, { "epoch": 0.45, "learning_rate": 1.217918201839182e-05, "loss": 2.4974, "loss_": 0.9078, "moe_loss": 0.1602, "moe_loss_longrong": 1.4019, "step": 2324 }, { "epoch": 0.45, "learning_rate": 1.2136585269605558e-05, "loss": 2.4873, "loss_": 1.063, "moe_loss": 0.1601, "moe_loss_longrong": 1.4015, "step": 2331 }, { "epoch": 0.45, "learning_rate": 1.209394785945263e-05, "loss": 2.4491, "loss_": 0.7031, "moe_loss": 0.1602, "moe_loss_longrong": 1.4026, "step": 2338 }, { "epoch": 0.45, "learning_rate": 1.2051270599365825e-05, "loss": 2.5059, "loss_": 1.0756, "moe_loss": 0.1602, "moe_loss_longrong": 1.4012, "step": 2345 }, { "epoch": 0.45, "learning_rate": 1.2008554301536328e-05, "loss": 2.4821, "loss_": 0.508, "moe_loss": 0.1601, "moe_loss_longrong": 1.4018, "step": 2352 }, { "epoch": 0.45, "learning_rate": 1.1965799778898258e-05, "loss": 2.4776, "loss_": 1.0053, "moe_loss": 0.1602, "moe_loss_longrong": 1.4016, "step": 2359 }, { "epoch": 0.46, "learning_rate": 1.1923007845113178e-05, "loss": 2.512, "loss_": 0.6722, "moe_loss": 0.1601, "moe_loss_longrong": 1.4016, "step": 2366 }, { "epoch": 0.46, "learning_rate": 1.1880179314554629e-05, "loss": 2.4488, "loss_": 0.4041, "moe_loss": 0.1602, "moe_loss_longrong": 1.402, "step": 2373 }, { "epoch": 0.46, "learning_rate": 1.1837315002292629e-05, "loss": 2.4889, "loss_": 1.1084, "moe_loss": 0.1601, "moe_loss_longrong": 1.401, "step": 2380 }, { "epoch": 0.46, "learning_rate": 1.1794415724078147e-05, "loss": 2.4732, "loss_": 0.6909, "moe_loss": 0.1602, "moe_loss_longrong": 1.4019, "step": 2387 }, { "epoch": 0.46, "learning_rate": 1.17514822963276e-05, "loss": 2.4599, "loss_": 1.0441, "moe_loss": 0.1602, "moe_loss_longrong": 1.4013, "step": 2394 }, { "epoch": 0.46, "learning_rate": 1.1708515536107299e-05, "loss": 2.472, "loss_": 0.7234, "moe_loss": 0.1601, "moe_loss_longrong": 1.4012, "step": 2401 }, { "epoch": 0.46, "learning_rate": 1.1665516261117914e-05, "loss": 2.4923, "loss_": 1.2036, "moe_loss": 0.1601, "moe_loss_longrong": 1.4014, "step": 2408 }, { "epoch": 0.46, "learning_rate": 1.1622485289678886e-05, "loss": 2.4794, "loss_": 0.9414, "moe_loss": 0.1601, "moe_loss_longrong": 1.4011, "step": 2415 }, { "epoch": 0.47, "learning_rate": 1.1579423440712887e-05, "loss": 2.4873, "loss_": 0.8799, "moe_loss": 0.1601, "moe_loss_longrong": 1.4007, "step": 2422 }, { "epoch": 0.47, "learning_rate": 1.153633153373022e-05, "loss": 2.4685, "loss_": 0.8351, "moe_loss": 0.1605, "moe_loss_longrong": 1.414, "step": 2429 }, { "epoch": 0.47, "learning_rate": 1.149321038881321e-05, "loss": 2.4965, "loss_": 1.0812, "moe_loss": 0.1601, "moe_loss_longrong": 1.4007, "step": 2436 }, { "epoch": 0.47, "learning_rate": 1.1450060826600618e-05, "loss": 2.467, "loss_": 1.0899, "moe_loss": 0.1601, "moe_loss_longrong": 1.4007, "step": 2443 }, { "epoch": 0.47, "learning_rate": 1.1406883668272015e-05, "loss": 2.5148, "loss_": 0.9878, "moe_loss": 0.1602, "moe_loss_longrong": 1.4008, "step": 2450 }, { "epoch": 0.47, "learning_rate": 1.1363679735532151e-05, "loss": 2.4869, "loss_": 1.0094, "moe_loss": 0.1601, "moe_loss_longrong": 1.4004, "step": 2457 }, { "epoch": 0.47, "learning_rate": 1.132044985059532e-05, "loss": 2.4687, "loss_": 0.8133, "moe_loss": 0.1601, "moe_loss_longrong": 1.4004, "step": 2464 }, { "epoch": 0.48, "learning_rate": 1.1277194836169714e-05, "loss": 2.4692, "loss_": 1.154, "moe_loss": 0.1601, "moe_loss_longrong": 1.401, "step": 2471 }, { "epoch": 0.48, "learning_rate": 1.1233915515441765e-05, "loss": 2.4768, "loss_": 0.9273, "moe_loss": 0.1601, "moe_loss_longrong": 1.4005, "step": 2478 }, { "epoch": 0.48, "learning_rate": 1.1190612712060475e-05, "loss": 2.465, "loss_": 0.9635, "moe_loss": 0.1601, "moe_loss_longrong": 1.4005, "step": 2485 }, { "epoch": 0.48, "learning_rate": 1.1147287250121745e-05, "loss": 2.5032, "loss_": 1.3144, "moe_loss": 0.1601, "moe_loss_longrong": 1.4006, "step": 2492 }, { "epoch": 0.48, "learning_rate": 1.11039399541527e-05, "loss": 2.4839, "loss_": 0.852, "moe_loss": 0.1601, "moe_loss_longrong": 1.4001, "step": 2499 }, { "epoch": 0.48, "learning_rate": 1.1060571649095972e-05, "loss": 2.4618, "loss_": 0.816, "moe_loss": 0.1605, "moe_loss_longrong": 1.4135, "step": 2506 }, { "epoch": 0.48, "learning_rate": 1.1017183160294033e-05, "loss": 2.5082, "loss_": 0.7247, "moe_loss": 0.1602, "moe_loss_longrong": 1.4005, "step": 2513 }, { "epoch": 0.48, "learning_rate": 1.0973775313473465e-05, "loss": 2.5026, "loss_": 0.9287, "moe_loss": 0.1601, "moe_loss_longrong": 1.4007, "step": 2520 }, { "epoch": 0.49, "learning_rate": 1.0930348934729249e-05, "loss": 2.4564, "loss_": 1.0246, "moe_loss": 0.1601, "moe_loss_longrong": 1.4009, "step": 2527 }, { "epoch": 0.49, "learning_rate": 1.0886904850509052e-05, "loss": 2.5123, "loss_": 1.1915, "moe_loss": 0.1601, "moe_loss_longrong": 1.4005, "step": 2534 }, { "epoch": 0.49, "learning_rate": 1.0843443887597495e-05, "loss": 2.4786, "loss_": 0.9271, "moe_loss": 0.1601, "moe_loss_longrong": 1.4004, "step": 2541 }, { "epoch": 0.49, "learning_rate": 1.0799966873100419e-05, "loss": 2.4941, "loss_": 1.2428, "moe_loss": 0.1601, "moe_loss_longrong": 1.4007, "step": 2548 }, { "epoch": 0.49, "learning_rate": 1.0756474634429133e-05, "loss": 2.4861, "loss_": 1.1406, "moe_loss": 0.1601, "moe_loss_longrong": 1.4004, "step": 2555 }, { "epoch": 0.49, "learning_rate": 1.0712967999284682e-05, "loss": 2.474, "loss_": 1.0874, "moe_loss": 0.1601, "moe_loss_longrong": 1.4006, "step": 2562 }, { "epoch": 0.49, "learning_rate": 1.0669447795642103e-05, "loss": 2.478, "loss_": 1.2379, "moe_loss": 0.1602, "moe_loss_longrong": 1.4004, "step": 2569 }, { "epoch": 0.5, "learning_rate": 1.0625914851734632e-05, "loss": 2.4567, "loss_": 1.0187, "moe_loss": 0.1601, "moe_loss_longrong": 1.4004, "step": 2576 }, { "epoch": 0.5, "learning_rate": 1.0582369996037985e-05, "loss": 2.4762, "loss_": 0.9279, "moe_loss": 0.1602, "moe_loss_longrong": 1.4007, "step": 2583 }, { "epoch": 0.5, "learning_rate": 1.053881405725456e-05, "loss": 2.4869, "loss_": 0.8599, "moe_loss": 0.1601, "moe_loss_longrong": 1.4, "step": 2590 }, { "epoch": 0.5, "learning_rate": 1.0495247864297684e-05, "loss": 2.5043, "loss_": 1.0374, "moe_loss": 0.1601, "moe_loss_longrong": 1.4, "step": 2597 }, { "epoch": 0.5, "learning_rate": 1.0451672246275826e-05, "loss": 2.4675, "loss_": 0.57, "moe_loss": 0.1602, "moe_loss_longrong": 1.4005, "step": 2604 }, { "epoch": 0.5, "learning_rate": 1.0408088032476822e-05, "loss": 2.4752, "loss_": 0.5417, "moe_loss": 0.1602, "moe_loss_longrong": 1.4003, "step": 2611 }, { "epoch": 0.5, "learning_rate": 1.036449605235211e-05, "loss": 2.4568, "loss_": 1.056, "moe_loss": 0.1601, "moe_loss_longrong": 1.3999, "step": 2618 }, { "epoch": 0.51, "learning_rate": 1.0320897135500904e-05, "loss": 2.4658, "loss_": 0.8557, "moe_loss": 0.1602, "moe_loss_longrong": 1.4, "step": 2625 }, { "epoch": 0.51, "learning_rate": 1.0277292111654447e-05, "loss": 2.4553, "loss_": 0.9975, "moe_loss": 0.1601, "moe_loss_longrong": 1.3999, "step": 2632 }, { "epoch": 0.51, "learning_rate": 1.0233681810660207e-05, "loss": 2.4815, "loss_": 1.0191, "moe_loss": 0.1601, "moe_loss_longrong": 1.4004, "step": 2639 }, { "epoch": 0.51, "learning_rate": 1.019006706246607e-05, "loss": 2.4735, "loss_": 1.0673, "moe_loss": 0.1601, "moe_loss_longrong": 1.4001, "step": 2646 }, { "epoch": 0.51, "learning_rate": 1.0146448697104561e-05, "loss": 2.458, "loss_": 1.2381, "moe_loss": 0.1601, "moe_loss_longrong": 1.4, "step": 2653 }, { "epoch": 0.51, "learning_rate": 1.010282754467705e-05, "loss": 2.463, "loss_": 1.1428, "moe_loss": 0.1601, "moe_loss_longrong": 1.3997, "step": 2660 }, { "epoch": 0.51, "learning_rate": 1.0059204435337938e-05, "loss": 2.483, "loss_": 1.2124, "moe_loss": 0.1601, "moe_loss_longrong": 1.3999, "step": 2667 }, { "epoch": 0.51, "learning_rate": 1.0015580199278873e-05, "loss": 2.4907, "loss_": 0.8323, "moe_loss": 0.1601, "moe_loss_longrong": 1.3995, "step": 2674 }, { "epoch": 0.52, "learning_rate": 9.971955666712945e-06, "loss": 2.4936, "loss_": 1.1091, "moe_loss": 0.1601, "moe_loss_longrong": 1.3998, "step": 2681 }, { "epoch": 0.52, "learning_rate": 9.928331667858886e-06, "loss": 2.5039, "loss_": 1.0505, "moe_loss": 0.1601, "moe_loss_longrong": 1.3998, "step": 2688 }, { "epoch": 0.52, "learning_rate": 9.884709032925274e-06, "loss": 2.4704, "loss_": 0.9685, "moe_loss": 0.1602, "moe_loss_longrong": 1.3998, "step": 2695 }, { "epoch": 0.52, "learning_rate": 9.841088592094726e-06, "loss": 2.4897, "loss_": 1.2011, "moe_loss": 0.1601, "moe_loss_longrong": 1.3993, "step": 2702 }, { "epoch": 0.52, "learning_rate": 9.797471175508101e-06, "loss": 2.4642, "loss_": 1.064, "moe_loss": 0.1601, "moe_loss_longrong": 1.3997, "step": 2709 }, { "epoch": 0.52, "learning_rate": 9.753857613248714e-06, "loss": 2.4746, "loss_": 1.089, "moe_loss": 0.1601, "moe_loss_longrong": 1.3995, "step": 2716 }, { "epoch": 0.52, "learning_rate": 9.710248735326519e-06, "loss": 2.4767, "loss_": 0.7312, "moe_loss": 0.1601, "moe_loss_longrong": 1.3996, "step": 2723 }, { "epoch": 0.53, "learning_rate": 9.666645371662324e-06, "loss": 2.4693, "loss_": 1.0271, "moe_loss": 0.1601, "moe_loss_longrong": 1.3994, "step": 2730 }, { "epoch": 0.53, "learning_rate": 9.623048352071998e-06, "loss": 2.4631, "loss_": 0.7867, "moe_loss": 0.1601, "moe_loss_longrong": 1.3992, "step": 2737 }, { "epoch": 0.53, "learning_rate": 9.579458506250668e-06, "loss": 2.4744, "loss_": 1.1123, "moe_loss": 0.1602, "moe_loss_longrong": 1.3996, "step": 2744 }, { "epoch": 0.53, "learning_rate": 9.535876663756955e-06, "loss": 2.4836, "loss_": 0.9437, "moe_loss": 0.1601, "moe_loss_longrong": 1.3994, "step": 2751 }, { "epoch": 0.53, "learning_rate": 9.492303653997146e-06, "loss": 2.4822, "loss_": 0.9857, "moe_loss": 0.1601, "moe_loss_longrong": 1.399, "step": 2758 }, { "epoch": 0.53, "learning_rate": 9.448740306209447e-06, "loss": 2.4816, "loss_": 0.9489, "moe_loss": 0.1601, "moe_loss_longrong": 1.3991, "step": 2765 }, { "epoch": 0.53, "learning_rate": 9.40518744944818e-06, "loss": 2.4744, "loss_": 0.6401, "moe_loss": 0.1601, "moe_loss_longrong": 1.3996, "step": 2772 }, { "epoch": 0.53, "learning_rate": 9.361645912568015e-06, "loss": 2.4736, "loss_": 0.8008, "moe_loss": 0.1602, "moe_loss_longrong": 1.3996, "step": 2779 }, { "epoch": 0.54, "learning_rate": 9.318116524208198e-06, "loss": 2.4719, "loss_": 0.9666, "moe_loss": 0.1601, "moe_loss_longrong": 1.3997, "step": 2786 }, { "epoch": 0.54, "learning_rate": 9.27460011277677e-06, "loss": 2.4865, "loss_": 1.0383, "moe_loss": 0.1601, "moe_loss_longrong": 1.3994, "step": 2793 }, { "epoch": 0.54, "learning_rate": 9.231097506434808e-06, "loss": 2.4683, "loss_": 0.807, "moe_loss": 0.1601, "moe_loss_longrong": 1.3995, "step": 2800 }, { "epoch": 0.54, "learning_rate": 9.187609533080668e-06, "loss": 2.4738, "loss_": 1.0131, "moe_loss": 0.1601, "moe_loss_longrong": 1.3992, "step": 2807 }, { "epoch": 0.54, "learning_rate": 9.144137020334214e-06, "loss": 2.4559, "loss_": 0.9178, "moe_loss": 0.1601, "moe_loss_longrong": 1.3994, "step": 2814 }, { "epoch": 0.54, "learning_rate": 9.100680795521104e-06, "loss": 2.4832, "loss_": 0.8958, "moe_loss": 0.1601, "moe_loss_longrong": 1.3993, "step": 2821 }, { "epoch": 0.54, "learning_rate": 9.057241685656995e-06, "loss": 2.4729, "loss_": 0.8244, "moe_loss": 0.1601, "moe_loss_longrong": 1.399, "step": 2828 }, { "epoch": 0.55, "learning_rate": 9.013820517431841e-06, "loss": 2.4458, "loss_": 0.6857, "moe_loss": 0.1601, "moe_loss_longrong": 1.3989, "step": 2835 }, { "epoch": 0.55, "learning_rate": 8.970418117194146e-06, "loss": 2.4789, "loss_": 0.8677, "moe_loss": 0.1601, "moe_loss_longrong": 1.3991, "step": 2842 }, { "epoch": 0.55, "learning_rate": 8.927035310935241e-06, "loss": 2.4633, "loss_": 0.755, "moe_loss": 0.1601, "moe_loss_longrong": 1.399, "step": 2849 }, { "epoch": 0.55, "learning_rate": 8.883672924273566e-06, "loss": 2.481, "loss_": 0.9947, "moe_loss": 0.1601, "moe_loss_longrong": 1.399, "step": 2856 }, { "epoch": 0.55, "learning_rate": 8.840331782438954e-06, "loss": 2.4821, "loss_": 0.6639, "moe_loss": 0.1601, "moe_loss_longrong": 1.3988, "step": 2863 }, { "epoch": 0.55, "learning_rate": 8.797012710256923e-06, "loss": 2.4683, "loss_": 1.2205, "moe_loss": 0.1601, "moe_loss_longrong": 1.3992, "step": 2870 }, { "epoch": 0.55, "learning_rate": 8.753716532132992e-06, "loss": 2.4611, "loss_": 0.9415, "moe_loss": 0.1601, "moe_loss_longrong": 1.3989, "step": 2877 }, { "epoch": 0.55, "learning_rate": 8.71044407203697e-06, "loss": 2.491, "loss_": 0.9864, "moe_loss": 0.1601, "moe_loss_longrong": 1.399, "step": 2884 }, { "epoch": 0.56, "learning_rate": 8.667196153487308e-06, "loss": 2.4726, "loss_": 0.865, "moe_loss": 0.1601, "moe_loss_longrong": 1.3986, "step": 2891 }, { "epoch": 0.56, "learning_rate": 8.623973599535385e-06, "loss": 2.457, "loss_": 0.8526, "moe_loss": 0.1601, "moe_loss_longrong": 1.399, "step": 2898 }, { "epoch": 0.56, "learning_rate": 8.580777232749883e-06, "loss": 2.4576, "loss_": 1.0116, "moe_loss": 0.1601, "moe_loss_longrong": 1.3988, "step": 2905 }, { "epoch": 0.56, "learning_rate": 8.537607875201106e-06, "loss": 2.5113, "loss_": 0.8642, "moe_loss": 0.1601, "moe_loss_longrong": 1.3987, "step": 2912 }, { "epoch": 0.56, "learning_rate": 8.494466348445345e-06, "loss": 2.4787, "loss_": 1.0994, "moe_loss": 0.1601, "moe_loss_longrong": 1.3988, "step": 2919 }, { "epoch": 0.56, "learning_rate": 8.451353473509254e-06, "loss": 2.4797, "loss_": 1.1009, "moe_loss": 0.1601, "moe_loss_longrong": 1.3983, "step": 2926 }, { "epoch": 0.56, "learning_rate": 8.408270070874201e-06, "loss": 2.4709, "loss_": 0.8487, "moe_loss": 0.1601, "moe_loss_longrong": 1.3984, "step": 2933 }, { "epoch": 0.57, "learning_rate": 8.365216960460675e-06, "loss": 2.5019, "loss_": 0.9758, "moe_loss": 0.1601, "moe_loss_longrong": 1.3987, "step": 2940 }, { "epoch": 0.57, "learning_rate": 8.322194961612668e-06, "loss": 2.4919, "loss_": 0.9281, "moe_loss": 0.1601, "moe_loss_longrong": 1.3986, "step": 2947 }, { "epoch": 0.57, "learning_rate": 8.279204893082083e-06, "loss": 2.4788, "loss_": 0.9675, "moe_loss": 0.1601, "moe_loss_longrong": 1.3986, "step": 2954 }, { "epoch": 0.57, "learning_rate": 8.23624757301318e-06, "loss": 2.4796, "loss_": 0.9676, "moe_loss": 0.1601, "moe_loss_longrong": 1.3986, "step": 2961 }, { "epoch": 0.57, "learning_rate": 8.193323818926955e-06, "loss": 2.4471, "loss_": 0.3893, "moe_loss": 0.1601, "moe_loss_longrong": 1.3986, "step": 2968 }, { "epoch": 0.57, "learning_rate": 8.150434447705623e-06, "loss": 2.4644, "loss_": 1.0186, "moe_loss": 0.1601, "moe_loss_longrong": 1.3985, "step": 2975 }, { "epoch": 0.57, "learning_rate": 8.107580275577059e-06, "loss": 2.4517, "loss_": 1.2343, "moe_loss": 0.1601, "moe_loss_longrong": 1.3985, "step": 2982 }, { "epoch": 0.58, "learning_rate": 8.064762118099258e-06, "loss": 2.4524, "loss_": 0.7846, "moe_loss": 0.1601, "moe_loss_longrong": 1.3985, "step": 2989 }, { "epoch": 0.58, "learning_rate": 8.021980790144828e-06, "loss": 2.4626, "loss_": 1.0468, "moe_loss": 0.1601, "moe_loss_longrong": 1.3988, "step": 2996 }, { "epoch": 0.58, "learning_rate": 7.979237105885467e-06, "loss": 2.4822, "loss_": 0.5538, "moe_loss": 0.1601, "moe_loss_longrong": 1.3985, "step": 3003 }, { "epoch": 0.58, "learning_rate": 7.936531878776484e-06, "loss": 2.4753, "loss_": 1.1616, "moe_loss": 0.1601, "moe_loss_longrong": 1.3983, "step": 3010 }, { "epoch": 0.58, "learning_rate": 7.893865921541294e-06, "loss": 2.4418, "loss_": 0.5863, "moe_loss": 0.1601, "moe_loss_longrong": 1.3987, "step": 3017 }, { "epoch": 0.58, "learning_rate": 7.85124004615598e-06, "loss": 2.4724, "loss_": 0.9406, "moe_loss": 0.1601, "moe_loss_longrong": 1.3982, "step": 3024 }, { "epoch": 0.58, "learning_rate": 7.808655063833832e-06, "loss": 2.4884, "loss_": 1.0104, "moe_loss": 0.1601, "moe_loss_longrong": 1.3989, "step": 3031 }, { "epoch": 0.58, "learning_rate": 7.766111785009888e-06, "loss": 2.4676, "loss_": 0.9396, "moe_loss": 0.1601, "moe_loss_longrong": 1.398, "step": 3038 }, { "epoch": 0.59, "learning_rate": 7.723611019325538e-06, "loss": 2.4705, "loss_": 0.9611, "moe_loss": 0.1601, "moe_loss_longrong": 1.3982, "step": 3045 }, { "epoch": 0.59, "learning_rate": 7.681153575613098e-06, "loss": 2.4555, "loss_": 0.931, "moe_loss": 0.1601, "moe_loss_longrong": 1.3986, "step": 3052 }, { "epoch": 0.59, "learning_rate": 7.638740261880423e-06, "loss": 2.4369, "loss_": 0.7901, "moe_loss": 0.1601, "moe_loss_longrong": 1.3985, "step": 3059 }, { "epoch": 0.59, "learning_rate": 7.596371885295542e-06, "loss": 2.4852, "loss_": 0.9128, "moe_loss": 0.1601, "moe_loss_longrong": 1.3981, "step": 3066 }, { "epoch": 0.59, "learning_rate": 7.55404925217127e-06, "loss": 2.5004, "loss_": 1.0571, "moe_loss": 0.1601, "moe_loss_longrong": 1.3983, "step": 3073 }, { "epoch": 0.59, "learning_rate": 7.511773167949885e-06, "loss": 2.4582, "loss_": 1.0777, "moe_loss": 0.1601, "moe_loss_longrong": 1.3986, "step": 3080 }, { "epoch": 0.59, "learning_rate": 7.46954443718779e-06, "loss": 2.4644, "loss_": 0.89, "moe_loss": 0.1601, "moe_loss_longrong": 1.3984, "step": 3087 }, { "epoch": 0.6, "learning_rate": 7.427363863540202e-06, "loss": 2.4668, "loss_": 1.0102, "moe_loss": 0.1601, "moe_loss_longrong": 1.3986, "step": 3094 }, { "epoch": 0.6, "learning_rate": 7.385232249745873e-06, "loss": 2.4733, "loss_": 0.6698, "moe_loss": 0.1601, "moe_loss_longrong": 1.3988, "step": 3101 }, { "epoch": 0.6, "learning_rate": 7.343150397611782e-06, "loss": 2.5122, "loss_": 1.2655, "moe_loss": 0.1601, "moe_loss_longrong": 1.3979, "step": 3108 }, { "epoch": 0.6, "learning_rate": 7.301119107997905e-06, "loss": 2.461, "loss_": 1.1851, "moe_loss": 0.1601, "moe_loss_longrong": 1.3978, "step": 3115 }, { "epoch": 0.6, "learning_rate": 7.2591391808019555e-06, "loss": 2.4727, "loss_": 0.8541, "moe_loss": 0.1601, "moe_loss_longrong": 1.3979, "step": 3122 }, { "epoch": 0.6, "learning_rate": 7.217211414944171e-06, "loss": 2.4443, "loss_": 1.0654, "moe_loss": 0.1601, "moe_loss_longrong": 1.3978, "step": 3129 }, { "epoch": 0.6, "learning_rate": 7.175336608352113e-06, "loss": 2.4922, "loss_": 1.184, "moe_loss": 0.1601, "moe_loss_longrong": 1.3982, "step": 3136 }, { "epoch": 0.6, "learning_rate": 7.133515557945463e-06, "loss": 2.4643, "loss_": 1.1851, "moe_loss": 0.1601, "moe_loss_longrong": 1.3981, "step": 3143 }, { "epoch": 0.61, "learning_rate": 7.091749059620881e-06, "loss": 2.4581, "loss_": 1.328, "moe_loss": 0.1601, "moe_loss_longrong": 1.3982, "step": 3150 }, { "epoch": 0.61, "learning_rate": 7.0500379082368305e-06, "loss": 2.4708, "loss_": 1.0728, "moe_loss": 0.1601, "moe_loss_longrong": 1.398, "step": 3157 }, { "epoch": 0.61, "learning_rate": 7.008382897598477e-06, "loss": 2.4901, "loss_": 0.9315, "moe_loss": 0.1606, "moe_loss_longrong": 1.4088, "step": 3164 }, { "epoch": 0.61, "learning_rate": 6.9667848204425785e-06, "loss": 2.4706, "loss_": 1.1113, "moe_loss": 0.1601, "moe_loss_longrong": 1.3979, "step": 3171 }, { "epoch": 0.61, "learning_rate": 6.9252444684223765e-06, "loss": 2.4442, "loss_": 0.7937, "moe_loss": 0.1601, "moe_loss_longrong": 1.3977, "step": 3178 }, { "epoch": 0.61, "learning_rate": 6.88376263209255e-06, "loss": 2.4406, "loss_": 1.1277, "moe_loss": 0.1601, "moe_loss_longrong": 1.3982, "step": 3185 }, { "epoch": 0.61, "learning_rate": 6.84234010089417e-06, "loss": 2.4761, "loss_": 0.9565, "moe_loss": 0.1601, "moe_loss_longrong": 1.3982, "step": 3192 }, { "epoch": 0.62, "learning_rate": 6.800977663139666e-06, "loss": 2.4832, "loss_": 0.7461, "moe_loss": 0.1605, "moe_loss_longrong": 1.4073, "step": 3199 }, { "epoch": 0.62, "learning_rate": 6.759676105997834e-06, "loss": 2.4752, "loss_": 1.1396, "moe_loss": 0.1601, "moe_loss_longrong": 1.3978, "step": 3206 }, { "epoch": 0.62, "learning_rate": 6.718436215478849e-06, "loss": 2.4594, "loss_": 1.1075, "moe_loss": 0.1601, "moe_loss_longrong": 1.3978, "step": 3213 }, { "epoch": 0.62, "learning_rate": 6.677258776419304e-06, "loss": 2.4703, "loss_": 0.9133, "moe_loss": 0.1601, "moe_loss_longrong": 1.3975, "step": 3220 }, { "epoch": 0.62, "learning_rate": 6.63614457246728e-06, "loss": 2.4534, "loss_": 0.9049, "moe_loss": 0.1601, "moe_loss_longrong": 1.3978, "step": 3227 }, { "epoch": 0.62, "learning_rate": 6.595094386067428e-06, "loss": 2.4945, "loss_": 1.1701, "moe_loss": 0.1601, "moe_loss_longrong": 1.3975, "step": 3234 }, { "epoch": 0.62, "learning_rate": 6.554108998446096e-06, "loss": 2.4832, "loss_": 0.9606, "moe_loss": 0.1601, "moe_loss_longrong": 1.3977, "step": 3241 }, { "epoch": 0.62, "learning_rate": 6.513189189596422e-06, "loss": 2.4639, "loss_": 1.0635, "moe_loss": 0.1601, "moe_loss_longrong": 1.3978, "step": 3248 }, { "epoch": 0.63, "learning_rate": 6.472335738263534e-06, "loss": 2.4609, "loss_": 0.9759, "moe_loss": 0.1601, "moe_loss_longrong": 1.3982, "step": 3255 }, { "epoch": 0.63, "learning_rate": 6.431549421929694e-06, "loss": 2.4641, "loss_": 0.7825, "moe_loss": 0.1601, "moe_loss_longrong": 1.398, "step": 3262 }, { "epoch": 0.63, "learning_rate": 6.390831016799527e-06, "loss": 2.458, "loss_": 0.8033, "moe_loss": 0.1601, "moe_loss_longrong": 1.3977, "step": 3269 }, { "epoch": 0.63, "learning_rate": 6.350181297785242e-06, "loss": 2.4584, "loss_": 1.0825, "moe_loss": 0.1601, "moe_loss_longrong": 1.3974, "step": 3276 }, { "epoch": 0.63, "learning_rate": 6.309601038491874e-06, "loss": 2.4911, "loss_": 0.7566, "moe_loss": 0.1601, "moe_loss_longrong": 1.3979, "step": 3283 }, { "epoch": 0.63, "learning_rate": 6.269091011202576e-06, "loss": 2.457, "loss_": 0.6181, "moe_loss": 0.1601, "moe_loss_longrong": 1.3977, "step": 3290 }, { "epoch": 0.63, "learning_rate": 6.2286519868639095e-06, "loss": 2.4458, "loss_": 1.0953, "moe_loss": 0.1601, "moe_loss_longrong": 1.3975, "step": 3297 }, { "epoch": 0.64, "learning_rate": 6.188284735071177e-06, "loss": 2.4848, "loss_": 0.9802, "moe_loss": 0.1601, "moe_loss_longrong": 1.3976, "step": 3304 }, { "epoch": 0.64, "learning_rate": 6.1479900240537956e-06, "loss": 2.4815, "loss_": 1.2048, "moe_loss": 0.1601, "moe_loss_longrong": 1.3977, "step": 3311 }, { "epoch": 0.64, "learning_rate": 6.107768620660633e-06, "loss": 2.4552, "loss_": 1.039, "moe_loss": 0.1601, "moe_loss_longrong": 1.3976, "step": 3318 }, { "epoch": 0.64, "learning_rate": 6.067621290345455e-06, "loss": 2.4365, "loss_": 1.2129, "moe_loss": 0.1601, "moe_loss_longrong": 1.3974, "step": 3325 }, { "epoch": 0.64, "learning_rate": 6.027548797152336e-06, "loss": 2.4546, "loss_": 0.6566, "moe_loss": 0.1601, "moe_loss_longrong": 1.3973, "step": 3332 }, { "epoch": 0.64, "learning_rate": 5.987551903701128e-06, "loss": 2.4409, "loss_": 1.0142, "moe_loss": 0.1605, "moe_loss_longrong": 1.408, "step": 3339 }, { "epoch": 0.64, "learning_rate": 5.947631371172943e-06, "loss": 2.4488, "loss_": 0.9704, "moe_loss": 0.1604, "moe_loss_longrong": 1.408, "step": 3346 }, { "epoch": 0.65, "learning_rate": 5.9077879592956675e-06, "loss": 2.4569, "loss_": 0.936, "moe_loss": 0.1601, "moe_loss_longrong": 1.3972, "step": 3353 }, { "epoch": 0.65, "learning_rate": 5.8680224263295045e-06, "loss": 2.4519, "loss_": 0.9728, "moe_loss": 0.1601, "moe_loss_longrong": 1.3972, "step": 3360 }, { "epoch": 0.65, "learning_rate": 5.828335529052541e-06, "loss": 2.4757, "loss_": 0.9242, "moe_loss": 0.1601, "moe_loss_longrong": 1.3972, "step": 3367 }, { "epoch": 0.65, "learning_rate": 5.788728022746348e-06, "loss": 2.4769, "loss_": 0.8005, "moe_loss": 0.1601, "moe_loss_longrong": 1.3976, "step": 3374 }, { "epoch": 0.65, "learning_rate": 5.749200661181611e-06, "loss": 2.4548, "loss_": 1.116, "moe_loss": 0.1601, "moe_loss_longrong": 1.3971, "step": 3381 }, { "epoch": 0.65, "learning_rate": 5.709754196603781e-06, "loss": 2.4687, "loss_": 0.8613, "moe_loss": 0.1601, "moe_loss_longrong": 1.3971, "step": 3388 }, { "epoch": 0.65, "learning_rate": 5.67038937971875e-06, "loss": 2.437, "loss_": 0.9275, "moe_loss": 0.1601, "moe_loss_longrong": 1.3974, "step": 3395 }, { "epoch": 0.65, "learning_rate": 5.631106959678575e-06, "loss": 2.4636, "loss_": 1.1476, "moe_loss": 0.1601, "moe_loss_longrong": 1.3974, "step": 3402 }, { "epoch": 0.66, "learning_rate": 5.5919076840672215e-06, "loss": 2.449, "loss_": 0.9428, "moe_loss": 0.1601, "moe_loss_longrong": 1.397, "step": 3409 }, { "epoch": 0.66, "learning_rate": 5.552792298886335e-06, "loss": 2.4572, "loss_": 0.8202, "moe_loss": 0.1601, "moe_loss_longrong": 1.3973, "step": 3416 }, { "epoch": 0.66, "learning_rate": 5.513761548541032e-06, "loss": 2.444, "loss_": 0.905, "moe_loss": 0.1601, "moe_loss_longrong": 1.3973, "step": 3423 }, { "epoch": 0.66, "learning_rate": 5.474816175825754e-06, "loss": 2.4189, "loss_": 1.1022, "moe_loss": 0.1601, "moe_loss_longrong": 1.3973, "step": 3430 }, { "epoch": 0.66, "learning_rate": 5.4359569219101115e-06, "loss": 2.5038, "loss_": 1.1099, "moe_loss": 0.1601, "moe_loss_longrong": 1.3969, "step": 3437 }, { "epoch": 0.66, "learning_rate": 5.397184526324792e-06, "loss": 2.4885, "loss_": 0.9227, "moe_loss": 0.1601, "moe_loss_longrong": 1.3973, "step": 3444 }, { "epoch": 0.66, "learning_rate": 5.358499726947488e-06, "loss": 2.4389, "loss_": 0.9602, "moe_loss": 0.1601, "moe_loss_longrong": 1.3971, "step": 3451 }, { "epoch": 0.67, "learning_rate": 5.31990325998883e-06, "loss": 2.4275, "loss_": 0.9191, "moe_loss": 0.1601, "moe_loss_longrong": 1.3969, "step": 3458 }, { "epoch": 0.67, "learning_rate": 5.281395859978414e-06, "loss": 2.4647, "loss_": 1.0229, "moe_loss": 0.1601, "moe_loss_longrong": 1.397, "step": 3465 }, { "epoch": 0.67, "learning_rate": 5.24297825975079e-06, "loss": 2.4649, "loss_": 0.9973, "moe_loss": 0.1601, "moe_loss_longrong": 1.3973, "step": 3472 }, { "epoch": 0.67, "learning_rate": 5.2046511904315265e-06, "loss": 2.4409, "loss_": 0.6513, "moe_loss": 0.1601, "moe_loss_longrong": 1.3969, "step": 3479 }, { "epoch": 0.67, "learning_rate": 5.166415381423306e-06, "loss": 2.4805, "loss_": 1.1712, "moe_loss": 0.1601, "moe_loss_longrong": 1.3971, "step": 3486 }, { "epoch": 0.67, "learning_rate": 5.128271560392037e-06, "loss": 2.4496, "loss_": 1.0721, "moe_loss": 0.1601, "moe_loss_longrong": 1.3971, "step": 3493 }, { "epoch": 0.67, "learning_rate": 5.09022045325299e-06, "loss": 2.473, "loss_": 1.1122, "moe_loss": 0.1601, "moe_loss_longrong": 1.3971, "step": 3500 }, { "epoch": 0.67, "learning_rate": 5.052262784157014e-06, "loss": 2.4654, "loss_": 1.0388, "moe_loss": 0.1601, "moe_loss_longrong": 1.397, "step": 3507 }, { "epoch": 0.68, "learning_rate": 5.014399275476721e-06, "loss": 2.463, "loss_": 1.0244, "moe_loss": 0.1601, "moe_loss_longrong": 1.3968, "step": 3514 }, { "epoch": 0.68, "learning_rate": 4.976630647792771e-06, "loss": 2.4481, "loss_": 0.7509, "moe_loss": 0.1601, "moe_loss_longrong": 1.3972, "step": 3521 }, { "epoch": 0.68, "learning_rate": 4.938957619880138e-06, "loss": 2.4624, "loss_": 1.0897, "moe_loss": 0.1601, "moe_loss_longrong": 1.397, "step": 3528 }, { "epoch": 0.68, "learning_rate": 4.901380908694434e-06, "loss": 2.4236, "loss_": 1.1599, "moe_loss": 0.1601, "moe_loss_longrong": 1.397, "step": 3535 }, { "epoch": 0.68, "learning_rate": 4.863901229358261e-06, "loss": 2.4483, "loss_": 0.8951, "moe_loss": 0.1601, "moe_loss_longrong": 1.3971, "step": 3542 }, { "epoch": 0.68, "learning_rate": 4.8265192951476206e-06, "loss": 2.4552, "loss_": 0.9006, "moe_loss": 0.1601, "moe_loss_longrong": 1.3969, "step": 3549 }, { "epoch": 0.68, "learning_rate": 4.789235817478322e-06, "loss": 2.457, "loss_": 1.0357, "moe_loss": 0.1601, "moe_loss_longrong": 1.3969, "step": 3556 }, { "epoch": 0.69, "learning_rate": 4.752051505892438e-06, "loss": 2.462, "loss_": 1.031, "moe_loss": 0.1601, "moe_loss_longrong": 1.3968, "step": 3563 }, { "epoch": 0.69, "learning_rate": 4.714967068044826e-06, "loss": 2.459, "loss_": 1.2418, "moe_loss": 0.1601, "moe_loss_longrong": 1.3969, "step": 3570 }, { "epoch": 0.69, "learning_rate": 4.677983209689631e-06, "loss": 2.4449, "loss_": 0.7941, "moe_loss": 0.1601, "moe_loss_longrong": 1.3973, "step": 3577 }, { "epoch": 0.69, "learning_rate": 4.641100634666877e-06, "loss": 2.4528, "loss_": 0.7962, "moe_loss": 0.1601, "moe_loss_longrong": 1.397, "step": 3584 }, { "epoch": 0.69, "learning_rate": 4.6043200448890724e-06, "loss": 2.4674, "loss_": 1.0349, "moe_loss": 0.1601, "moe_loss_longrong": 1.3967, "step": 3591 }, { "epoch": 0.69, "learning_rate": 4.567642140327823e-06, "loss": 2.4498, "loss_": 0.9343, "moe_loss": 0.1601, "moe_loss_longrong": 1.3966, "step": 3598 }, { "epoch": 0.69, "learning_rate": 4.531067619000553e-06, "loss": 2.4711, "loss_": 0.7285, "moe_loss": 0.1601, "moe_loss_longrong": 1.3967, "step": 3605 }, { "epoch": 0.69, "learning_rate": 4.494597176957186e-06, "loss": 2.4578, "loss_": 0.6286, "moe_loss": 0.1601, "moe_loss_longrong": 1.3968, "step": 3612 }, { "epoch": 0.7, "learning_rate": 4.458231508266912e-06, "loss": 2.4736, "loss_": 0.8458, "moe_loss": 0.1605, "moe_loss_longrong": 1.4062, "step": 3619 }, { "epoch": 0.7, "learning_rate": 4.421971305004989e-06, "loss": 2.4841, "loss_": 0.7491, "moe_loss": 0.1601, "moe_loss_longrong": 1.3965, "step": 3626 }, { "epoch": 0.7, "learning_rate": 4.385817257239556e-06, "loss": 2.4332, "loss_": 0.9237, "moe_loss": 0.1604, "moe_loss_longrong": 1.4068, "step": 3633 }, { "epoch": 0.7, "learning_rate": 4.349770053018502e-06, "loss": 2.4673, "loss_": 0.9196, "moe_loss": 0.1601, "moe_loss_longrong": 1.3968, "step": 3640 }, { "epoch": 0.7, "learning_rate": 4.313830378356384e-06, "loss": 2.4438, "loss_": 1.0917, "moe_loss": 0.1601, "moe_loss_longrong": 1.397, "step": 3647 }, { "epoch": 0.7, "learning_rate": 4.277998917221354e-06, "loss": 2.4672, "loss_": 0.8497, "moe_loss": 0.1604, "moe_loss_longrong": 1.4064, "step": 3654 }, { "epoch": 0.7, "learning_rate": 4.242276351522161e-06, "loss": 2.4468, "loss_": 0.8331, "moe_loss": 0.1601, "moe_loss_longrong": 1.3969, "step": 3661 }, { "epoch": 0.71, "learning_rate": 4.206663361095164e-06, "loss": 2.4639, "loss_": 0.9817, "moe_loss": 0.1601, "moe_loss_longrong": 1.3963, "step": 3668 }, { "epoch": 0.71, "learning_rate": 4.171160623691384e-06, "loss": 2.4403, "loss_": 1.0819, "moe_loss": 0.1601, "moe_loss_longrong": 1.3967, "step": 3675 }, { "epoch": 0.71, "learning_rate": 4.135768814963622e-06, "loss": 2.4281, "loss_": 0.9681, "moe_loss": 0.1601, "moe_loss_longrong": 1.3966, "step": 3682 }, { "epoch": 0.71, "learning_rate": 4.100488608453599e-06, "loss": 2.4383, "loss_": 0.8748, "moe_loss": 0.1601, "moe_loss_longrong": 1.3966, "step": 3689 }, { "epoch": 0.71, "learning_rate": 4.065320675579132e-06, "loss": 2.4811, "loss_": 1.2776, "moe_loss": 0.1601, "moe_loss_longrong": 1.3966, "step": 3696 }, { "epoch": 0.71, "learning_rate": 4.03026568562135e-06, "loss": 2.4559, "loss_": 0.8804, "moe_loss": 0.1601, "moe_loss_longrong": 1.3967, "step": 3703 }, { "epoch": 0.71, "learning_rate": 3.995324305711976e-06, "loss": 2.4263, "loss_": 0.9593, "moe_loss": 0.1601, "moe_loss_longrong": 1.3968, "step": 3710 }, { "epoch": 0.72, "learning_rate": 3.9604972008206085e-06, "loss": 2.4698, "loss_": 1.2848, "moe_loss": 0.1601, "moe_loss_longrong": 1.3967, "step": 3717 }, { "epoch": 0.72, "learning_rate": 3.9257850337420856e-06, "loss": 2.4923, "loss_": 1.0082, "moe_loss": 0.1601, "moe_loss_longrong": 1.3965, "step": 3724 }, { "epoch": 0.72, "learning_rate": 3.891188465083865e-06, "loss": 2.4502, "loss_": 1.0253, "moe_loss": 0.1604, "moe_loss_longrong": 1.4059, "step": 3731 }, { "epoch": 0.72, "learning_rate": 3.8567081532534374e-06, "loss": 2.4543, "loss_": 0.6744, "moe_loss": 0.1601, "moe_loss_longrong": 1.3962, "step": 3738 }, { "epoch": 0.72, "learning_rate": 3.822344754445826e-06, "loss": 2.4628, "loss_": 1.0211, "moe_loss": 0.1601, "moe_loss_longrong": 1.3968, "step": 3745 }, { "epoch": 0.72, "learning_rate": 3.788098922631067e-06, "loss": 2.4765, "loss_": 1.0228, "moe_loss": 0.1601, "moe_loss_longrong": 1.3968, "step": 3752 }, { "epoch": 0.72, "learning_rate": 3.753971309541784e-06, "loss": 2.4717, "loss_": 0.846, "moe_loss": 0.1601, "moe_loss_longrong": 1.3967, "step": 3759 }, { "epoch": 0.72, "learning_rate": 3.719962564660783e-06, "loss": 2.4447, "loss_": 0.4578, "moe_loss": 0.1601, "moe_loss_longrong": 1.3973, "step": 3766 }, { "epoch": 0.73, "learning_rate": 3.6860733352086866e-06, "loss": 2.4563, "loss_": 0.8938, "moe_loss": 0.1601, "moe_loss_longrong": 1.3965, "step": 3773 }, { "epoch": 0.73, "learning_rate": 3.652304266131612e-06, "loss": 2.4641, "loss_": 0.9597, "moe_loss": 0.1601, "moe_loss_longrong": 1.3966, "step": 3780 }, { "epoch": 0.73, "learning_rate": 3.618656000088916e-06, "loss": 2.4801, "loss_": 0.7477, "moe_loss": 0.1601, "moe_loss_longrong": 1.3968, "step": 3787 }, { "epoch": 0.73, "learning_rate": 3.585129177440938e-06, "loss": 2.4649, "loss_": 1.1009, "moe_loss": 0.1601, "moe_loss_longrong": 1.3965, "step": 3794 }, { "epoch": 0.73, "learning_rate": 3.5517244362368363e-06, "loss": 2.4828, "loss_": 1.1634, "moe_loss": 0.1601, "moe_loss_longrong": 1.3966, "step": 3801 }, { "epoch": 0.73, "learning_rate": 3.5184424122024406e-06, "loss": 2.4532, "loss_": 1.1849, "moe_loss": 0.1601, "moe_loss_longrong": 1.3965, "step": 3808 }, { "epoch": 0.73, "learning_rate": 3.485283738728139e-06, "loss": 2.4494, "loss_": 0.8625, "moe_loss": 0.1601, "moe_loss_longrong": 1.3965, "step": 3815 }, { "epoch": 0.74, "learning_rate": 3.452249046856836e-06, "loss": 2.4715, "loss_": 1.012, "moe_loss": 0.1604, "moe_loss_longrong": 1.4064, "step": 3822 }, { "epoch": 0.74, "learning_rate": 3.4193389652719478e-06, "loss": 2.4256, "loss_": 1.0154, "moe_loss": 0.1601, "moe_loss_longrong": 1.3961, "step": 3829 }, { "epoch": 0.74, "learning_rate": 3.3865541202854314e-06, "loss": 2.4636, "loss_": 1.1752, "moe_loss": 0.1601, "moe_loss_longrong": 1.3964, "step": 3836 }, { "epoch": 0.74, "learning_rate": 3.353895135825854e-06, "loss": 2.442, "loss_": 0.8945, "moe_loss": 0.1601, "moe_loss_longrong": 1.3964, "step": 3843 }, { "epoch": 0.74, "learning_rate": 3.321362633426547e-06, "loss": 2.4677, "loss_": 0.9853, "moe_loss": 0.1601, "moe_loss_longrong": 1.3963, "step": 3850 }, { "epoch": 0.74, "learning_rate": 3.2889572322137454e-06, "loss": 2.4633, "loss_": 1.2634, "moe_loss": 0.1601, "moe_loss_longrong": 1.3961, "step": 3857 }, { "epoch": 0.74, "learning_rate": 3.256679548894831e-06, "loss": 2.4637, "loss_": 0.8044, "moe_loss": 0.1601, "moe_loss_longrong": 1.3965, "step": 3864 }, { "epoch": 0.74, "learning_rate": 3.224530197746587e-06, "loss": 2.4343, "loss_": 0.8018, "moe_loss": 0.1601, "moe_loss_longrong": 1.3968, "step": 3871 }, { "epoch": 0.75, "learning_rate": 3.1925097906034962e-06, "loss": 2.4328, "loss_": 0.8425, "moe_loss": 0.1604, "moe_loss_longrong": 1.4055, "step": 3878 }, { "epoch": 0.75, "learning_rate": 3.1606189368461117e-06, "loss": 2.4648, "loss_": 0.9644, "moe_loss": 0.1601, "moe_loss_longrong": 1.3964, "step": 3885 }, { "epoch": 0.75, "learning_rate": 3.128858243389461e-06, "loss": 2.4541, "loss_": 0.6231, "moe_loss": 0.1601, "moe_loss_longrong": 1.3966, "step": 3892 }, { "epoch": 0.75, "learning_rate": 3.097228314671481e-06, "loss": 2.476, "loss_": 0.9949, "moe_loss": 0.1601, "moe_loss_longrong": 1.3961, "step": 3899 }, { "epoch": 0.75, "learning_rate": 3.065729752641532e-06, "loss": 2.4229, "loss_": 0.8875, "moe_loss": 0.1601, "moe_loss_longrong": 1.3962, "step": 3906 }, { "epoch": 0.75, "learning_rate": 3.034363156748933e-06, "loss": 2.4502, "loss_": 0.9087, "moe_loss": 0.1604, "moe_loss_longrong": 1.4054, "step": 3913 }, { "epoch": 0.75, "learning_rate": 3.0031291239315473e-06, "loss": 2.4367, "loss_": 0.8938, "moe_loss": 0.1601, "moe_loss_longrong": 1.3965, "step": 3920 }, { "epoch": 0.76, "learning_rate": 2.9720282486044407e-06, "loss": 2.471, "loss_": 0.742, "moe_loss": 0.1601, "moe_loss_longrong": 1.3963, "step": 3927 }, { "epoch": 0.76, "learning_rate": 2.941061122648545e-06, "loss": 2.4598, "loss_": 0.6142, "moe_loss": 0.1601, "moe_loss_longrong": 1.3966, "step": 3934 }, { "epoch": 0.76, "learning_rate": 2.910228335399419e-06, "loss": 2.4532, "loss_": 0.9248, "moe_loss": 0.1601, "moe_loss_longrong": 1.3961, "step": 3941 }, { "epoch": 0.76, "learning_rate": 2.8795304736360184e-06, "loss": 2.4694, "loss_": 0.876, "moe_loss": 0.1601, "moe_loss_longrong": 1.3961, "step": 3948 }, { "epoch": 0.76, "learning_rate": 2.8489681215695242e-06, "loss": 2.4464, "loss_": 1.0146, "moe_loss": 0.1601, "moe_loss_longrong": 1.3961, "step": 3955 }, { "epoch": 0.76, "learning_rate": 2.8185418608322344e-06, "loss": 2.4632, "loss_": 0.7415, "moe_loss": 0.1601, "moe_loss_longrong": 1.3964, "step": 3962 }, { "epoch": 0.76, "learning_rate": 2.788252270466497e-06, "loss": 2.4575, "loss_": 1.1931, "moe_loss": 0.1601, "moe_loss_longrong": 1.3963, "step": 3969 }, { "epoch": 0.76, "learning_rate": 2.7580999269136854e-06, "loss": 2.4825, "loss_": 0.9967, "moe_loss": 0.1601, "moe_loss_longrong": 1.3961, "step": 3976 }, { "epoch": 0.77, "learning_rate": 2.728085404003217e-06, "loss": 2.4658, "loss_": 0.9402, "moe_loss": 0.1601, "moe_loss_longrong": 1.3961, "step": 3983 }, { "epoch": 0.77, "learning_rate": 2.698209272941659e-06, "loss": 2.4466, "loss_": 1.1097, "moe_loss": 0.1601, "moe_loss_longrong": 1.3963, "step": 3990 }, { "epoch": 0.77, "learning_rate": 2.668472102301829e-06, "loss": 2.4544, "loss_": 1.018, "moe_loss": 0.1601, "moe_loss_longrong": 1.3962, "step": 3997 }, { "epoch": 0.77, "learning_rate": 2.6388744580119975e-06, "loss": 2.4195, "loss_": 0.9804, "moe_loss": 0.1601, "moe_loss_longrong": 1.3961, "step": 4004 }, { "epoch": 0.77, "learning_rate": 2.6094169033451066e-06, "loss": 2.4628, "loss_": 0.7708, "moe_loss": 0.1601, "moe_loss_longrong": 1.3963, "step": 4011 }, { "epoch": 0.77, "learning_rate": 2.580099998908049e-06, "loss": 2.4624, "loss_": 0.6729, "moe_loss": 0.1601, "moe_loss_longrong": 1.3962, "step": 4018 }, { "epoch": 0.77, "learning_rate": 2.5509243026309983e-06, "loss": 2.4753, "loss_": 1.177, "moe_loss": 0.1601, "moe_loss_longrong": 1.3961, "step": 4025 }, { "epoch": 0.78, "learning_rate": 2.5218903697568075e-06, "loss": 2.4669, "loss_": 1.1103, "moe_loss": 0.1601, "moe_loss_longrong": 1.396, "step": 4032 }, { "epoch": 0.78, "learning_rate": 2.4929987528304144e-06, "loss": 2.4671, "loss_": 1.3009, "moe_loss": 0.1601, "moe_loss_longrong": 1.3962, "step": 4039 }, { "epoch": 0.78, "learning_rate": 2.4642500016883532e-06, "loss": 2.4649, "loss_": 0.4641, "moe_loss": 0.1601, "moe_loss_longrong": 1.3964, "step": 4046 }, { "epoch": 0.78, "learning_rate": 2.4356446634482756e-06, "loss": 2.4255, "loss_": 0.7561, "moe_loss": 0.1601, "moe_loss_longrong": 1.3963, "step": 4053 }, { "epoch": 0.78, "learning_rate": 2.407183282498534e-06, "loss": 2.4512, "loss_": 1.0891, "moe_loss": 0.1601, "moe_loss_longrong": 1.396, "step": 4060 }, { "epoch": 0.78, "learning_rate": 2.3788664004878405e-06, "loss": 2.4548, "loss_": 0.8427, "moe_loss": 0.1601, "moe_loss_longrong": 1.3964, "step": 4067 }, { "epoch": 0.78, "learning_rate": 2.350694556314934e-06, "loss": 2.4775, "loss_": 1.1603, "moe_loss": 0.1601, "moe_loss_longrong": 1.396, "step": 4074 }, { "epoch": 0.79, "learning_rate": 2.32266828611835e-06, "loss": 2.4762, "loss_": 0.982, "moe_loss": 0.1601, "moe_loss_longrong": 1.396, "step": 4081 }, { "epoch": 0.79, "learning_rate": 2.2947881232662007e-06, "loss": 2.4574, "loss_": 0.6854, "moe_loss": 0.1601, "moe_loss_longrong": 1.3965, "step": 4088 }, { "epoch": 0.79, "learning_rate": 2.2670545983460245e-06, "loss": 2.4641, "loss_": 1.1094, "moe_loss": 0.1601, "moe_loss_longrong": 1.396, "step": 4095 }, { "epoch": 0.79, "learning_rate": 2.2394682391546928e-06, "loss": 2.4546, "loss_": 0.8832, "moe_loss": 0.1601, "moe_loss_longrong": 1.3962, "step": 4102 }, { "epoch": 0.79, "learning_rate": 2.2120295706883698e-06, "loss": 2.4228, "loss_": 0.534, "moe_loss": 0.1601, "moe_loss_longrong": 1.3966, "step": 4109 }, { "epoch": 0.79, "learning_rate": 2.184739115132517e-06, "loss": 2.4502, "loss_": 0.6129, "moe_loss": 0.1601, "moe_loss_longrong": 1.3959, "step": 4116 }, { "epoch": 0.79, "learning_rate": 2.157597391851949e-06, "loss": 2.4514, "loss_": 0.8927, "moe_loss": 0.1604, "moe_loss_longrong": 1.4045, "step": 4123 }, { "epoch": 0.79, "learning_rate": 2.130604917380962e-06, "loss": 2.4434, "loss_": 0.87, "moe_loss": 0.1601, "moe_loss_longrong": 1.3958, "step": 4130 }, { "epoch": 0.8, "learning_rate": 2.103762205413493e-06, "loss": 2.4475, "loss_": 1.1291, "moe_loss": 0.1601, "moe_loss_longrong": 1.3961, "step": 4137 }, { "epoch": 0.8, "learning_rate": 2.0770697667933436e-06, "loss": 2.4697, "loss_": 0.8274, "moe_loss": 0.1601, "moe_loss_longrong": 1.3961, "step": 4144 }, { "epoch": 0.8, "learning_rate": 2.0505281095044804e-06, "loss": 2.4725, "loss_": 1.0877, "moe_loss": 0.1601, "moe_loss_longrong": 1.396, "step": 4151 }, { "epoch": 0.8, "learning_rate": 2.024137738661329e-06, "loss": 2.4757, "loss_": 0.6894, "moe_loss": 0.1601, "moe_loss_longrong": 1.3962, "step": 4158 }, { "epoch": 0.8, "learning_rate": 1.997899156499191e-06, "loss": 2.4566, "loss_": 0.7625, "moe_loss": 0.1601, "moe_loss_longrong": 1.3962, "step": 4165 }, { "epoch": 0.8, "learning_rate": 1.9718128623646792e-06, "loss": 2.514, "loss_": 1.3132, "moe_loss": 0.1601, "moe_loss_longrong": 1.3958, "step": 4172 }, { "epoch": 0.8, "learning_rate": 1.9458793527062035e-06, "loss": 2.4659, "loss_": 0.6083, "moe_loss": 0.1601, "moe_loss_longrong": 1.3961, "step": 4179 }, { "epoch": 0.81, "learning_rate": 1.9200991210645394e-06, "loss": 2.441, "loss_": 0.8687, "moe_loss": 0.1601, "moe_loss_longrong": 1.3964, "step": 4186 }, { "epoch": 0.81, "learning_rate": 1.8944726580634287e-06, "loss": 2.4227, "loss_": 0.863, "moe_loss": 0.1604, "moe_loss_longrong": 1.4043, "step": 4193 }, { "epoch": 0.81, "learning_rate": 1.8690004514002314e-06, "loss": 2.4488, "loss_": 1.0513, "moe_loss": 0.1601, "moe_loss_longrong": 1.3962, "step": 4200 }, { "epoch": 0.81, "learning_rate": 1.8436829858366655e-06, "loss": 2.4269, "loss_": 0.9285, "moe_loss": 0.1601, "moe_loss_longrong": 1.3959, "step": 4207 }, { "epoch": 0.81, "learning_rate": 1.8185207431895613e-06, "loss": 2.4577, "loss_": 1.0791, "moe_loss": 0.1601, "moe_loss_longrong": 1.3958, "step": 4214 }, { "epoch": 0.81, "learning_rate": 1.7935142023217056e-06, "loss": 2.4565, "loss_": 1.0052, "moe_loss": 0.1601, "moe_loss_longrong": 1.3958, "step": 4221 }, { "epoch": 0.81, "learning_rate": 1.768663839132727e-06, "loss": 2.4314, "loss_": 0.7553, "moe_loss": 0.1601, "moe_loss_longrong": 1.3959, "step": 4228 }, { "epoch": 0.81, "learning_rate": 1.7439701265500274e-06, "loss": 2.432, "loss_": 1.137, "moe_loss": 0.1601, "moe_loss_longrong": 1.3958, "step": 4235 }, { "epoch": 0.82, "learning_rate": 1.7194335345197933e-06, "loss": 2.466, "loss_": 0.804, "moe_loss": 0.1601, "moe_loss_longrong": 1.3963, "step": 4242 }, { "epoch": 0.82, "learning_rate": 1.6950545299980526e-06, "loss": 2.4119, "loss_": 0.8362, "moe_loss": 0.1601, "moe_loss_longrong": 1.3961, "step": 4249 }, { "epoch": 0.82, "learning_rate": 1.6708335769417827e-06, "loss": 2.4555, "loss_": 0.8946, "moe_loss": 0.1601, "moe_loss_longrong": 1.3957, "step": 4256 }, { "epoch": 0.82, "learning_rate": 1.6467711363000794e-06, "loss": 2.434, "loss_": 1.1374, "moe_loss": 0.1601, "moe_loss_longrong": 1.3958, "step": 4263 }, { "epoch": 0.82, "learning_rate": 1.6228676660053932e-06, "loss": 2.4705, "loss_": 1.07, "moe_loss": 0.1601, "moe_loss_longrong": 1.3958, "step": 4270 }, { "epoch": 0.82, "learning_rate": 1.5991236209648052e-06, "loss": 2.4467, "loss_": 0.5343, "moe_loss": 0.1601, "moe_loss_longrong": 1.3962, "step": 4277 }, { "epoch": 0.82, "learning_rate": 1.575539453051369e-06, "loss": 2.4617, "loss_": 1.2505, "moe_loss": 0.1601, "moe_loss_longrong": 1.3958, "step": 4284 }, { "epoch": 0.83, "learning_rate": 1.5521156110955293e-06, "loss": 2.4389, "loss_": 1.0836, "moe_loss": 0.1601, "moe_loss_longrong": 1.3959, "step": 4291 }, { "epoch": 0.83, "learning_rate": 1.5288525408765564e-06, "loss": 2.4877, "loss_": 0.8473, "moe_loss": 0.1601, "moe_loss_longrong": 1.3959, "step": 4298 }, { "epoch": 0.83, "learning_rate": 1.5057506851140701e-06, "loss": 2.4786, "loss_": 1.0259, "moe_loss": 0.1601, "moe_loss_longrong": 1.3958, "step": 4305 }, { "epoch": 0.83, "learning_rate": 1.4828104834596268e-06, "loss": 2.4086, "loss_": 0.5643, "moe_loss": 0.1601, "moe_loss_longrong": 1.3965, "step": 4312 }, { "epoch": 0.83, "learning_rate": 1.4600323724883337e-06, "loss": 2.4481, "loss_": 1.0485, "moe_loss": 0.1601, "moe_loss_longrong": 1.3959, "step": 4319 }, { "epoch": 0.83, "learning_rate": 1.4374167856905542e-06, "loss": 2.4386, "loss_": 0.9296, "moe_loss": 0.1601, "moe_loss_longrong": 1.396, "step": 4326 }, { "epoch": 0.83, "learning_rate": 1.414964153463655e-06, "loss": 2.4538, "loss_": 0.7446, "moe_loss": 0.1601, "moe_loss_longrong": 1.3959, "step": 4333 }, { "epoch": 0.83, "learning_rate": 1.3926749031038055e-06, "loss": 2.4252, "loss_": 1.0624, "moe_loss": 0.1601, "moe_loss_longrong": 1.3963, "step": 4340 }, { "epoch": 0.84, "learning_rate": 1.370549458797863e-06, "loss": 2.4477, "loss_": 1.0075, "moe_loss": 0.1601, "moe_loss_longrong": 1.3957, "step": 4347 }, { "epoch": 0.84, "learning_rate": 1.3485882416152819e-06, "loss": 2.4224, "loss_": 0.9794, "moe_loss": 0.1601, "moe_loss_longrong": 1.3959, "step": 4354 }, { "epoch": 0.84, "learning_rate": 1.3267916695001172e-06, "loss": 2.4571, "loss_": 0.9473, "moe_loss": 0.1601, "moe_loss_longrong": 1.3954, "step": 4361 }, { "epoch": 0.84, "learning_rate": 1.3051601572630611e-06, "loss": 2.449, "loss_": 1.1259, "moe_loss": 0.1601, "moe_loss_longrong": 1.3957, "step": 4368 }, { "epoch": 0.84, "learning_rate": 1.283694116573546e-06, "loss": 2.4477, "loss_": 1.0313, "moe_loss": 0.1601, "moe_loss_longrong": 1.3959, "step": 4375 }, { "epoch": 0.84, "learning_rate": 1.2623939559519161e-06, "loss": 2.46, "loss_": 0.888, "moe_loss": 0.1601, "moe_loss_longrong": 1.3958, "step": 4382 }, { "epoch": 0.84, "learning_rate": 1.2412600807616526e-06, "loss": 2.4559, "loss_": 0.9206, "moe_loss": 0.1601, "moe_loss_longrong": 1.3957, "step": 4389 }, { "epoch": 0.85, "learning_rate": 1.2202928932016588e-06, "loss": 2.4259, "loss_": 0.8758, "moe_loss": 0.1601, "moe_loss_longrong": 1.3958, "step": 4396 }, { "epoch": 0.85, "learning_rate": 1.1994927922985999e-06, "loss": 2.4477, "loss_": 0.8513, "moe_loss": 0.1601, "moe_loss_longrong": 1.3961, "step": 4403 }, { "epoch": 0.85, "learning_rate": 1.178860173899321e-06, "loss": 2.4408, "loss_": 1.0152, "moe_loss": 0.1601, "moe_loss_longrong": 1.3955, "step": 4410 }, { "epoch": 0.85, "learning_rate": 1.1583954306633004e-06, "loss": 2.4442, "loss_": 1.1666, "moe_loss": 0.1601, "moe_loss_longrong": 1.396, "step": 4417 }, { "epoch": 0.85, "learning_rate": 1.138098952055181e-06, "loss": 2.4404, "loss_": 0.6781, "moe_loss": 0.1601, "moe_loss_longrong": 1.3959, "step": 4424 }, { "epoch": 0.85, "learning_rate": 1.1179711243373736e-06, "loss": 2.4439, "loss_": 0.8599, "moe_loss": 0.1601, "moe_loss_longrong": 1.3962, "step": 4431 }, { "epoch": 0.85, "learning_rate": 1.0980123305626812e-06, "loss": 2.4635, "loss_": 1.0466, "moe_loss": 0.1605, "moe_loss_longrong": 1.4055, "step": 4438 }, { "epoch": 0.86, "learning_rate": 1.0782229505670195e-06, "loss": 2.4436, "loss_": 1.1018, "moe_loss": 0.1601, "moe_loss_longrong": 1.3958, "step": 4445 }, { "epoch": 0.86, "learning_rate": 1.0586033609622004e-06, "loss": 2.4521, "loss_": 0.9168, "moe_loss": 0.1601, "moe_loss_longrong": 1.3954, "step": 4452 }, { "epoch": 0.86, "learning_rate": 1.039153935128744e-06, "loss": 2.4435, "loss_": 1.1978, "moe_loss": 0.1601, "moe_loss_longrong": 1.3955, "step": 4459 }, { "epoch": 0.86, "learning_rate": 1.0198750432087855e-06, "loss": 2.4683, "loss_": 1.2032, "moe_loss": 0.16, "moe_loss_longrong": 1.3955, "step": 4466 }, { "epoch": 0.86, "learning_rate": 1.0007670520990331e-06, "loss": 2.4688, "loss_": 1.0949, "moe_loss": 0.1601, "moe_loss_longrong": 1.3958, "step": 4473 }, { "epoch": 0.86, "learning_rate": 9.818303254437723e-07, "loss": 2.459, "loss_": 1.3033, "moe_loss": 0.1601, "moe_loss_longrong": 1.3958, "step": 4480 }, { "epoch": 0.86, "learning_rate": 9.630652236279626e-07, "loss": 2.4758, "loss_": 0.8537, "moe_loss": 0.1601, "moe_loss_longrong": 1.3958, "step": 4487 }, { "epoch": 0.86, "learning_rate": 9.444721037703597e-07, "loss": 2.4579, "loss_": 0.6901, "moe_loss": 0.1601, "moe_loss_longrong": 1.3959, "step": 4494 }, { "epoch": 0.87, "learning_rate": 9.260513197167398e-07, "loss": 2.4456, "loss_": 1.0315, "moe_loss": 0.1601, "moe_loss_longrong": 1.3957, "step": 4501 }, { "epoch": 0.87, "learning_rate": 9.078032220331523e-07, "loss": 2.4451, "loss_": 1.1487, "moe_loss": 0.16, "moe_loss_longrong": 1.3958, "step": 4508 }, { "epoch": 0.87, "learning_rate": 8.897281579992467e-07, "loss": 2.4204, "loss_": 1.1135, "moe_loss": 0.1601, "moe_loss_longrong": 1.3958, "step": 4515 }, { "epoch": 0.87, "learning_rate": 8.718264716016722e-07, "loss": 2.4218, "loss_": 0.8637, "moe_loss": 0.1601, "moe_loss_longrong": 1.3955, "step": 4522 }, { "epoch": 0.87, "learning_rate": 8.540985035275273e-07, "loss": 2.447, "loss_": 0.8818, "moe_loss": 0.16, "moe_loss_longrong": 1.3955, "step": 4529 }, { "epoch": 0.87, "learning_rate": 8.365445911578785e-07, "loss": 2.4654, "loss_": 0.9987, "moe_loss": 0.1601, "moe_loss_longrong": 1.3955, "step": 4536 }, { "epoch": 0.87, "learning_rate": 8.191650685613273e-07, "loss": 2.4603, "loss_": 1.1883, "moe_loss": 0.1601, "moe_loss_longrong": 1.3956, "step": 4543 }, { "epoch": 0.88, "learning_rate": 8.019602664876758e-07, "loss": 2.4475, "loss_": 1.0646, "moe_loss": 0.1601, "moe_loss_longrong": 1.3958, "step": 4550 }, { "epoch": 0.88, "learning_rate": 7.849305123616091e-07, "loss": 2.4486, "loss_": 0.8589, "moe_loss": 0.1601, "moe_loss_longrong": 1.3957, "step": 4557 }, { "epoch": 0.88, "learning_rate": 7.680761302764727e-07, "loss": 2.4336, "loss_": 1.0525, "moe_loss": 0.1601, "moe_loss_longrong": 1.3957, "step": 4564 }, { "epoch": 0.88, "learning_rate": 7.513974409881186e-07, "loss": 2.4663, "loss_": 1.0924, "moe_loss": 0.16, "moe_loss_longrong": 1.3957, "step": 4571 }, { "epoch": 0.88, "learning_rate": 7.348947619087754e-07, "loss": 2.4417, "loss_": 1.0197, "moe_loss": 0.1601, "moe_loss_longrong": 1.3958, "step": 4578 }, { "epoch": 0.88, "learning_rate": 7.185684071010224e-07, "loss": 2.4364, "loss_": 0.9028, "moe_loss": 0.1601, "moe_loss_longrong": 1.3956, "step": 4585 }, { "epoch": 0.88, "learning_rate": 7.024186872718164e-07, "loss": 2.4733, "loss_": 0.5258, "moe_loss": 0.1601, "moe_loss_longrong": 1.3961, "step": 4592 }, { "epoch": 0.88, "learning_rate": 6.864459097665654e-07, "loss": 2.4453, "loss_": 0.9338, "moe_loss": 0.1601, "moe_loss_longrong": 1.3957, "step": 4599 }, { "epoch": 0.89, "learning_rate": 6.706503785632934e-07, "loss": 2.432, "loss_": 1.1129, "moe_loss": 0.16, "moe_loss_longrong": 1.3956, "step": 4606 }, { "epoch": 0.89, "learning_rate": 6.550323942668469e-07, "loss": 2.4297, "loss_": 0.6761, "moe_loss": 0.1601, "moe_loss_longrong": 1.3963, "step": 4613 }, { "epoch": 0.89, "learning_rate": 6.395922541031741e-07, "loss": 2.4152, "loss_": 0.8792, "moe_loss": 0.1601, "moe_loss_longrong": 1.3956, "step": 4620 }, { "epoch": 0.89, "learning_rate": 6.24330251913674e-07, "loss": 2.4526, "loss_": 1.1836, "moe_loss": 0.16, "moe_loss_longrong": 1.396, "step": 4627 }, { "epoch": 0.89, "learning_rate": 6.092466781495976e-07, "loss": 2.4362, "loss_": 0.9499, "moe_loss": 0.1601, "moe_loss_longrong": 1.3958, "step": 4634 }, { "epoch": 0.89, "learning_rate": 5.943418198665251e-07, "loss": 2.4439, "loss_": 1.1622, "moe_loss": 0.1601, "moe_loss_longrong": 1.3958, "step": 4641 }, { "epoch": 0.89, "learning_rate": 5.796159607189001e-07, "loss": 2.4273, "loss_": 0.9876, "moe_loss": 0.1601, "moe_loss_longrong": 1.3956, "step": 4648 }, { "epoch": 0.9, "learning_rate": 5.650693809546348e-07, "loss": 2.4735, "loss_": 1.1284, "moe_loss": 0.16, "moe_loss_longrong": 1.3956, "step": 4655 }, { "epoch": 0.9, "learning_rate": 5.507023574097725e-07, "loss": 2.4393, "loss_": 0.8675, "moe_loss": 0.1601, "moe_loss_longrong": 1.3958, "step": 4662 }, { "epoch": 0.9, "learning_rate": 5.365151635032218e-07, "loss": 2.4482, "loss_": 0.901, "moe_loss": 0.1601, "moe_loss_longrong": 1.3955, "step": 4669 }, { "epoch": 0.9, "learning_rate": 5.225080692315532e-07, "loss": 2.441, "loss_": 1.0355, "moe_loss": 0.1601, "moe_loss_longrong": 1.3956, "step": 4676 }, { "epoch": 0.9, "learning_rate": 5.086813411638581e-07, "loss": 2.4277, "loss_": 0.9478, "moe_loss": 0.16, "moe_loss_longrong": 1.3955, "step": 4683 }, { "epoch": 0.9, "learning_rate": 4.9503524243668e-07, "loss": 2.444, "loss_": 0.8901, "moe_loss": 0.1604, "moe_loss_longrong": 1.4048, "step": 4690 }, { "epoch": 0.9, "learning_rate": 4.815700327490014e-07, "loss": 2.4286, "loss_": 0.8906, "moe_loss": 0.1601, "moe_loss_longrong": 1.3957, "step": 4697 }, { "epoch": 0.91, "learning_rate": 4.6828596835730487e-07, "loss": 2.4475, "loss_": 1.028, "moe_loss": 0.1601, "moe_loss_longrong": 1.3957, "step": 4704 }, { "epoch": 0.91, "learning_rate": 4.551833020707008e-07, "loss": 2.4281, "loss_": 0.6545, "moe_loss": 0.1601, "moe_loss_longrong": 1.3958, "step": 4711 }, { "epoch": 0.91, "learning_rate": 4.4226228324610544e-07, "loss": 2.4677, "loss_": 0.9228, "moe_loss": 0.16, "moe_loss_longrong": 1.3954, "step": 4718 }, { "epoch": 0.91, "learning_rate": 4.295231577835024e-07, "loss": 2.443, "loss_": 0.8677, "moe_loss": 0.1601, "moe_loss_longrong": 1.3957, "step": 4725 }, { "epoch": 0.91, "learning_rate": 4.1696616812126333e-07, "loss": 2.4452, "loss_": 0.8619, "moe_loss": 0.1601, "moe_loss_longrong": 1.3958, "step": 4732 }, { "epoch": 0.91, "learning_rate": 4.0459155323153034e-07, "loss": 2.4501, "loss_": 0.5721, "moe_loss": 0.1601, "moe_loss_longrong": 1.3959, "step": 4739 }, { "epoch": 0.91, "learning_rate": 3.9239954861567177e-07, "loss": 2.4452, "loss_": 1.2849, "moe_loss": 0.1601, "moe_loss_longrong": 1.3957, "step": 4746 }, { "epoch": 0.91, "learning_rate": 3.803903862998004e-07, "loss": 2.4681, "loss_": 1.0272, "moe_loss": 0.1605, "moe_loss_longrong": 1.4052, "step": 4753 }, { "epoch": 0.92, "learning_rate": 3.685642948303503e-07, "loss": 2.4437, "loss_": 0.929, "moe_loss": 0.1601, "moe_loss_longrong": 1.3957, "step": 4760 }, { "epoch": 0.92, "learning_rate": 3.5692149926974006e-07, "loss": 2.4502, "loss_": 1.1455, "moe_loss": 0.1601, "moe_loss_longrong": 1.3958, "step": 4767 }, { "epoch": 0.92, "learning_rate": 3.454622211920766e-07, "loss": 2.4262, "loss_": 0.6494, "moe_loss": 0.1601, "moe_loss_longrong": 1.3955, "step": 4774 }, { "epoch": 0.92, "learning_rate": 3.341866786789505e-07, "loss": 2.4259, "loss_": 1.113, "moe_loss": 0.1601, "moe_loss_longrong": 1.3959, "step": 4781 }, { "epoch": 0.92, "learning_rate": 3.2309508631527486e-07, "loss": 2.4309, "loss_": 0.8977, "moe_loss": 0.1601, "moe_loss_longrong": 1.3957, "step": 4788 }, { "epoch": 0.92, "learning_rate": 3.121876551852099e-07, "loss": 2.4311, "loss_": 1.0739, "moe_loss": 0.1601, "moe_loss_longrong": 1.396, "step": 4795 }, { "epoch": 0.92, "learning_rate": 3.0146459286813924e-07, "loss": 2.4515, "loss_": 0.9781, "moe_loss": 0.1601, "moe_loss_longrong": 1.3959, "step": 4802 }, { "epoch": 0.93, "learning_rate": 2.909261034347255e-07, "loss": 2.4553, "loss_": 0.9123, "moe_loss": 0.1601, "moe_loss_longrong": 1.3959, "step": 4809 }, { "epoch": 0.93, "learning_rate": 2.8057238744301994e-07, "loss": 2.4516, "loss_": 1.131, "moe_loss": 0.1601, "moe_loss_longrong": 1.396, "step": 4816 }, { "epoch": 0.93, "learning_rate": 2.704036419346534e-07, "loss": 2.4628, "loss_": 1.0138, "moe_loss": 0.1605, "moe_loss_longrong": 1.4051, "step": 4823 }, { "epoch": 0.93, "learning_rate": 2.604200604310825e-07, "loss": 2.4657, "loss_": 1.0133, "moe_loss": 0.1601, "moe_loss_longrong": 1.3955, "step": 4830 }, { "epoch": 0.93, "learning_rate": 2.506218329299026e-07, "loss": 2.4311, "loss_": 1.1114, "moe_loss": 0.1601, "moe_loss_longrong": 1.396, "step": 4837 }, { "epoch": 0.93, "learning_rate": 2.410091459012376e-07, "loss": 2.4529, "loss_": 1.0407, "moe_loss": 0.1601, "moe_loss_longrong": 1.396, "step": 4844 }, { "epoch": 0.93, "learning_rate": 2.3158218228419127e-07, "loss": 2.4564, "loss_": 1.061, "moe_loss": 0.1601, "moe_loss_longrong": 1.3955, "step": 4851 }, { "epoch": 0.93, "learning_rate": 2.2234112148336373e-07, "loss": 2.4584, "loss_": 0.735, "moe_loss": 0.1601, "moe_loss_longrong": 1.3956, "step": 4858 }, { "epoch": 0.94, "learning_rate": 2.1328613936543396e-07, "loss": 2.425, "loss_": 0.9476, "moe_loss": 0.1601, "moe_loss_longrong": 1.3962, "step": 4865 }, { "epoch": 0.94, "learning_rate": 2.0441740825582258e-07, "loss": 2.4643, "loss_": 1.0806, "moe_loss": 0.1601, "moe_loss_longrong": 1.3956, "step": 4872 }, { "epoch": 0.94, "learning_rate": 1.9573509693540104e-07, "loss": 2.4676, "loss_": 0.825, "moe_loss": 0.16, "moe_loss_longrong": 1.3954, "step": 4879 }, { "epoch": 0.94, "learning_rate": 1.872393706372866e-07, "loss": 2.4485, "loss_": 0.8625, "moe_loss": 0.1605, "moe_loss_longrong": 1.4046, "step": 4886 }, { "epoch": 0.94, "learning_rate": 1.789303910436968e-07, "loss": 2.4413, "loss_": 0.7278, "moe_loss": 0.1601, "moe_loss_longrong": 1.3958, "step": 4893 }, { "epoch": 0.94, "learning_rate": 1.7080831628286886e-07, "loss": 2.4452, "loss_": 0.812, "moe_loss": 0.1601, "moe_loss_longrong": 1.3961, "step": 4900 }, { "epoch": 0.94, "learning_rate": 1.6287330092605525e-07, "loss": 2.4596, "loss_": 0.6445, "moe_loss": 0.1601, "moe_loss_longrong": 1.3958, "step": 4907 }, { "epoch": 0.95, "learning_rate": 1.551254959845805e-07, "loss": 2.4541, "loss_": 1.0014, "moe_loss": 0.1601, "moe_loss_longrong": 1.3957, "step": 4914 }, { "epoch": 0.95, "learning_rate": 1.4756504890696466e-07, "loss": 2.4342, "loss_": 1.1963, "moe_loss": 0.16, "moe_loss_longrong": 1.3957, "step": 4921 }, { "epoch": 0.95, "learning_rate": 1.401921035761189e-07, "loss": 2.4346, "loss_": 0.8071, "moe_loss": 0.1604, "moe_loss_longrong": 1.4051, "step": 4928 }, { "epoch": 0.95, "learning_rate": 1.3300680030661096e-07, "loss": 2.464, "loss_": 1.1756, "moe_loss": 0.16, "moe_loss_longrong": 1.3957, "step": 4935 }, { "epoch": 0.95, "learning_rate": 1.2600927584198618e-07, "loss": 2.4334, "loss_": 1.0679, "moe_loss": 0.1601, "moe_loss_longrong": 1.3954, "step": 4942 }, { "epoch": 0.95, "learning_rate": 1.1919966335217636e-07, "loss": 2.4779, "loss_": 0.8872, "moe_loss": 0.16, "moe_loss_longrong": 1.3956, "step": 4949 }, { "epoch": 0.95, "learning_rate": 1.1257809243095385e-07, "loss": 2.4339, "loss_": 0.8956, "moe_loss": 0.1601, "moe_loss_longrong": 1.3956, "step": 4956 }, { "epoch": 0.95, "learning_rate": 1.0614468909347476e-07, "loss": 2.4414, "loss_": 1.1397, "moe_loss": 0.1601, "moe_loss_longrong": 1.3956, "step": 4963 }, { "epoch": 0.96, "learning_rate": 9.989957577387521e-08, "loss": 2.4253, "loss_": 0.8755, "moe_loss": 0.1601, "moe_loss_longrong": 1.3955, "step": 4970 }, { "epoch": 0.96, "learning_rate": 9.384287132294223e-08, "loss": 2.4599, "loss_": 0.9577, "moe_loss": 0.16, "moe_loss_longrong": 1.3954, "step": 4977 }, { "epoch": 0.96, "learning_rate": 8.797469100585432e-08, "loss": 2.4615, "loss_": 0.7768, "moe_loss": 0.1601, "moe_loss_longrong": 1.3957, "step": 4984 }, { "epoch": 0.96, "learning_rate": 8.229514649998438e-08, "loss": 2.4414, "loss_": 1.0058, "moe_loss": 0.16, "moe_loss_longrong": 1.3956, "step": 4991 }, { "epoch": 0.96, "learning_rate": 7.680434589277696e-08, "loss": 2.4587, "loss_": 1.0013, "moe_loss": 0.1601, "moe_loss_longrong": 1.3957, "step": 4998 }, { "epoch": 0.96, "learning_rate": 7.150239367969102e-08, "loss": 2.4314, "loss_": 0.9539, "moe_loss": 0.1601, "moe_loss_longrong": 1.3959, "step": 5005 }, { "epoch": 0.96, "learning_rate": 6.63893907622104e-08, "loss": 2.4711, "loss_": 0.9778, "moe_loss": 0.1601, "moe_loss_longrong": 1.3954, "step": 5012 }, { "epoch": 0.97, "learning_rate": 6.14654344459209e-08, "loss": 2.4558, "loss_": 0.9942, "moe_loss": 0.1601, "moe_loss_longrong": 1.3958, "step": 5019 }, { "epoch": 0.97, "learning_rate": 5.673061843866623e-08, "loss": 2.4748, "loss_": 0.7836, "moe_loss": 0.1601, "moe_loss_longrong": 1.3959, "step": 5026 }, { "epoch": 0.97, "learning_rate": 5.218503284875609e-08, "loss": 2.4418, "loss_": 1.0962, "moe_loss": 0.1601, "moe_loss_longrong": 1.3955, "step": 5033 }, { "epoch": 0.97, "learning_rate": 4.7828764183257545e-08, "loss": 2.4128, "loss_": 0.9561, "moe_loss": 0.1601, "moe_loss_longrong": 1.3958, "step": 5040 }, { "epoch": 0.97, "learning_rate": 4.366189534634191e-08, "loss": 2.4604, "loss_": 0.9591, "moe_loss": 0.16, "moe_loss_longrong": 1.3955, "step": 5047 }, { "epoch": 0.97, "learning_rate": 3.9684505637718194e-08, "loss": 2.4619, "loss_": 1.1709, "moe_loss": 0.1601, "moe_loss_longrong": 1.3959, "step": 5054 }, { "epoch": 0.97, "learning_rate": 3.589667075110992e-08, "loss": 2.4199, "loss_": 1.0163, "moe_loss": 0.1601, "moe_loss_longrong": 1.3959, "step": 5061 }, { "epoch": 0.98, "learning_rate": 3.229846277282511e-08, "loss": 2.4621, "loss_": 0.9223, "moe_loss": 0.1601, "moe_loss_longrong": 1.3959, "step": 5068 }, { "epoch": 0.98, "learning_rate": 2.8889950180382985e-08, "loss": 2.4625, "loss_": 0.4562, "moe_loss": 0.1601, "moe_loss_longrong": 1.3959, "step": 5075 }, { "epoch": 0.98, "learning_rate": 2.5671197841203867e-08, "loss": 2.4386, "loss_": 1.0035, "moe_loss": 0.1601, "moe_loss_longrong": 1.3958, "step": 5082 }, { "epoch": 0.98, "learning_rate": 2.264226701138461e-08, "loss": 2.4681, "loss_": 1.218, "moe_loss": 0.16, "moe_loss_longrong": 1.3955, "step": 5089 }, { "epoch": 0.98, "learning_rate": 1.9803215334522895e-08, "loss": 2.427, "loss_": 0.7696, "moe_loss": 0.1601, "moe_loss_longrong": 1.3961, "step": 5096 }, { "epoch": 0.98, "learning_rate": 1.7154096840629186e-08, "loss": 2.4851, "loss_": 1.054, "moe_loss": 0.1601, "moe_loss_longrong": 1.3956, "step": 5103 }, { "epoch": 0.98, "learning_rate": 1.4694961945093122e-08, "loss": 2.4448, "loss_": 0.8593, "moe_loss": 0.1601, "moe_loss_longrong": 1.3959, "step": 5110 }, { "epoch": 0.98, "learning_rate": 1.2425857447725397e-08, "loss": 2.454, "loss_": 0.8875, "moe_loss": 0.1601, "moe_loss_longrong": 1.3956, "step": 5117 }, { "epoch": 0.99, "learning_rate": 1.0346826531865139e-08, "loss": 2.4289, "loss_": 0.9578, "moe_loss": 0.16, "moe_loss_longrong": 1.3957, "step": 5124 }, { "epoch": 0.99, "learning_rate": 8.457908763562783e-09, "loss": 2.4436, "loss_": 1.026, "moe_loss": 0.1601, "moe_loss_longrong": 1.3956, "step": 5131 }, { "epoch": 0.99, "learning_rate": 6.759140090824012e-09, "loss": 2.4605, "loss_": 0.6569, "moe_loss": 0.1601, "moe_loss_longrong": 1.3954, "step": 5138 }, { "epoch": 0.99, "learning_rate": 5.25055284292475e-09, "loss": 2.4668, "loss_": 1.2459, "moe_loss": 0.16, "moe_loss_longrong": 1.3959, "step": 5145 }, { "epoch": 0.99, "learning_rate": 3.932175729797205e-09, "loss": 2.4129, "loss_": 0.9565, "moe_loss": 0.1601, "moe_loss_longrong": 1.3959, "step": 5152 }, { "epoch": 0.99, "learning_rate": 2.8040338414847545e-09, "loss": 2.4469, "loss_": 0.8875, "moe_loss": 0.1601, "moe_loss_longrong": 1.3957, "step": 5159 }, { "epoch": 0.99, "learning_rate": 1.8661486476612144e-09, "loss": 2.4186, "loss_": 0.7829, "moe_loss": 0.1601, "moe_loss_longrong": 1.396, "step": 5166 }, { "epoch": 1.0, "learning_rate": 1.1185379972256105e-09, "loss": 2.4351, "loss_": 0.7496, "moe_loss": 0.1601, "moe_loss_longrong": 1.3956, "step": 5173 }, { "epoch": 1.0, "learning_rate": 5.612161179613385e-10, "loss": 2.4242, "loss_": 0.9366, "moe_loss": 0.1601, "moe_loss_longrong": 1.3956, "step": 5180 }, { "epoch": 1.0, "learning_rate": 1.9419361626416e-10, "loss": 2.4684, "loss_": 1.0884, "moe_loss": 0.16, "moe_loss_longrong": 1.3956, "step": 5187 }, { "epoch": 1.0, "learning_rate": 1.7477476940142013e-11, "loss": 2.4801, "loss_": 1.0033, "moe_loss": 0.1601, "moe_loss_longrong": 1.3959, "step": 5194 }, { "epoch": 1.0, "step": 5197, "total_flos": 8.818772994970092e+18, "train_loss": 2.4979199135512236, "train_runtime": 95362.7285, "train_samples_per_second": 6.976, "train_steps_per_second": 0.054 } ], "logging_steps": 7, "max_steps": 5197, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 1000, "total_flos": 8.818772994970092e+18, "train_batch_size": 2, "trial_name": null, "trial_params": null }