STGC-Phi2-384 / trainer_state.json
7LRY's picture
Upload folder using huggingface_hub
f8db096 verified
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.9998557067962099,
"eval_steps": 500,
"global_step": 5197,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0,
"learning_rate": 8.974358974358975e-07,
"loss": 2.9732,
"loss_": 1.436,
"moe_loss": 0.1675,
"moe_loss_longrong": 1.4982,
"step": 7
},
{
"epoch": 0.0,
"learning_rate": 1.794871794871795e-06,
"loss": 2.9414,
"loss_": 1.3375,
"moe_loss": 0.1674,
"moe_loss_longrong": 1.4964,
"step": 14
},
{
"epoch": 0.0,
"learning_rate": 2.6923076923076923e-06,
"loss": 2.8682,
"loss_": 1.1438,
"moe_loss": 0.1663,
"moe_loss_longrong": 1.4917,
"step": 21
},
{
"epoch": 0.01,
"learning_rate": 3.58974358974359e-06,
"loss": 2.806,
"loss_": 1.2312,
"moe_loss": 0.1655,
"moe_loss_longrong": 1.488,
"step": 28
},
{
"epoch": 0.01,
"learning_rate": 4.487179487179488e-06,
"loss": 2.7734,
"loss_": 1.2285,
"moe_loss": 0.1645,
"moe_loss_longrong": 1.4882,
"step": 35
},
{
"epoch": 0.01,
"learning_rate": 5.384615384615385e-06,
"loss": 2.7314,
"loss_": 1.0526,
"moe_loss": 0.1635,
"moe_loss_longrong": 1.4847,
"step": 42
},
{
"epoch": 0.01,
"learning_rate": 6.282051282051282e-06,
"loss": 2.6961,
"loss_": 0.9249,
"moe_loss": 0.1627,
"moe_loss_longrong": 1.4803,
"step": 49
},
{
"epoch": 0.01,
"learning_rate": 7.17948717948718e-06,
"loss": 2.6779,
"loss_": 0.9535,
"moe_loss": 0.1665,
"moe_loss_longrong": 1.5082,
"step": 56
},
{
"epoch": 0.01,
"learning_rate": 8.076923076923077e-06,
"loss": 2.6983,
"loss_": 1.1541,
"moe_loss": 0.1617,
"moe_loss_longrong": 1.4777,
"step": 63
},
{
"epoch": 0.01,
"learning_rate": 8.974358974358976e-06,
"loss": 2.6718,
"loss_": 1.0194,
"moe_loss": 0.1613,
"moe_loss_longrong": 1.4718,
"step": 70
},
{
"epoch": 0.01,
"learning_rate": 9.871794871794872e-06,
"loss": 2.6443,
"loss_": 0.7302,
"moe_loss": 0.1615,
"moe_loss_longrong": 1.4723,
"step": 77
},
{
"epoch": 0.02,
"learning_rate": 1.076923076923077e-05,
"loss": 2.7002,
"loss_": 1.1497,
"moe_loss": 0.1615,
"moe_loss_longrong": 1.4679,
"step": 84
},
{
"epoch": 0.02,
"learning_rate": 1.1666666666666668e-05,
"loss": 2.6528,
"loss_": 1.114,
"moe_loss": 0.1613,
"moe_loss_longrong": 1.4653,
"step": 91
},
{
"epoch": 0.02,
"learning_rate": 1.2564102564102565e-05,
"loss": 2.667,
"loss_": 1.0337,
"moe_loss": 0.1614,
"moe_loss_longrong": 1.466,
"step": 98
},
{
"epoch": 0.02,
"learning_rate": 1.3461538461538463e-05,
"loss": 2.61,
"loss_": 0.8435,
"moe_loss": 0.1643,
"moe_loss_longrong": 1.4985,
"step": 105
},
{
"epoch": 0.02,
"learning_rate": 1.435897435897436e-05,
"loss": 2.6878,
"loss_": 1.2116,
"moe_loss": 0.1611,
"moe_loss_longrong": 1.4619,
"step": 112
},
{
"epoch": 0.02,
"learning_rate": 1.5256410256410257e-05,
"loss": 2.6614,
"loss_": 1.2295,
"moe_loss": 0.1612,
"moe_loss_longrong": 1.4628,
"step": 119
},
{
"epoch": 0.02,
"learning_rate": 1.6153846153846154e-05,
"loss": 2.6541,
"loss_": 1.087,
"moe_loss": 0.1612,
"moe_loss_longrong": 1.4593,
"step": 126
},
{
"epoch": 0.03,
"learning_rate": 1.7051282051282053e-05,
"loss": 2.6268,
"loss_": 1.0603,
"moe_loss": 0.1611,
"moe_loss_longrong": 1.4563,
"step": 133
},
{
"epoch": 0.03,
"learning_rate": 1.794871794871795e-05,
"loss": 2.6591,
"loss_": 1.1362,
"moe_loss": 0.1611,
"moe_loss_longrong": 1.4566,
"step": 140
},
{
"epoch": 0.03,
"learning_rate": 1.8846153846153846e-05,
"loss": 2.6675,
"loss_": 0.9348,
"moe_loss": 0.1611,
"moe_loss_longrong": 1.4551,
"step": 147
},
{
"epoch": 0.03,
"learning_rate": 1.9743589743589745e-05,
"loss": 2.6317,
"loss_": 1.1054,
"moe_loss": 0.1611,
"moe_loss_longrong": 1.4542,
"step": 154
},
{
"epoch": 0.03,
"learning_rate": 1.999995145147809e-05,
"loss": 2.658,
"loss_": 0.9526,
"moe_loss": 0.1611,
"moe_loss_longrong": 1.4514,
"step": 161
},
{
"epoch": 0.03,
"learning_rate": 1.9999720361590812e-05,
"loss": 2.6381,
"loss_": 0.9977,
"moe_loss": 0.1615,
"moe_loss_longrong": 1.4506,
"step": 168
},
{
"epoch": 0.03,
"learning_rate": 1.9999298966967264e-05,
"loss": 2.6193,
"loss_": 0.8738,
"moe_loss": 0.1612,
"moe_loss_longrong": 1.4527,
"step": 175
},
{
"epoch": 0.04,
"learning_rate": 1.9998687275627008e-05,
"loss": 2.617,
"loss_": 1.0383,
"moe_loss": 0.1612,
"moe_loss_longrong": 1.4503,
"step": 182
},
{
"epoch": 0.04,
"learning_rate": 1.999788529921114e-05,
"loss": 2.6334,
"loss_": 0.9878,
"moe_loss": 0.1611,
"moe_loss_longrong": 1.4484,
"step": 189
},
{
"epoch": 0.04,
"learning_rate": 1.9996893052982083e-05,
"loss": 2.6288,
"loss_": 0.9578,
"moe_loss": 0.161,
"moe_loss_longrong": 1.4476,
"step": 196
},
{
"epoch": 0.04,
"learning_rate": 1.9995710555823277e-05,
"loss": 2.6573,
"loss_": 1.0551,
"moe_loss": 0.1608,
"moe_loss_longrong": 1.4478,
"step": 203
},
{
"epoch": 0.04,
"learning_rate": 1.9994337830238836e-05,
"loss": 2.6195,
"loss_": 1.2421,
"moe_loss": 0.1609,
"moe_loss_longrong": 1.4449,
"step": 210
},
{
"epoch": 0.04,
"learning_rate": 1.9992774902353104e-05,
"loss": 2.5979,
"loss_": 1.0235,
"moe_loss": 0.1627,
"moe_loss_longrong": 1.4802,
"step": 217
},
{
"epoch": 0.04,
"learning_rate": 1.9991021801910177e-05,
"loss": 2.6143,
"loss_": 1.1486,
"moe_loss": 0.1608,
"moe_loss_longrong": 1.4443,
"step": 224
},
{
"epoch": 0.04,
"learning_rate": 1.9989078562273313e-05,
"loss": 2.6047,
"loss_": 0.9541,
"moe_loss": 0.161,
"moe_loss_longrong": 1.4455,
"step": 231
},
{
"epoch": 0.05,
"learning_rate": 1.9986945220424326e-05,
"loss": 2.6336,
"loss_": 1.1406,
"moe_loss": 0.1607,
"moe_loss_longrong": 1.4428,
"step": 238
},
{
"epoch": 0.05,
"learning_rate": 1.9984621816962843e-05,
"loss": 2.6217,
"loss_": 1.0207,
"moe_loss": 0.1608,
"moe_loss_longrong": 1.4447,
"step": 245
},
{
"epoch": 0.05,
"learning_rate": 1.9982108396105584e-05,
"loss": 2.6014,
"loss_": 1.3744,
"moe_loss": 0.1608,
"moe_loss_longrong": 1.4426,
"step": 252
},
{
"epoch": 0.05,
"learning_rate": 1.9979405005685466e-05,
"loss": 2.6134,
"loss_": 0.9548,
"moe_loss": 0.1609,
"moe_loss_longrong": 1.4415,
"step": 259
},
{
"epoch": 0.05,
"learning_rate": 1.997651169715073e-05,
"loss": 2.6022,
"loss_": 1.058,
"moe_loss": 0.1607,
"moe_loss_longrong": 1.4413,
"step": 266
},
{
"epoch": 0.05,
"learning_rate": 1.9973428525563948e-05,
"loss": 2.6219,
"loss_": 1.1897,
"moe_loss": 0.1607,
"moe_loss_longrong": 1.4404,
"step": 273
},
{
"epoch": 0.05,
"learning_rate": 1.9970155549600978e-05,
"loss": 2.6232,
"loss_": 1.25,
"moe_loss": 0.1607,
"moe_loss_longrong": 1.4401,
"step": 280
},
{
"epoch": 0.06,
"learning_rate": 1.996669283154984e-05,
"loss": 2.5805,
"loss_": 1.138,
"moe_loss": 0.1607,
"moe_loss_longrong": 1.4387,
"step": 287
},
{
"epoch": 0.06,
"learning_rate": 1.996304043730955e-05,
"loss": 2.6188,
"loss_": 1.2563,
"moe_loss": 0.1607,
"moe_loss_longrong": 1.4386,
"step": 294
},
{
"epoch": 0.06,
"learning_rate": 1.995919843638883e-05,
"loss": 2.5867,
"loss_": 0.9975,
"moe_loss": 0.1607,
"moe_loss_longrong": 1.4387,
"step": 301
},
{
"epoch": 0.06,
"learning_rate": 1.9955166901904838e-05,
"loss": 2.5987,
"loss_": 1.157,
"moe_loss": 0.1607,
"moe_loss_longrong": 1.4366,
"step": 308
},
{
"epoch": 0.06,
"learning_rate": 1.9950945910581718e-05,
"loss": 2.5875,
"loss_": 1.0582,
"moe_loss": 0.1606,
"moe_loss_longrong": 1.4378,
"step": 315
},
{
"epoch": 0.06,
"learning_rate": 1.9946535542749187e-05,
"loss": 2.5848,
"loss_": 0.9934,
"moe_loss": 0.1624,
"moe_loss_longrong": 1.4703,
"step": 322
},
{
"epoch": 0.06,
"learning_rate": 1.9941935882340976e-05,
"loss": 2.6086,
"loss_": 0.8756,
"moe_loss": 0.1607,
"moe_loss_longrong": 1.4366,
"step": 329
},
{
"epoch": 0.06,
"learning_rate": 1.9937147016893257e-05,
"loss": 2.5968,
"loss_": 1.1941,
"moe_loss": 0.1608,
"moe_loss_longrong": 1.4376,
"step": 336
},
{
"epoch": 0.07,
"learning_rate": 1.9932169037542947e-05,
"loss": 2.6158,
"loss_": 0.9761,
"moe_loss": 0.1606,
"moe_loss_longrong": 1.4358,
"step": 343
},
{
"epoch": 0.07,
"learning_rate": 1.9927002039026002e-05,
"loss": 2.5944,
"loss_": 1.2162,
"moe_loss": 0.1605,
"moe_loss_longrong": 1.4346,
"step": 350
},
{
"epoch": 0.07,
"learning_rate": 1.9921646119675606e-05,
"loss": 2.5806,
"loss_": 0.8511,
"moe_loss": 0.1606,
"moe_loss_longrong": 1.4358,
"step": 357
},
{
"epoch": 0.07,
"learning_rate": 1.9916101381420285e-05,
"loss": 2.6285,
"loss_": 0.9065,
"moe_loss": 0.1605,
"moe_loss_longrong": 1.4361,
"step": 364
},
{
"epoch": 0.07,
"learning_rate": 1.991036792978199e-05,
"loss": 2.6076,
"loss_": 0.7095,
"moe_loss": 0.1606,
"moe_loss_longrong": 1.4347,
"step": 371
},
{
"epoch": 0.07,
"learning_rate": 1.9904445873874068e-05,
"loss": 2.5824,
"loss_": 0.571,
"moe_loss": 0.1606,
"moe_loss_longrong": 1.4343,
"step": 378
},
{
"epoch": 0.07,
"learning_rate": 1.98983353263992e-05,
"loss": 2.5803,
"loss_": 0.9037,
"moe_loss": 0.1606,
"moe_loss_longrong": 1.4344,
"step": 385
},
{
"epoch": 0.08,
"learning_rate": 1.9892036403647256e-05,
"loss": 2.6071,
"loss_": 1.0289,
"moe_loss": 0.1606,
"moe_loss_longrong": 1.4333,
"step": 392
},
{
"epoch": 0.08,
"learning_rate": 1.9885549225493064e-05,
"loss": 2.6155,
"loss_": 1.227,
"moe_loss": 0.1605,
"moe_loss_longrong": 1.4324,
"step": 399
},
{
"epoch": 0.08,
"learning_rate": 1.9878873915394154e-05,
"loss": 2.6057,
"loss_": 1.276,
"moe_loss": 0.1607,
"moe_loss_longrong": 1.4326,
"step": 406
},
{
"epoch": 0.08,
"learning_rate": 1.987201060038839e-05,
"loss": 2.5446,
"loss_": 1.1148,
"moe_loss": 0.1606,
"moe_loss_longrong": 1.432,
"step": 413
},
{
"epoch": 0.08,
"learning_rate": 1.986495941109156e-05,
"loss": 2.5787,
"loss_": 0.9601,
"moe_loss": 0.1605,
"moe_loss_longrong": 1.4317,
"step": 420
},
{
"epoch": 0.08,
"learning_rate": 1.9857720481694887e-05,
"loss": 2.6018,
"loss_": 0.8145,
"moe_loss": 0.1606,
"moe_loss_longrong": 1.4318,
"step": 427
},
{
"epoch": 0.08,
"learning_rate": 1.985029394996248e-05,
"loss": 2.5863,
"loss_": 0.9872,
"moe_loss": 0.1618,
"moe_loss_longrong": 1.4613,
"step": 434
},
{
"epoch": 0.08,
"learning_rate": 1.9842679957228706e-05,
"loss": 2.5837,
"loss_": 1.165,
"moe_loss": 0.1605,
"moe_loss_longrong": 1.43,
"step": 441
},
{
"epoch": 0.09,
"learning_rate": 1.9834878648395507e-05,
"loss": 2.6015,
"loss_": 0.9562,
"moe_loss": 0.1605,
"moe_loss_longrong": 1.4307,
"step": 448
},
{
"epoch": 0.09,
"learning_rate": 1.9826890171929634e-05,
"loss": 2.5453,
"loss_": 0.8231,
"moe_loss": 0.1605,
"moe_loss_longrong": 1.4306,
"step": 455
},
{
"epoch": 0.09,
"learning_rate": 1.981871467985983e-05,
"loss": 2.578,
"loss_": 0.9864,
"moe_loss": 0.1605,
"moe_loss_longrong": 1.4306,
"step": 462
},
{
"epoch": 0.09,
"learning_rate": 1.9810352327773935e-05,
"loss": 2.5723,
"loss_": 1.1748,
"moe_loss": 0.1605,
"moe_loss_longrong": 1.4305,
"step": 469
},
{
"epoch": 0.09,
"learning_rate": 1.9801803274815915e-05,
"loss": 2.6173,
"loss_": 1.0737,
"moe_loss": 0.1605,
"moe_loss_longrong": 1.4297,
"step": 476
},
{
"epoch": 0.09,
"learning_rate": 1.979306768368285e-05,
"loss": 2.5664,
"loss_": 1.3735,
"moe_loss": 0.1605,
"moe_loss_longrong": 1.4291,
"step": 483
},
{
"epoch": 0.09,
"learning_rate": 1.9784145720621827e-05,
"loss": 2.5832,
"loss_": 1.0223,
"moe_loss": 0.1605,
"moe_loss_longrong": 1.4296,
"step": 490
},
{
"epoch": 0.1,
"learning_rate": 1.9775037555426772e-05,
"loss": 2.5448,
"loss_": 1.2395,
"moe_loss": 0.1607,
"moe_loss_longrong": 1.4291,
"step": 497
},
{
"epoch": 0.1,
"learning_rate": 1.9765743361435234e-05,
"loss": 2.5729,
"loss_": 1.1156,
"moe_loss": 0.1604,
"moe_loss_longrong": 1.4273,
"step": 504
},
{
"epoch": 0.1,
"learning_rate": 1.975626331552507e-05,
"loss": 2.5526,
"loss_": 0.8797,
"moe_loss": 0.1606,
"moe_loss_longrong": 1.4281,
"step": 511
},
{
"epoch": 0.1,
"learning_rate": 1.974659759811109e-05,
"loss": 2.573,
"loss_": 1.1636,
"moe_loss": 0.1605,
"moe_loss_longrong": 1.4274,
"step": 518
},
{
"epoch": 0.1,
"learning_rate": 1.9736746393141617e-05,
"loss": 2.59,
"loss_": 1.1342,
"moe_loss": 0.1606,
"moe_loss_longrong": 1.4279,
"step": 525
},
{
"epoch": 0.1,
"learning_rate": 1.9726709888094994e-05,
"loss": 2.5921,
"loss_": 0.8051,
"moe_loss": 0.1605,
"moe_loss_longrong": 1.4277,
"step": 532
},
{
"epoch": 0.1,
"learning_rate": 1.9716488273976006e-05,
"loss": 2.6023,
"loss_": 1.2093,
"moe_loss": 0.1605,
"moe_loss_longrong": 1.4276,
"step": 539
},
{
"epoch": 0.11,
"learning_rate": 1.970608174531224e-05,
"loss": 2.5744,
"loss_": 0.9951,
"moe_loss": 0.1604,
"moe_loss_longrong": 1.4256,
"step": 546
},
{
"epoch": 0.11,
"learning_rate": 1.9695490500150418e-05,
"loss": 2.5917,
"loss_": 1.0794,
"moe_loss": 0.1605,
"moe_loss_longrong": 1.427,
"step": 553
},
{
"epoch": 0.11,
"learning_rate": 1.9684714740052584e-05,
"loss": 2.5849,
"loss_": 0.8469,
"moe_loss": 0.1605,
"moe_loss_longrong": 1.426,
"step": 560
},
{
"epoch": 0.11,
"learning_rate": 1.9673754670092283e-05,
"loss": 2.5705,
"loss_": 0.96,
"moe_loss": 0.1604,
"moe_loss_longrong": 1.4265,
"step": 567
},
{
"epoch": 0.11,
"learning_rate": 1.9662610498850684e-05,
"loss": 2.5672,
"loss_": 1.038,
"moe_loss": 0.1605,
"moe_loss_longrong": 1.4258,
"step": 574
},
{
"epoch": 0.11,
"learning_rate": 1.965128243841256e-05,
"loss": 2.553,
"loss_": 1.1173,
"moe_loss": 0.1606,
"moe_loss_longrong": 1.4267,
"step": 581
},
{
"epoch": 0.11,
"learning_rate": 1.9639770704362305e-05,
"loss": 2.5951,
"loss_": 1.1815,
"moe_loss": 0.1605,
"moe_loss_longrong": 1.4255,
"step": 588
},
{
"epoch": 0.11,
"learning_rate": 1.9628075515779796e-05,
"loss": 2.5528,
"loss_": 0.916,
"moe_loss": 0.1604,
"moe_loss_longrong": 1.4247,
"step": 595
},
{
"epoch": 0.12,
"learning_rate": 1.961619709523623e-05,
"loss": 2.5537,
"loss_": 1.1069,
"moe_loss": 0.1606,
"moe_loss_longrong": 1.4252,
"step": 602
},
{
"epoch": 0.12,
"learning_rate": 1.9604135668789897e-05,
"loss": 2.553,
"loss_": 0.8815,
"moe_loss": 0.1616,
"moe_loss_longrong": 1.4545,
"step": 609
},
{
"epoch": 0.12,
"learning_rate": 1.959189146598188e-05,
"loss": 2.557,
"loss_": 0.3617,
"moe_loss": 0.1605,
"moe_loss_longrong": 1.4253,
"step": 616
},
{
"epoch": 0.12,
"learning_rate": 1.9579464719831668e-05,
"loss": 2.5735,
"loss_": 1.1934,
"moe_loss": 0.1604,
"moe_loss_longrong": 1.4248,
"step": 623
},
{
"epoch": 0.12,
"learning_rate": 1.9566855666832743e-05,
"loss": 2.5679,
"loss_": 1.2144,
"moe_loss": 0.1604,
"moe_loss_longrong": 1.4241,
"step": 630
},
{
"epoch": 0.12,
"learning_rate": 1.9554064546948064e-05,
"loss": 2.5541,
"loss_": 0.7773,
"moe_loss": 0.1604,
"moe_loss_longrong": 1.4247,
"step": 637
},
{
"epoch": 0.12,
"learning_rate": 1.9541091603605508e-05,
"loss": 2.5396,
"loss_": 1.0911,
"moe_loss": 0.1605,
"moe_loss_longrong": 1.4238,
"step": 644
},
{
"epoch": 0.13,
"learning_rate": 1.9527937083693233e-05,
"loss": 2.5328,
"loss_": 1.2836,
"moe_loss": 0.1604,
"moe_loss_longrong": 1.4241,
"step": 651
},
{
"epoch": 0.13,
"learning_rate": 1.951460123755499e-05,
"loss": 2.559,
"loss_": 0.6191,
"moe_loss": 0.1603,
"moe_loss_longrong": 1.4249,
"step": 658
},
{
"epoch": 0.13,
"learning_rate": 1.9501084318985335e-05,
"loss": 2.5656,
"loss_": 0.5936,
"moe_loss": 0.1604,
"moe_loss_longrong": 1.4238,
"step": 665
},
{
"epoch": 0.13,
"learning_rate": 1.948738658522483e-05,
"loss": 2.5408,
"loss_": 1.0426,
"moe_loss": 0.1605,
"moe_loss_longrong": 1.4256,
"step": 672
},
{
"epoch": 0.13,
"learning_rate": 1.9473508296955126e-05,
"loss": 2.5346,
"loss_": 1.0259,
"moe_loss": 0.1613,
"moe_loss_longrong": 1.4496,
"step": 679
},
{
"epoch": 0.13,
"learning_rate": 1.9459449718294008e-05,
"loss": 2.5744,
"loss_": 1.2413,
"moe_loss": 0.1606,
"moe_loss_longrong": 1.4233,
"step": 686
},
{
"epoch": 0.13,
"learning_rate": 1.9445211116790365e-05,
"loss": 2.5513,
"loss_": 1.1087,
"moe_loss": 0.1604,
"moe_loss_longrong": 1.4224,
"step": 693
},
{
"epoch": 0.13,
"learning_rate": 1.9430792763419105e-05,
"loss": 2.5552,
"loss_": 1.1375,
"moe_loss": 0.1603,
"moe_loss_longrong": 1.4219,
"step": 700
},
{
"epoch": 0.14,
"learning_rate": 1.9416194932576e-05,
"loss": 2.5634,
"loss_": 0.8712,
"moe_loss": 0.1606,
"moe_loss_longrong": 1.4224,
"step": 707
},
{
"epoch": 0.14,
"learning_rate": 1.9401417902072447e-05,
"loss": 2.5538,
"loss_": 0.9992,
"moe_loss": 0.1604,
"moe_loss_longrong": 1.4211,
"step": 714
},
{
"epoch": 0.14,
"learning_rate": 1.93864619531302e-05,
"loss": 2.5786,
"loss_": 1.0579,
"moe_loss": 0.1604,
"moe_loss_longrong": 1.4223,
"step": 721
},
{
"epoch": 0.14,
"learning_rate": 1.9371327370376018e-05,
"loss": 2.5565,
"loss_": 1.1717,
"moe_loss": 0.1604,
"moe_loss_longrong": 1.4223,
"step": 728
},
{
"epoch": 0.14,
"learning_rate": 1.935601444183622e-05,
"loss": 2.5491,
"loss_": 1.0508,
"moe_loss": 0.1605,
"moe_loss_longrong": 1.4216,
"step": 735
},
{
"epoch": 0.14,
"learning_rate": 1.934052345893125e-05,
"loss": 2.5485,
"loss_": 1.1535,
"moe_loss": 0.1603,
"moe_loss_longrong": 1.4201,
"step": 742
},
{
"epoch": 0.14,
"learning_rate": 1.932485471647009e-05,
"loss": 2.5434,
"loss_": 0.8935,
"moe_loss": 0.1613,
"moe_loss_longrong": 1.4462,
"step": 749
},
{
"epoch": 0.15,
"learning_rate": 1.9309008512644668e-05,
"loss": 2.5549,
"loss_": 0.9146,
"moe_loss": 0.1604,
"moe_loss_longrong": 1.4212,
"step": 756
},
{
"epoch": 0.15,
"learning_rate": 1.929298514902418e-05,
"loss": 2.5655,
"loss_": 1.2834,
"moe_loss": 0.1604,
"moe_loss_longrong": 1.4206,
"step": 763
},
{
"epoch": 0.15,
"learning_rate": 1.927678493054935e-05,
"loss": 2.5664,
"loss_": 1.1543,
"moe_loss": 0.1603,
"moe_loss_longrong": 1.4203,
"step": 770
},
{
"epoch": 0.15,
"learning_rate": 1.9260408165526638e-05,
"loss": 2.5559,
"loss_": 1.1544,
"moe_loss": 0.1604,
"moe_loss_longrong": 1.4205,
"step": 777
},
{
"epoch": 0.15,
"learning_rate": 1.9243855165622345e-05,
"loss": 2.538,
"loss_": 0.9985,
"moe_loss": 0.1603,
"moe_loss_longrong": 1.42,
"step": 784
},
{
"epoch": 0.15,
"learning_rate": 1.9227126245856716e-05,
"loss": 2.528,
"loss_": 0.766,
"moe_loss": 0.1609,
"moe_loss_longrong": 1.4442,
"step": 791
},
{
"epoch": 0.15,
"learning_rate": 1.921022172459791e-05,
"loss": 2.56,
"loss_": 1.0356,
"moe_loss": 0.1603,
"moe_loss_longrong": 1.4201,
"step": 798
},
{
"epoch": 0.15,
"learning_rate": 1.9193141923555984e-05,
"loss": 2.5418,
"loss_": 1.0224,
"moe_loss": 0.1604,
"moe_loss_longrong": 1.4191,
"step": 805
},
{
"epoch": 0.16,
"learning_rate": 1.917588716777672e-05,
"loss": 2.5489,
"loss_": 1.2415,
"moe_loss": 0.1604,
"moe_loss_longrong": 1.42,
"step": 812
},
{
"epoch": 0.16,
"learning_rate": 1.9158457785635478e-05,
"loss": 2.5647,
"loss_": 1.0902,
"moe_loss": 0.1603,
"moe_loss_longrong": 1.4194,
"step": 819
},
{
"epoch": 0.16,
"learning_rate": 1.914085410883093e-05,
"loss": 2.5695,
"loss_": 1.0454,
"moe_loss": 0.1603,
"moe_loss_longrong": 1.4196,
"step": 826
},
{
"epoch": 0.16,
"learning_rate": 1.9123076472378753e-05,
"loss": 2.5355,
"loss_": 1.1475,
"moe_loss": 0.1603,
"moe_loss_longrong": 1.4185,
"step": 833
},
{
"epoch": 0.16,
"learning_rate": 1.910512521460525e-05,
"loss": 2.5557,
"loss_": 0.7606,
"moe_loss": 0.1603,
"moe_loss_longrong": 1.4187,
"step": 840
},
{
"epoch": 0.16,
"learning_rate": 1.908700067714091e-05,
"loss": 2.5223,
"loss_": 0.9963,
"moe_loss": 0.1611,
"moe_loss_longrong": 1.4451,
"step": 847
},
{
"epoch": 0.16,
"learning_rate": 1.906870320491391e-05,
"loss": 2.5422,
"loss_": 0.6658,
"moe_loss": 0.1603,
"moe_loss_longrong": 1.4189,
"step": 854
},
{
"epoch": 0.17,
"learning_rate": 1.9050233146143554e-05,
"loss": 2.5373,
"loss_": 1.022,
"moe_loss": 0.1604,
"moe_loss_longrong": 1.4191,
"step": 861
},
{
"epoch": 0.17,
"learning_rate": 1.9031590852333637e-05,
"loss": 2.5536,
"loss_": 0.9191,
"moe_loss": 0.1603,
"moe_loss_longrong": 1.4191,
"step": 868
},
{
"epoch": 0.17,
"learning_rate": 1.9012776678265756e-05,
"loss": 2.5076,
"loss_": 0.4788,
"moe_loss": 0.1604,
"moe_loss_longrong": 1.4179,
"step": 875
},
{
"epoch": 0.17,
"learning_rate": 1.899379098199257e-05,
"loss": 2.5061,
"loss_": 1.1005,
"moe_loss": 0.1604,
"moe_loss_longrong": 1.4178,
"step": 882
},
{
"epoch": 0.17,
"learning_rate": 1.897463412483098e-05,
"loss": 2.5584,
"loss_": 1.0856,
"moe_loss": 0.1604,
"moe_loss_longrong": 1.4184,
"step": 889
},
{
"epoch": 0.17,
"learning_rate": 1.895530647135524e-05,
"loss": 2.5329,
"loss_": 0.7922,
"moe_loss": 0.1604,
"moe_loss_longrong": 1.4175,
"step": 896
},
{
"epoch": 0.17,
"learning_rate": 1.8935808389390032e-05,
"loss": 2.524,
"loss_": 1.0799,
"moe_loss": 0.1603,
"moe_loss_longrong": 1.4163,
"step": 903
},
{
"epoch": 0.18,
"learning_rate": 1.8916140250003475e-05,
"loss": 2.5423,
"loss_": 0.8152,
"moe_loss": 0.1603,
"moe_loss_longrong": 1.4174,
"step": 910
},
{
"epoch": 0.18,
"learning_rate": 1.8896302427500042e-05,
"loss": 2.533,
"loss_": 1.0454,
"moe_loss": 0.1605,
"moe_loss_longrong": 1.4181,
"step": 917
},
{
"epoch": 0.18,
"learning_rate": 1.8876295299413445e-05,
"loss": 2.522,
"loss_": 1.1185,
"moe_loss": 0.1603,
"moe_loss_longrong": 1.4169,
"step": 924
},
{
"epoch": 0.18,
"learning_rate": 1.885611924649946e-05,
"loss": 2.5539,
"loss_": 1.1256,
"moe_loss": 0.1603,
"moe_loss_longrong": 1.4167,
"step": 931
},
{
"epoch": 0.18,
"learning_rate": 1.883577465272866e-05,
"loss": 2.5069,
"loss_": 0.743,
"moe_loss": 0.1604,
"moe_loss_longrong": 1.4177,
"step": 938
},
{
"epoch": 0.18,
"learning_rate": 1.8815261905279133e-05,
"loss": 2.5429,
"loss_": 1.0463,
"moe_loss": 0.1604,
"moe_loss_longrong": 1.4162,
"step": 945
},
{
"epoch": 0.18,
"learning_rate": 1.879458139452909e-05,
"loss": 2.5381,
"loss_": 1.0908,
"moe_loss": 0.1603,
"moe_loss_longrong": 1.4152,
"step": 952
},
{
"epoch": 0.18,
"learning_rate": 1.877373351404946e-05,
"loss": 2.4924,
"loss_": 1.1275,
"moe_loss": 0.1603,
"moe_loss_longrong": 1.4167,
"step": 959
},
{
"epoch": 0.19,
"learning_rate": 1.8752718660596367e-05,
"loss": 2.536,
"loss_": 0.7467,
"moe_loss": 0.1604,
"moe_loss_longrong": 1.4165,
"step": 966
},
{
"epoch": 0.19,
"learning_rate": 1.873153723410362e-05,
"loss": 2.507,
"loss_": 1.0083,
"moe_loss": 0.1609,
"moe_loss_longrong": 1.4404,
"step": 973
},
{
"epoch": 0.19,
"learning_rate": 1.8710189637675055e-05,
"loss": 2.5118,
"loss_": 0.874,
"moe_loss": 0.1603,
"moe_loss_longrong": 1.4161,
"step": 980
},
{
"epoch": 0.19,
"learning_rate": 1.8688676277576916e-05,
"loss": 2.5415,
"loss_": 1.1152,
"moe_loss": 0.1603,
"moe_loss_longrong": 1.4157,
"step": 987
},
{
"epoch": 0.19,
"learning_rate": 1.866699756323008e-05,
"loss": 2.5225,
"loss_": 0.8857,
"moe_loss": 0.1603,
"moe_loss_longrong": 1.416,
"step": 994
},
{
"epoch": 0.19,
"learning_rate": 1.8645153907202285e-05,
"loss": 2.5093,
"loss_": 1.0791,
"moe_loss": 0.1603,
"moe_loss_longrong": 1.4157,
"step": 1001
},
{
"epoch": 0.19,
"learning_rate": 1.862314572520028e-05,
"loss": 2.534,
"loss_": 1.1649,
"moe_loss": 0.1603,
"moe_loss_longrong": 1.4167,
"step": 1008
},
{
"epoch": 0.2,
"learning_rate": 1.86009734360619e-05,
"loss": 2.559,
"loss_": 1.2289,
"moe_loss": 0.1604,
"moe_loss_longrong": 1.4163,
"step": 1015
},
{
"epoch": 0.2,
"learning_rate": 1.8578637461748105e-05,
"loss": 2.5738,
"loss_": 0.8422,
"moe_loss": 0.1603,
"moe_loss_longrong": 1.416,
"step": 1022
},
{
"epoch": 0.2,
"learning_rate": 1.8556138227334957e-05,
"loss": 2.5752,
"loss_": 1.0332,
"moe_loss": 0.1603,
"moe_loss_longrong": 1.4157,
"step": 1029
},
{
"epoch": 0.2,
"learning_rate": 1.853347616100552e-05,
"loss": 2.5633,
"loss_": 1.2742,
"moe_loss": 0.1602,
"moe_loss_longrong": 1.4152,
"step": 1036
},
{
"epoch": 0.2,
"learning_rate": 1.8510651694041702e-05,
"loss": 2.5491,
"loss_": 1.1394,
"moe_loss": 0.1603,
"moe_loss_longrong": 1.4153,
"step": 1043
},
{
"epoch": 0.2,
"learning_rate": 1.848766526081607e-05,
"loss": 2.5032,
"loss_": 1.0904,
"moe_loss": 0.1603,
"moe_loss_longrong": 1.4148,
"step": 1050
},
{
"epoch": 0.2,
"learning_rate": 1.846451729878357e-05,
"loss": 2.5687,
"loss_": 1.0973,
"moe_loss": 0.1603,
"moe_loss_longrong": 1.4144,
"step": 1057
},
{
"epoch": 0.2,
"learning_rate": 1.84412082484732e-05,
"loss": 2.5378,
"loss_": 0.7628,
"moe_loss": 0.161,
"moe_loss_longrong": 1.4372,
"step": 1064
},
{
"epoch": 0.21,
"learning_rate": 1.841773855347963e-05,
"loss": 2.5285,
"loss_": 1.0832,
"moe_loss": 0.1603,
"moe_loss_longrong": 1.4148,
"step": 1071
},
{
"epoch": 0.21,
"learning_rate": 1.8394108660454766e-05,
"loss": 2.53,
"loss_": 0.7952,
"moe_loss": 0.1603,
"moe_loss_longrong": 1.415,
"step": 1078
},
{
"epoch": 0.21,
"learning_rate": 1.8370319019099236e-05,
"loss": 2.5457,
"loss_": 0.8855,
"moe_loss": 0.1603,
"moe_loss_longrong": 1.4146,
"step": 1085
},
{
"epoch": 0.21,
"learning_rate": 1.8346370082153843e-05,
"loss": 2.5227,
"loss_": 1.1518,
"moe_loss": 0.1603,
"moe_loss_longrong": 1.4148,
"step": 1092
},
{
"epoch": 0.21,
"learning_rate": 1.8322262305390948e-05,
"loss": 2.5268,
"loss_": 1.0055,
"moe_loss": 0.1603,
"moe_loss_longrong": 1.4139,
"step": 1099
},
{
"epoch": 0.21,
"learning_rate": 1.8297996147605787e-05,
"loss": 2.5418,
"loss_": 1.2226,
"moe_loss": 0.1603,
"moe_loss_longrong": 1.4139,
"step": 1106
},
{
"epoch": 0.21,
"learning_rate": 1.8273572070607756e-05,
"loss": 2.5465,
"loss_": 1.0475,
"moe_loss": 0.1603,
"moe_loss_longrong": 1.414,
"step": 1113
},
{
"epoch": 0.22,
"learning_rate": 1.8248990539211596e-05,
"loss": 2.5132,
"loss_": 1.2063,
"moe_loss": 0.1603,
"moe_loss_longrong": 1.4148,
"step": 1120
},
{
"epoch": 0.22,
"learning_rate": 1.822425202122858e-05,
"loss": 2.5236,
"loss_": 1.3605,
"moe_loss": 0.1603,
"moe_loss_longrong": 1.4145,
"step": 1127
},
{
"epoch": 0.22,
"learning_rate": 1.819935698745759e-05,
"loss": 2.517,
"loss_": 0.9965,
"moe_loss": 0.1602,
"moe_loss_longrong": 1.4137,
"step": 1134
},
{
"epoch": 0.22,
"learning_rate": 1.817430591167615e-05,
"loss": 2.5347,
"loss_": 1.1165,
"moe_loss": 0.1602,
"moe_loss_longrong": 1.4133,
"step": 1141
},
{
"epoch": 0.22,
"learning_rate": 1.8149099270631434e-05,
"loss": 2.5051,
"loss_": 0.9918,
"moe_loss": 0.1602,
"moe_loss_longrong": 1.4133,
"step": 1148
},
{
"epoch": 0.22,
"learning_rate": 1.8123737544031178e-05,
"loss": 2.5228,
"loss_": 1.0757,
"moe_loss": 0.1603,
"moe_loss_longrong": 1.4132,
"step": 1155
},
{
"epoch": 0.22,
"learning_rate": 1.8098221214534543e-05,
"loss": 2.5117,
"loss_": 0.9441,
"moe_loss": 0.1608,
"moe_loss_longrong": 1.435,
"step": 1162
},
{
"epoch": 0.22,
"learning_rate": 1.807255076774294e-05,
"loss": 2.5292,
"loss_": 0.9315,
"moe_loss": 0.1608,
"moe_loss_longrong": 1.4332,
"step": 1169
},
{
"epoch": 0.23,
"learning_rate": 1.80467266921908e-05,
"loss": 2.4974,
"loss_": 0.9341,
"moe_loss": 0.1603,
"moe_loss_longrong": 1.414,
"step": 1176
},
{
"epoch": 0.23,
"learning_rate": 1.802074947933625e-05,
"loss": 2.5251,
"loss_": 1.0369,
"moe_loss": 0.1602,
"moe_loss_longrong": 1.4127,
"step": 1183
},
{
"epoch": 0.23,
"learning_rate": 1.799461962355178e-05,
"loss": 2.5424,
"loss_": 1.2525,
"moe_loss": 0.1602,
"moe_loss_longrong": 1.4133,
"step": 1190
},
{
"epoch": 0.23,
"learning_rate": 1.7968337622114824e-05,
"loss": 2.5123,
"loss_": 1.4116,
"moe_loss": 0.1603,
"moe_loss_longrong": 1.4131,
"step": 1197
},
{
"epoch": 0.23,
"learning_rate": 1.7941903975198305e-05,
"loss": 2.5119,
"loss_": 0.8823,
"moe_loss": 0.1603,
"moe_loss_longrong": 1.4132,
"step": 1204
},
{
"epoch": 0.23,
"learning_rate": 1.791531918586112e-05,
"loss": 2.5372,
"loss_": 0.9075,
"moe_loss": 0.1602,
"moe_loss_longrong": 1.4125,
"step": 1211
},
{
"epoch": 0.23,
"learning_rate": 1.7888583760038534e-05,
"loss": 2.5356,
"loss_": 1.065,
"moe_loss": 0.1603,
"moe_loss_longrong": 1.4133,
"step": 1218
},
{
"epoch": 0.24,
"learning_rate": 1.78616982065326e-05,
"loss": 2.5154,
"loss_": 0.9056,
"moe_loss": 0.1611,
"moe_loss_longrong": 1.4338,
"step": 1225
},
{
"epoch": 0.24,
"learning_rate": 1.7834663037002444e-05,
"loss": 2.5377,
"loss_": 0.8469,
"moe_loss": 0.1603,
"moe_loss_longrong": 1.4123,
"step": 1232
},
{
"epoch": 0.24,
"learning_rate": 1.7807478765954532e-05,
"loss": 2.5363,
"loss_": 1.0507,
"moe_loss": 0.1602,
"moe_loss_longrong": 1.4116,
"step": 1239
},
{
"epoch": 0.24,
"learning_rate": 1.778014591073288e-05,
"loss": 2.5131,
"loss_": 1.0501,
"moe_loss": 0.1603,
"moe_loss_longrong": 1.4123,
"step": 1246
},
{
"epoch": 0.24,
"learning_rate": 1.7752664991509224e-05,
"loss": 2.5127,
"loss_": 1.0583,
"moe_loss": 0.1603,
"moe_loss_longrong": 1.412,
"step": 1253
},
{
"epoch": 0.24,
"learning_rate": 1.7725036531273087e-05,
"loss": 2.5147,
"loss_": 1.0225,
"moe_loss": 0.1602,
"moe_loss_longrong": 1.4119,
"step": 1260
},
{
"epoch": 0.24,
"learning_rate": 1.7697261055821864e-05,
"loss": 2.5399,
"loss_": 1.1622,
"moe_loss": 0.1603,
"moe_loss_longrong": 1.4123,
"step": 1267
},
{
"epoch": 0.25,
"learning_rate": 1.7669339093750786e-05,
"loss": 2.5042,
"loss_": 1.1,
"moe_loss": 0.1602,
"moe_loss_longrong": 1.4118,
"step": 1274
},
{
"epoch": 0.25,
"learning_rate": 1.7641271176442876e-05,
"loss": 2.4867,
"loss_": 1.1917,
"moe_loss": 0.1602,
"moe_loss_longrong": 1.4116,
"step": 1281
},
{
"epoch": 0.25,
"learning_rate": 1.761305783805883e-05,
"loss": 2.5159,
"loss_": 1.1055,
"moe_loss": 0.1603,
"moe_loss_longrong": 1.4114,
"step": 1288
},
{
"epoch": 0.25,
"learning_rate": 1.7584699615526857e-05,
"loss": 2.5064,
"loss_": 0.7231,
"moe_loss": 0.1603,
"moe_loss_longrong": 1.4119,
"step": 1295
},
{
"epoch": 0.25,
"learning_rate": 1.755619704853246e-05,
"loss": 2.5335,
"loss_": 0.9998,
"moe_loss": 0.1603,
"moe_loss_longrong": 1.4112,
"step": 1302
},
{
"epoch": 0.25,
"learning_rate": 1.752755067950814e-05,
"loss": 2.5332,
"loss_": 1.2823,
"moe_loss": 0.1602,
"moe_loss_longrong": 1.4114,
"step": 1309
},
{
"epoch": 0.25,
"learning_rate": 1.749876105362313e-05,
"loss": 2.5212,
"loss_": 1.2273,
"moe_loss": 0.1602,
"moe_loss_longrong": 1.4109,
"step": 1316
},
{
"epoch": 0.25,
"learning_rate": 1.746982871877296e-05,
"loss": 2.5149,
"loss_": 1.2552,
"moe_loss": 0.1602,
"moe_loss_longrong": 1.4113,
"step": 1323
},
{
"epoch": 0.26,
"learning_rate": 1.744075422556906e-05,
"loss": 2.4876,
"loss_": 0.9236,
"moe_loss": 0.1603,
"moe_loss_longrong": 1.4115,
"step": 1330
},
{
"epoch": 0.26,
"learning_rate": 1.741153812732828e-05,
"loss": 2.5421,
"loss_": 1.155,
"moe_loss": 0.1602,
"moe_loss_longrong": 1.4106,
"step": 1337
},
{
"epoch": 0.26,
"learning_rate": 1.7382180980062365e-05,
"loss": 2.5095,
"loss_": 0.6978,
"moe_loss": 0.1602,
"moe_loss_longrong": 1.4109,
"step": 1344
},
{
"epoch": 0.26,
"learning_rate": 1.735268334246734e-05,
"loss": 2.5068,
"loss_": 1.0142,
"moe_loss": 0.1602,
"moe_loss_longrong": 1.4105,
"step": 1351
},
{
"epoch": 0.26,
"learning_rate": 1.7323045775912927e-05,
"loss": 2.5247,
"loss_": 0.9039,
"moe_loss": 0.1603,
"moe_loss_longrong": 1.4111,
"step": 1358
},
{
"epoch": 0.26,
"learning_rate": 1.7293268844431826e-05,
"loss": 2.5308,
"loss_": 1.1161,
"moe_loss": 0.1602,
"moe_loss_longrong": 1.4101,
"step": 1365
},
{
"epoch": 0.26,
"learning_rate": 1.7263353114708993e-05,
"loss": 2.516,
"loss_": 0.5494,
"moe_loss": 0.1603,
"moe_loss_longrong": 1.4105,
"step": 1372
},
{
"epoch": 0.27,
"learning_rate": 1.7233299156070852e-05,
"loss": 2.5405,
"loss_": 1.0823,
"moe_loss": 0.1602,
"moe_loss_longrong": 1.4091,
"step": 1379
},
{
"epoch": 0.27,
"learning_rate": 1.720310754047446e-05,
"loss": 2.5123,
"loss_": 1.016,
"moe_loss": 0.1602,
"moe_loss_longrong": 1.41,
"step": 1386
},
{
"epoch": 0.27,
"learning_rate": 1.717277884249664e-05,
"loss": 2.4917,
"loss_": 1.1087,
"moe_loss": 0.1602,
"moe_loss_longrong": 1.4103,
"step": 1393
},
{
"epoch": 0.27,
"learning_rate": 1.7142313639323012e-05,
"loss": 2.5184,
"loss_": 1.2711,
"moe_loss": 0.1602,
"moe_loss_longrong": 1.4101,
"step": 1400
},
{
"epoch": 0.27,
"learning_rate": 1.7111712510737035e-05,
"loss": 2.5037,
"loss_": 1.1198,
"moe_loss": 0.1603,
"moe_loss_longrong": 1.4102,
"step": 1407
},
{
"epoch": 0.27,
"learning_rate": 1.7080976039108964e-05,
"loss": 2.5262,
"loss_": 0.9286,
"moe_loss": 0.1612,
"moe_loss_longrong": 1.4315,
"step": 1414
},
{
"epoch": 0.27,
"learning_rate": 1.7050104809384774e-05,
"loss": 2.4892,
"loss_": 1.0377,
"moe_loss": 0.1609,
"moe_loss_longrong": 1.4276,
"step": 1421
},
{
"epoch": 0.27,
"learning_rate": 1.7019099409075014e-05,
"loss": 2.5076,
"loss_": 0.9405,
"moe_loss": 0.1602,
"moe_loss_longrong": 1.4092,
"step": 1428
},
{
"epoch": 0.28,
"learning_rate": 1.6987960428243637e-05,
"loss": 2.5198,
"loss_": 1.3093,
"moe_loss": 0.1602,
"moe_loss_longrong": 1.4094,
"step": 1435
},
{
"epoch": 0.28,
"learning_rate": 1.6956688459496767e-05,
"loss": 2.5508,
"loss_": 1.0043,
"moe_loss": 0.1602,
"moe_loss_longrong": 1.4083,
"step": 1442
},
{
"epoch": 0.28,
"learning_rate": 1.6925284097971427e-05,
"loss": 2.5299,
"loss_": 1.1324,
"moe_loss": 0.1602,
"moe_loss_longrong": 1.4091,
"step": 1449
},
{
"epoch": 0.28,
"learning_rate": 1.6893747941324197e-05,
"loss": 2.5495,
"loss_": 0.7979,
"moe_loss": 0.1602,
"moe_loss_longrong": 1.4097,
"step": 1456
},
{
"epoch": 0.28,
"learning_rate": 1.6862080589719863e-05,
"loss": 2.4692,
"loss_": 0.563,
"moe_loss": 0.1602,
"moe_loss_longrong": 1.4097,
"step": 1463
},
{
"epoch": 0.28,
"learning_rate": 1.6830282645819974e-05,
"loss": 2.5107,
"loss_": 0.3532,
"moe_loss": 0.1604,
"moe_loss_longrong": 1.4107,
"step": 1470
},
{
"epoch": 0.28,
"learning_rate": 1.679835471477139e-05,
"loss": 2.498,
"loss_": 0.9877,
"moe_loss": 0.1602,
"moe_loss_longrong": 1.4086,
"step": 1477
},
{
"epoch": 0.29,
"learning_rate": 1.6766297404194745e-05,
"loss": 2.502,
"loss_": 1.1402,
"moe_loss": 0.1603,
"moe_loss_longrong": 1.4091,
"step": 1484
},
{
"epoch": 0.29,
"learning_rate": 1.673411132417291e-05,
"loss": 2.5066,
"loss_": 1.167,
"moe_loss": 0.1602,
"moe_loss_longrong": 1.409,
"step": 1491
},
{
"epoch": 0.29,
"learning_rate": 1.6701797087239354e-05,
"loss": 2.5273,
"loss_": 0.914,
"moe_loss": 0.1607,
"moe_loss_longrong": 1.4264,
"step": 1498
},
{
"epoch": 0.29,
"learning_rate": 1.666935530836651e-05,
"loss": 2.5022,
"loss_": 0.9724,
"moe_loss": 0.1602,
"moe_loss_longrong": 1.4087,
"step": 1505
},
{
"epoch": 0.29,
"learning_rate": 1.663678660495406e-05,
"loss": 2.4806,
"loss_": 1.1766,
"moe_loss": 0.1602,
"moe_loss_longrong": 1.4086,
"step": 1512
},
{
"epoch": 0.29,
"learning_rate": 1.6604091596817193e-05,
"loss": 2.5228,
"loss_": 0.8955,
"moe_loss": 0.1602,
"moe_loss_longrong": 1.4086,
"step": 1519
},
{
"epoch": 0.29,
"learning_rate": 1.657127090617479e-05,
"loss": 2.5303,
"loss_": 0.9496,
"moe_loss": 0.1602,
"moe_loss_longrong": 1.4079,
"step": 1526
},
{
"epoch": 0.29,
"learning_rate": 1.6538325157637614e-05,
"loss": 2.5162,
"loss_": 0.8978,
"moe_loss": 0.1602,
"moe_loss_longrong": 1.4085,
"step": 1533
},
{
"epoch": 0.3,
"learning_rate": 1.650525497819639e-05,
"loss": 2.5187,
"loss_": 0.787,
"moe_loss": 0.1608,
"moe_loss_longrong": 1.4256,
"step": 1540
},
{
"epoch": 0.3,
"learning_rate": 1.6472060997209898e-05,
"loss": 2.5283,
"loss_": 1.1671,
"moe_loss": 0.1602,
"moe_loss_longrong": 1.4085,
"step": 1547
},
{
"epoch": 0.3,
"learning_rate": 1.6438743846392987e-05,
"loss": 2.5049,
"loss_": 0.7488,
"moe_loss": 0.1602,
"moe_loss_longrong": 1.4082,
"step": 1554
},
{
"epoch": 0.3,
"learning_rate": 1.6405304159804534e-05,
"loss": 2.4966,
"loss_": 1.0698,
"moe_loss": 0.1602,
"moe_loss_longrong": 1.4078,
"step": 1561
},
{
"epoch": 0.3,
"learning_rate": 1.6371742573835426e-05,
"loss": 2.5307,
"loss_": 1.1426,
"moe_loss": 0.1602,
"moe_loss_longrong": 1.4075,
"step": 1568
},
{
"epoch": 0.3,
"learning_rate": 1.6338059727196386e-05,
"loss": 2.4884,
"loss_": 1.0242,
"moe_loss": 0.1603,
"moe_loss_longrong": 1.4078,
"step": 1575
},
{
"epoch": 0.3,
"learning_rate": 1.6304256260905872e-05,
"loss": 2.5239,
"loss_": 1.1671,
"moe_loss": 0.1602,
"moe_loss_longrong": 1.4079,
"step": 1582
},
{
"epoch": 0.31,
"learning_rate": 1.627033281827785e-05,
"loss": 2.5292,
"loss_": 0.9625,
"moe_loss": 0.1607,
"moe_loss_longrong": 1.4262,
"step": 1589
},
{
"epoch": 0.31,
"learning_rate": 1.6236290044909543e-05,
"loss": 2.5336,
"loss_": 0.8255,
"moe_loss": 0.1602,
"moe_loss_longrong": 1.4081,
"step": 1596
},
{
"epoch": 0.31,
"learning_rate": 1.6202128588669177e-05,
"loss": 2.5205,
"loss_": 0.7348,
"moe_loss": 0.1603,
"moe_loss_longrong": 1.4082,
"step": 1603
},
{
"epoch": 0.31,
"learning_rate": 1.6167849099683623e-05,
"loss": 2.4854,
"loss_": 0.9767,
"moe_loss": 0.1602,
"moe_loss_longrong": 1.4075,
"step": 1610
},
{
"epoch": 0.31,
"learning_rate": 1.6133452230326035e-05,
"loss": 2.5265,
"loss_": 0.9913,
"moe_loss": 0.1602,
"moe_loss_longrong": 1.4074,
"step": 1617
},
{
"epoch": 0.31,
"learning_rate": 1.609893863520343e-05,
"loss": 2.4785,
"loss_": 0.9806,
"moe_loss": 0.1602,
"moe_loss_longrong": 1.4076,
"step": 1624
},
{
"epoch": 0.31,
"learning_rate": 1.6064308971144236e-05,
"loss": 2.5053,
"loss_": 1.207,
"moe_loss": 0.1602,
"moe_loss_longrong": 1.4073,
"step": 1631
},
{
"epoch": 0.32,
"learning_rate": 1.60295638971858e-05,
"loss": 2.5212,
"loss_": 1.2091,
"moe_loss": 0.1602,
"moe_loss_longrong": 1.4072,
"step": 1638
},
{
"epoch": 0.32,
"learning_rate": 1.599470407456182e-05,
"loss": 2.5177,
"loss_": 1.0634,
"moe_loss": 0.1602,
"moe_loss_longrong": 1.4071,
"step": 1645
},
{
"epoch": 0.32,
"learning_rate": 1.5959730166689783e-05,
"loss": 2.5219,
"loss_": 0.7302,
"moe_loss": 0.1602,
"moe_loss_longrong": 1.4069,
"step": 1652
},
{
"epoch": 0.32,
"learning_rate": 1.5924642839158334e-05,
"loss": 2.5273,
"loss_": 1.1267,
"moe_loss": 0.1602,
"moe_loss_longrong": 1.4065,
"step": 1659
},
{
"epoch": 0.32,
"learning_rate": 1.5889442759714603e-05,
"loss": 2.5067,
"loss_": 0.8492,
"moe_loss": 0.1606,
"moe_loss_longrong": 1.4234,
"step": 1666
},
{
"epoch": 0.32,
"learning_rate": 1.5854130598251514e-05,
"loss": 2.4997,
"loss_": 1.0397,
"moe_loss": 0.1602,
"moe_loss_longrong": 1.4065,
"step": 1673
},
{
"epoch": 0.32,
"learning_rate": 1.581870702679501e-05,
"loss": 2.5277,
"loss_": 0.9804,
"moe_loss": 0.1602,
"moe_loss_longrong": 1.4073,
"step": 1680
},
{
"epoch": 0.32,
"learning_rate": 1.5783172719491288e-05,
"loss": 2.5191,
"loss_": 1.1463,
"moe_loss": 0.1602,
"moe_loss_longrong": 1.4064,
"step": 1687
},
{
"epoch": 0.33,
"learning_rate": 1.5747528352593956e-05,
"loss": 2.4859,
"loss_": 1.0594,
"moe_loss": 0.1602,
"moe_loss_longrong": 1.4063,
"step": 1694
},
{
"epoch": 0.33,
"learning_rate": 1.5711774604451168e-05,
"loss": 2.5146,
"loss_": 1.0352,
"moe_loss": 0.1602,
"moe_loss_longrong": 1.4063,
"step": 1701
},
{
"epoch": 0.33,
"learning_rate": 1.567591215549271e-05,
"loss": 2.5086,
"loss_": 0.8248,
"moe_loss": 0.1602,
"moe_loss_longrong": 1.4067,
"step": 1708
},
{
"epoch": 0.33,
"learning_rate": 1.5639941688217063e-05,
"loss": 2.4807,
"loss_": 0.8445,
"moe_loss": 0.1602,
"moe_loss_longrong": 1.4063,
"step": 1715
},
{
"epoch": 0.33,
"learning_rate": 1.5603863887178393e-05,
"loss": 2.5192,
"loss_": 0.9476,
"moe_loss": 0.1602,
"moe_loss_longrong": 1.4057,
"step": 1722
},
{
"epoch": 0.33,
"learning_rate": 1.5567679438973543e-05,
"loss": 2.5131,
"loss_": 1.1376,
"moe_loss": 0.1602,
"moe_loss_longrong": 1.4062,
"step": 1729
},
{
"epoch": 0.33,
"learning_rate": 1.5531389032228955e-05,
"loss": 2.4964,
"loss_": 1.2426,
"moe_loss": 0.1602,
"moe_loss_longrong": 1.4064,
"step": 1736
},
{
"epoch": 0.34,
"learning_rate": 1.549499335758757e-05,
"loss": 2.5134,
"loss_": 0.8763,
"moe_loss": 0.1602,
"moe_loss_longrong": 1.406,
"step": 1743
},
{
"epoch": 0.34,
"learning_rate": 1.5458493107695688e-05,
"loss": 2.4855,
"loss_": 1.1827,
"moe_loss": 0.1602,
"moe_loss_longrong": 1.4061,
"step": 1750
},
{
"epoch": 0.34,
"learning_rate": 1.542188897718977e-05,
"loss": 2.4889,
"loss_": 1.014,
"moe_loss": 0.1602,
"moe_loss_longrong": 1.4064,
"step": 1757
},
{
"epoch": 0.34,
"learning_rate": 1.5385181662683244e-05,
"loss": 2.5111,
"loss_": 1.0933,
"moe_loss": 0.1602,
"moe_loss_longrong": 1.4058,
"step": 1764
},
{
"epoch": 0.34,
"learning_rate": 1.534837186275322e-05,
"loss": 2.5385,
"loss_": 0.8571,
"moe_loss": 0.1602,
"moe_loss_longrong": 1.4062,
"step": 1771
},
{
"epoch": 0.34,
"learning_rate": 1.531146027792722e-05,
"loss": 2.5107,
"loss_": 0.9431,
"moe_loss": 0.1602,
"moe_loss_longrong": 1.4058,
"step": 1778
},
{
"epoch": 0.34,
"learning_rate": 1.527444761066982e-05,
"loss": 2.5031,
"loss_": 1.2333,
"moe_loss": 0.1602,
"moe_loss_longrong": 1.4056,
"step": 1785
},
{
"epoch": 0.34,
"learning_rate": 1.523733456536931e-05,
"loss": 2.4927,
"loss_": 1.1367,
"moe_loss": 0.1602,
"moe_loss_longrong": 1.406,
"step": 1792
},
{
"epoch": 0.35,
"learning_rate": 1.5200121848324276e-05,
"loss": 2.5148,
"loss_": 1.0457,
"moe_loss": 0.1602,
"moe_loss_longrong": 1.4052,
"step": 1799
},
{
"epoch": 0.35,
"learning_rate": 1.5162810167730144e-05,
"loss": 2.4974,
"loss_": 0.8327,
"moe_loss": 0.1602,
"moe_loss_longrong": 1.4055,
"step": 1806
},
{
"epoch": 0.35,
"learning_rate": 1.5125400233665728e-05,
"loss": 2.4938,
"loss_": 0.965,
"moe_loss": 0.1603,
"moe_loss_longrong": 1.4061,
"step": 1813
},
{
"epoch": 0.35,
"learning_rate": 1.50878927580797e-05,
"loss": 2.4854,
"loss_": 1.0636,
"moe_loss": 0.1602,
"moe_loss_longrong": 1.4061,
"step": 1820
},
{
"epoch": 0.35,
"learning_rate": 1.5050288454777047e-05,
"loss": 2.4829,
"loss_": 0.8501,
"moe_loss": 0.1607,
"moe_loss_longrong": 1.4209,
"step": 1827
},
{
"epoch": 0.35,
"learning_rate": 1.501258803940548e-05,
"loss": 2.5151,
"loss_": 1.2857,
"moe_loss": 0.1602,
"moe_loss_longrong": 1.4049,
"step": 1834
},
{
"epoch": 0.35,
"learning_rate": 1.4974792229441826e-05,
"loss": 2.5045,
"loss_": 1.0645,
"moe_loss": 0.1602,
"moe_loss_longrong": 1.4054,
"step": 1841
},
{
"epoch": 0.36,
"learning_rate": 1.4936901744178367e-05,
"loss": 2.5062,
"loss_": 0.5678,
"moe_loss": 0.1602,
"moe_loss_longrong": 1.4056,
"step": 1848
},
{
"epoch": 0.36,
"learning_rate": 1.489891730470914e-05,
"loss": 2.4826,
"loss_": 1.2262,
"moe_loss": 0.1602,
"moe_loss_longrong": 1.4049,
"step": 1855
},
{
"epoch": 0.36,
"learning_rate": 1.4860839633916236e-05,
"loss": 2.466,
"loss_": 0.7849,
"moe_loss": 0.1602,
"moe_loss_longrong": 1.4048,
"step": 1862
},
{
"epoch": 0.36,
"learning_rate": 1.4822669456456031e-05,
"loss": 2.4872,
"loss_": 0.8576,
"moe_loss": 0.1602,
"moe_loss_longrong": 1.4052,
"step": 1869
},
{
"epoch": 0.36,
"learning_rate": 1.4784407498745394e-05,
"loss": 2.4951,
"loss_": 0.8778,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.4052,
"step": 1876
},
{
"epoch": 0.36,
"learning_rate": 1.4746054488947863e-05,
"loss": 2.4876,
"loss_": 0.8237,
"moe_loss": 0.1602,
"moe_loss_longrong": 1.405,
"step": 1883
},
{
"epoch": 0.36,
"learning_rate": 1.470761115695979e-05,
"loss": 2.4986,
"loss_": 0.9971,
"moe_loss": 0.1602,
"moe_loss_longrong": 1.4045,
"step": 1890
},
{
"epoch": 0.36,
"learning_rate": 1.4669078234396454e-05,
"loss": 2.4678,
"loss_": 1.1283,
"moe_loss": 0.1602,
"moe_loss_longrong": 1.4043,
"step": 1897
},
{
"epoch": 0.37,
"learning_rate": 1.4630456454578122e-05,
"loss": 2.516,
"loss_": 0.9592,
"moe_loss": 0.1602,
"moe_loss_longrong": 1.4045,
"step": 1904
},
{
"epoch": 0.37,
"learning_rate": 1.4591746552516109e-05,
"loss": 2.5208,
"loss_": 1.0451,
"moe_loss": 0.1607,
"moe_loss_longrong": 1.4182,
"step": 1911
},
{
"epoch": 0.37,
"learning_rate": 1.4552949264898795e-05,
"loss": 2.498,
"loss_": 0.8697,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.4045,
"step": 1918
},
{
"epoch": 0.37,
"learning_rate": 1.4514065330077575e-05,
"loss": 2.5174,
"loss_": 0.8274,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.4044,
"step": 1925
},
{
"epoch": 0.37,
"learning_rate": 1.4475095488052843e-05,
"loss": 2.5038,
"loss_": 0.7792,
"moe_loss": 0.1602,
"moe_loss_longrong": 1.4052,
"step": 1932
},
{
"epoch": 0.37,
"learning_rate": 1.4436040480459891e-05,
"loss": 2.5116,
"loss_": 0.9444,
"moe_loss": 0.1602,
"moe_loss_longrong": 1.4044,
"step": 1939
},
{
"epoch": 0.37,
"learning_rate": 1.4396901050554794e-05,
"loss": 2.4786,
"loss_": 1.0648,
"moe_loss": 0.1602,
"moe_loss_longrong": 1.4047,
"step": 1946
},
{
"epoch": 0.38,
"learning_rate": 1.435767794320027e-05,
"loss": 2.4987,
"loss_": 1.0158,
"moe_loss": 0.1602,
"moe_loss_longrong": 1.4046,
"step": 1953
},
{
"epoch": 0.38,
"learning_rate": 1.4318371904851502e-05,
"loss": 2.5188,
"loss_": 1.1058,
"moe_loss": 0.1602,
"moe_loss_longrong": 1.4045,
"step": 1960
},
{
"epoch": 0.38,
"learning_rate": 1.4278983683541934e-05,
"loss": 2.491,
"loss_": 1.1232,
"moe_loss": 0.1606,
"moe_loss_longrong": 1.4187,
"step": 1967
},
{
"epoch": 0.38,
"learning_rate": 1.4239514028869032e-05,
"loss": 2.487,
"loss_": 0.9791,
"moe_loss": 0.1602,
"moe_loss_longrong": 1.4039,
"step": 1974
},
{
"epoch": 0.38,
"learning_rate": 1.4199963691980027e-05,
"loss": 2.492,
"loss_": 1.0493,
"moe_loss": 0.1602,
"moe_loss_longrong": 1.4046,
"step": 1981
},
{
"epoch": 0.38,
"learning_rate": 1.4160333425557616e-05,
"loss": 2.5256,
"loss_": 0.6311,
"moe_loss": 0.1602,
"moe_loss_longrong": 1.4039,
"step": 1988
},
{
"epoch": 0.38,
"learning_rate": 1.4120623983805617e-05,
"loss": 2.502,
"loss_": 0.8536,
"moe_loss": 0.1602,
"moe_loss_longrong": 1.4037,
"step": 1995
},
{
"epoch": 0.39,
"learning_rate": 1.408083612243465e-05,
"loss": 2.4939,
"loss_": 1.0558,
"moe_loss": 0.1605,
"moe_loss_longrong": 1.4185,
"step": 2002
},
{
"epoch": 0.39,
"learning_rate": 1.4040970598647742e-05,
"loss": 2.4975,
"loss_": 0.9278,
"moe_loss": 0.1606,
"moe_loss_longrong": 1.4179,
"step": 2009
},
{
"epoch": 0.39,
"learning_rate": 1.40010281711259e-05,
"loss": 2.4624,
"loss_": 0.8695,
"moe_loss": 0.1602,
"moe_loss_longrong": 1.4036,
"step": 2016
},
{
"epoch": 0.39,
"learning_rate": 1.3961009600013702e-05,
"loss": 2.4981,
"loss_": 0.9502,
"moe_loss": 0.1602,
"moe_loss_longrong": 1.4041,
"step": 2023
},
{
"epoch": 0.39,
"learning_rate": 1.39209156469048e-05,
"loss": 2.4973,
"loss_": 1.0486,
"moe_loss": 0.1602,
"moe_loss_longrong": 1.4037,
"step": 2030
},
{
"epoch": 0.39,
"learning_rate": 1.3880747074827454e-05,
"loss": 2.498,
"loss_": 1.0935,
"moe_loss": 0.1602,
"moe_loss_longrong": 1.4031,
"step": 2037
},
{
"epoch": 0.39,
"learning_rate": 1.384050464822999e-05,
"loss": 2.4956,
"loss_": 0.978,
"moe_loss": 0.1602,
"moe_loss_longrong": 1.4045,
"step": 2044
},
{
"epoch": 0.39,
"learning_rate": 1.3800189132966257e-05,
"loss": 2.4826,
"loss_": 0.9682,
"moe_loss": 0.1602,
"moe_loss_longrong": 1.4039,
"step": 2051
},
{
"epoch": 0.4,
"learning_rate": 1.3759801296281072e-05,
"loss": 2.499,
"loss_": 0.8618,
"moe_loss": 0.1606,
"moe_loss_longrong": 1.4181,
"step": 2058
},
{
"epoch": 0.4,
"learning_rate": 1.371934190679558e-05,
"loss": 2.4876,
"loss_": 0.7575,
"moe_loss": 0.1602,
"moe_loss_longrong": 1.4032,
"step": 2065
},
{
"epoch": 0.4,
"learning_rate": 1.3678811734492659e-05,
"loss": 2.4821,
"loss_": 0.8992,
"moe_loss": 0.1602,
"moe_loss_longrong": 1.4031,
"step": 2072
},
{
"epoch": 0.4,
"learning_rate": 1.3638211550702256e-05,
"loss": 2.4975,
"loss_": 0.9085,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.4031,
"step": 2079
},
{
"epoch": 0.4,
"learning_rate": 1.3597542128086702e-05,
"loss": 2.4958,
"loss_": 1.1546,
"moe_loss": 0.1602,
"moe_loss_longrong": 1.403,
"step": 2086
},
{
"epoch": 0.4,
"learning_rate": 1.3556804240626019e-05,
"loss": 2.5323,
"loss_": 1.0748,
"moe_loss": 0.1602,
"moe_loss_longrong": 1.4033,
"step": 2093
},
{
"epoch": 0.4,
"learning_rate": 1.3515998663603174e-05,
"loss": 2.5085,
"loss_": 1.1199,
"moe_loss": 0.1602,
"moe_loss_longrong": 1.4034,
"step": 2100
},
{
"epoch": 0.41,
"learning_rate": 1.3475126173589343e-05,
"loss": 2.4864,
"loss_": 0.8556,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.4029,
"step": 2107
},
{
"epoch": 0.41,
"learning_rate": 1.3434187548429126e-05,
"loss": 2.5068,
"loss_": 0.946,
"moe_loss": 0.1602,
"moe_loss_longrong": 1.4031,
"step": 2114
},
{
"epoch": 0.41,
"learning_rate": 1.3393183567225724e-05,
"loss": 2.4837,
"loss_": 1.1161,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.4029,
"step": 2121
},
{
"epoch": 0.41,
"learning_rate": 1.3352115010326155e-05,
"loss": 2.4825,
"loss_": 0.6543,
"moe_loss": 0.1602,
"moe_loss_longrong": 1.4027,
"step": 2128
},
{
"epoch": 0.41,
"learning_rate": 1.3310982659306352e-05,
"loss": 2.5257,
"loss_": 1.2189,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.4026,
"step": 2135
},
{
"epoch": 0.41,
"learning_rate": 1.3269787296956333e-05,
"loss": 2.4993,
"loss_": 0.9341,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.4028,
"step": 2142
},
{
"epoch": 0.41,
"learning_rate": 1.3228529707265279e-05,
"loss": 2.4981,
"loss_": 1.102,
"moe_loss": 0.1602,
"moe_loss_longrong": 1.4029,
"step": 2149
},
{
"epoch": 0.41,
"learning_rate": 1.3187210675406617e-05,
"loss": 2.5076,
"loss_": 0.6091,
"moe_loss": 0.1602,
"moe_loss_longrong": 1.4026,
"step": 2156
},
{
"epoch": 0.42,
"learning_rate": 1.3145830987723081e-05,
"loss": 2.4946,
"loss_": 0.972,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.4025,
"step": 2163
},
{
"epoch": 0.42,
"learning_rate": 1.3104391431711748e-05,
"loss": 2.471,
"loss_": 0.8826,
"moe_loss": 0.1602,
"moe_loss_longrong": 1.4026,
"step": 2170
},
{
"epoch": 0.42,
"learning_rate": 1.306289279600905e-05,
"loss": 2.4847,
"loss_": 1.1855,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.4025,
"step": 2177
},
{
"epoch": 0.42,
"learning_rate": 1.3021335870375763e-05,
"loss": 2.505,
"loss_": 1.0819,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.4025,
"step": 2184
},
{
"epoch": 0.42,
"learning_rate": 1.297972144568198e-05,
"loss": 2.4909,
"loss_": 0.8074,
"moe_loss": 0.1602,
"moe_loss_longrong": 1.4024,
"step": 2191
},
{
"epoch": 0.42,
"learning_rate": 1.2938050313892062e-05,
"loss": 2.4929,
"loss_": 1.0944,
"moe_loss": 0.1607,
"moe_loss_longrong": 1.4177,
"step": 2198
},
{
"epoch": 0.42,
"learning_rate": 1.289632326804956e-05,
"loss": 2.4747,
"loss_": 0.8172,
"moe_loss": 0.1602,
"moe_loss_longrong": 1.4027,
"step": 2205
},
{
"epoch": 0.43,
"learning_rate": 1.2854541102262119e-05,
"loss": 2.4782,
"loss_": 0.8552,
"moe_loss": 0.1602,
"moe_loss_longrong": 1.4028,
"step": 2212
},
{
"epoch": 0.43,
"learning_rate": 1.2812704611686386e-05,
"loss": 2.4825,
"loss_": 0.9202,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.402,
"step": 2219
},
{
"epoch": 0.43,
"learning_rate": 1.2770814592512853e-05,
"loss": 2.4951,
"loss_": 1.1396,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.4022,
"step": 2226
},
{
"epoch": 0.43,
"learning_rate": 1.2728871841950719e-05,
"loss": 2.4628,
"loss_": 0.9138,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.4025,
"step": 2233
},
{
"epoch": 0.43,
"learning_rate": 1.2686877158212715e-05,
"loss": 2.5028,
"loss_": 0.8915,
"moe_loss": 0.1602,
"moe_loss_longrong": 1.402,
"step": 2240
},
{
"epoch": 0.43,
"learning_rate": 1.2644831340499906e-05,
"loss": 2.4802,
"loss_": 1.3262,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.4022,
"step": 2247
},
{
"epoch": 0.43,
"learning_rate": 1.2602735188986498e-05,
"loss": 2.4888,
"loss_": 1.1958,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.4025,
"step": 2254
},
{
"epoch": 0.43,
"learning_rate": 1.2560589504804592e-05,
"loss": 2.4964,
"loss_": 1.0784,
"moe_loss": 0.1602,
"moe_loss_longrong": 1.4019,
"step": 2261
},
{
"epoch": 0.44,
"learning_rate": 1.2518395090028952e-05,
"loss": 2.4972,
"loss_": 1.164,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.402,
"step": 2268
},
{
"epoch": 0.44,
"learning_rate": 1.2476152747661727e-05,
"loss": 2.5173,
"loss_": 1.083,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.4018,
"step": 2275
},
{
"epoch": 0.44,
"learning_rate": 1.243386328161718e-05,
"loss": 2.5094,
"loss_": 1.1749,
"moe_loss": 0.1602,
"moe_loss_longrong": 1.4021,
"step": 2282
},
{
"epoch": 0.44,
"learning_rate": 1.2391527496706389e-05,
"loss": 2.5007,
"loss_": 1.2048,
"moe_loss": 0.1602,
"moe_loss_longrong": 1.4019,
"step": 2289
},
{
"epoch": 0.44,
"learning_rate": 1.2349146198621917e-05,
"loss": 2.4613,
"loss_": 0.9356,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.4018,
"step": 2296
},
{
"epoch": 0.44,
"learning_rate": 1.23067201939225e-05,
"loss": 2.522,
"loss_": 1.3161,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.4019,
"step": 2303
},
{
"epoch": 0.44,
"learning_rate": 1.2264250290017675e-05,
"loss": 2.4876,
"loss_": 0.9183,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.4018,
"step": 2310
},
{
"epoch": 0.45,
"learning_rate": 1.222173729515243e-05,
"loss": 2.4852,
"loss_": 1.0262,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.4013,
"step": 2317
},
{
"epoch": 0.45,
"learning_rate": 1.217918201839182e-05,
"loss": 2.4974,
"loss_": 0.9078,
"moe_loss": 0.1602,
"moe_loss_longrong": 1.4019,
"step": 2324
},
{
"epoch": 0.45,
"learning_rate": 1.2136585269605558e-05,
"loss": 2.4873,
"loss_": 1.063,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.4015,
"step": 2331
},
{
"epoch": 0.45,
"learning_rate": 1.209394785945263e-05,
"loss": 2.4491,
"loss_": 0.7031,
"moe_loss": 0.1602,
"moe_loss_longrong": 1.4026,
"step": 2338
},
{
"epoch": 0.45,
"learning_rate": 1.2051270599365825e-05,
"loss": 2.5059,
"loss_": 1.0756,
"moe_loss": 0.1602,
"moe_loss_longrong": 1.4012,
"step": 2345
},
{
"epoch": 0.45,
"learning_rate": 1.2008554301536328e-05,
"loss": 2.4821,
"loss_": 0.508,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.4018,
"step": 2352
},
{
"epoch": 0.45,
"learning_rate": 1.1965799778898258e-05,
"loss": 2.4776,
"loss_": 1.0053,
"moe_loss": 0.1602,
"moe_loss_longrong": 1.4016,
"step": 2359
},
{
"epoch": 0.46,
"learning_rate": 1.1923007845113178e-05,
"loss": 2.512,
"loss_": 0.6722,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.4016,
"step": 2366
},
{
"epoch": 0.46,
"learning_rate": 1.1880179314554629e-05,
"loss": 2.4488,
"loss_": 0.4041,
"moe_loss": 0.1602,
"moe_loss_longrong": 1.402,
"step": 2373
},
{
"epoch": 0.46,
"learning_rate": 1.1837315002292629e-05,
"loss": 2.4889,
"loss_": 1.1084,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.401,
"step": 2380
},
{
"epoch": 0.46,
"learning_rate": 1.1794415724078147e-05,
"loss": 2.4732,
"loss_": 0.6909,
"moe_loss": 0.1602,
"moe_loss_longrong": 1.4019,
"step": 2387
},
{
"epoch": 0.46,
"learning_rate": 1.17514822963276e-05,
"loss": 2.4599,
"loss_": 1.0441,
"moe_loss": 0.1602,
"moe_loss_longrong": 1.4013,
"step": 2394
},
{
"epoch": 0.46,
"learning_rate": 1.1708515536107299e-05,
"loss": 2.472,
"loss_": 0.7234,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.4012,
"step": 2401
},
{
"epoch": 0.46,
"learning_rate": 1.1665516261117914e-05,
"loss": 2.4923,
"loss_": 1.2036,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.4014,
"step": 2408
},
{
"epoch": 0.46,
"learning_rate": 1.1622485289678886e-05,
"loss": 2.4794,
"loss_": 0.9414,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.4011,
"step": 2415
},
{
"epoch": 0.47,
"learning_rate": 1.1579423440712887e-05,
"loss": 2.4873,
"loss_": 0.8799,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.4007,
"step": 2422
},
{
"epoch": 0.47,
"learning_rate": 1.153633153373022e-05,
"loss": 2.4685,
"loss_": 0.8351,
"moe_loss": 0.1605,
"moe_loss_longrong": 1.414,
"step": 2429
},
{
"epoch": 0.47,
"learning_rate": 1.149321038881321e-05,
"loss": 2.4965,
"loss_": 1.0812,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.4007,
"step": 2436
},
{
"epoch": 0.47,
"learning_rate": 1.1450060826600618e-05,
"loss": 2.467,
"loss_": 1.0899,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.4007,
"step": 2443
},
{
"epoch": 0.47,
"learning_rate": 1.1406883668272015e-05,
"loss": 2.5148,
"loss_": 0.9878,
"moe_loss": 0.1602,
"moe_loss_longrong": 1.4008,
"step": 2450
},
{
"epoch": 0.47,
"learning_rate": 1.1363679735532151e-05,
"loss": 2.4869,
"loss_": 1.0094,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.4004,
"step": 2457
},
{
"epoch": 0.47,
"learning_rate": 1.132044985059532e-05,
"loss": 2.4687,
"loss_": 0.8133,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.4004,
"step": 2464
},
{
"epoch": 0.48,
"learning_rate": 1.1277194836169714e-05,
"loss": 2.4692,
"loss_": 1.154,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.401,
"step": 2471
},
{
"epoch": 0.48,
"learning_rate": 1.1233915515441765e-05,
"loss": 2.4768,
"loss_": 0.9273,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.4005,
"step": 2478
},
{
"epoch": 0.48,
"learning_rate": 1.1190612712060475e-05,
"loss": 2.465,
"loss_": 0.9635,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.4005,
"step": 2485
},
{
"epoch": 0.48,
"learning_rate": 1.1147287250121745e-05,
"loss": 2.5032,
"loss_": 1.3144,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.4006,
"step": 2492
},
{
"epoch": 0.48,
"learning_rate": 1.11039399541527e-05,
"loss": 2.4839,
"loss_": 0.852,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.4001,
"step": 2499
},
{
"epoch": 0.48,
"learning_rate": 1.1060571649095972e-05,
"loss": 2.4618,
"loss_": 0.816,
"moe_loss": 0.1605,
"moe_loss_longrong": 1.4135,
"step": 2506
},
{
"epoch": 0.48,
"learning_rate": 1.1017183160294033e-05,
"loss": 2.5082,
"loss_": 0.7247,
"moe_loss": 0.1602,
"moe_loss_longrong": 1.4005,
"step": 2513
},
{
"epoch": 0.48,
"learning_rate": 1.0973775313473465e-05,
"loss": 2.5026,
"loss_": 0.9287,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.4007,
"step": 2520
},
{
"epoch": 0.49,
"learning_rate": 1.0930348934729249e-05,
"loss": 2.4564,
"loss_": 1.0246,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.4009,
"step": 2527
},
{
"epoch": 0.49,
"learning_rate": 1.0886904850509052e-05,
"loss": 2.5123,
"loss_": 1.1915,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.4005,
"step": 2534
},
{
"epoch": 0.49,
"learning_rate": 1.0843443887597495e-05,
"loss": 2.4786,
"loss_": 0.9271,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.4004,
"step": 2541
},
{
"epoch": 0.49,
"learning_rate": 1.0799966873100419e-05,
"loss": 2.4941,
"loss_": 1.2428,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.4007,
"step": 2548
},
{
"epoch": 0.49,
"learning_rate": 1.0756474634429133e-05,
"loss": 2.4861,
"loss_": 1.1406,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.4004,
"step": 2555
},
{
"epoch": 0.49,
"learning_rate": 1.0712967999284682e-05,
"loss": 2.474,
"loss_": 1.0874,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.4006,
"step": 2562
},
{
"epoch": 0.49,
"learning_rate": 1.0669447795642103e-05,
"loss": 2.478,
"loss_": 1.2379,
"moe_loss": 0.1602,
"moe_loss_longrong": 1.4004,
"step": 2569
},
{
"epoch": 0.5,
"learning_rate": 1.0625914851734632e-05,
"loss": 2.4567,
"loss_": 1.0187,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.4004,
"step": 2576
},
{
"epoch": 0.5,
"learning_rate": 1.0582369996037985e-05,
"loss": 2.4762,
"loss_": 0.9279,
"moe_loss": 0.1602,
"moe_loss_longrong": 1.4007,
"step": 2583
},
{
"epoch": 0.5,
"learning_rate": 1.053881405725456e-05,
"loss": 2.4869,
"loss_": 0.8599,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.4,
"step": 2590
},
{
"epoch": 0.5,
"learning_rate": 1.0495247864297684e-05,
"loss": 2.5043,
"loss_": 1.0374,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.4,
"step": 2597
},
{
"epoch": 0.5,
"learning_rate": 1.0451672246275826e-05,
"loss": 2.4675,
"loss_": 0.57,
"moe_loss": 0.1602,
"moe_loss_longrong": 1.4005,
"step": 2604
},
{
"epoch": 0.5,
"learning_rate": 1.0408088032476822e-05,
"loss": 2.4752,
"loss_": 0.5417,
"moe_loss": 0.1602,
"moe_loss_longrong": 1.4003,
"step": 2611
},
{
"epoch": 0.5,
"learning_rate": 1.036449605235211e-05,
"loss": 2.4568,
"loss_": 1.056,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3999,
"step": 2618
},
{
"epoch": 0.51,
"learning_rate": 1.0320897135500904e-05,
"loss": 2.4658,
"loss_": 0.8557,
"moe_loss": 0.1602,
"moe_loss_longrong": 1.4,
"step": 2625
},
{
"epoch": 0.51,
"learning_rate": 1.0277292111654447e-05,
"loss": 2.4553,
"loss_": 0.9975,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3999,
"step": 2632
},
{
"epoch": 0.51,
"learning_rate": 1.0233681810660207e-05,
"loss": 2.4815,
"loss_": 1.0191,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.4004,
"step": 2639
},
{
"epoch": 0.51,
"learning_rate": 1.019006706246607e-05,
"loss": 2.4735,
"loss_": 1.0673,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.4001,
"step": 2646
},
{
"epoch": 0.51,
"learning_rate": 1.0146448697104561e-05,
"loss": 2.458,
"loss_": 1.2381,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.4,
"step": 2653
},
{
"epoch": 0.51,
"learning_rate": 1.010282754467705e-05,
"loss": 2.463,
"loss_": 1.1428,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3997,
"step": 2660
},
{
"epoch": 0.51,
"learning_rate": 1.0059204435337938e-05,
"loss": 2.483,
"loss_": 1.2124,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3999,
"step": 2667
},
{
"epoch": 0.51,
"learning_rate": 1.0015580199278873e-05,
"loss": 2.4907,
"loss_": 0.8323,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3995,
"step": 2674
},
{
"epoch": 0.52,
"learning_rate": 9.971955666712945e-06,
"loss": 2.4936,
"loss_": 1.1091,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3998,
"step": 2681
},
{
"epoch": 0.52,
"learning_rate": 9.928331667858886e-06,
"loss": 2.5039,
"loss_": 1.0505,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3998,
"step": 2688
},
{
"epoch": 0.52,
"learning_rate": 9.884709032925274e-06,
"loss": 2.4704,
"loss_": 0.9685,
"moe_loss": 0.1602,
"moe_loss_longrong": 1.3998,
"step": 2695
},
{
"epoch": 0.52,
"learning_rate": 9.841088592094726e-06,
"loss": 2.4897,
"loss_": 1.2011,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3993,
"step": 2702
},
{
"epoch": 0.52,
"learning_rate": 9.797471175508101e-06,
"loss": 2.4642,
"loss_": 1.064,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3997,
"step": 2709
},
{
"epoch": 0.52,
"learning_rate": 9.753857613248714e-06,
"loss": 2.4746,
"loss_": 1.089,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3995,
"step": 2716
},
{
"epoch": 0.52,
"learning_rate": 9.710248735326519e-06,
"loss": 2.4767,
"loss_": 0.7312,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3996,
"step": 2723
},
{
"epoch": 0.53,
"learning_rate": 9.666645371662324e-06,
"loss": 2.4693,
"loss_": 1.0271,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3994,
"step": 2730
},
{
"epoch": 0.53,
"learning_rate": 9.623048352071998e-06,
"loss": 2.4631,
"loss_": 0.7867,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3992,
"step": 2737
},
{
"epoch": 0.53,
"learning_rate": 9.579458506250668e-06,
"loss": 2.4744,
"loss_": 1.1123,
"moe_loss": 0.1602,
"moe_loss_longrong": 1.3996,
"step": 2744
},
{
"epoch": 0.53,
"learning_rate": 9.535876663756955e-06,
"loss": 2.4836,
"loss_": 0.9437,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3994,
"step": 2751
},
{
"epoch": 0.53,
"learning_rate": 9.492303653997146e-06,
"loss": 2.4822,
"loss_": 0.9857,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.399,
"step": 2758
},
{
"epoch": 0.53,
"learning_rate": 9.448740306209447e-06,
"loss": 2.4816,
"loss_": 0.9489,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3991,
"step": 2765
},
{
"epoch": 0.53,
"learning_rate": 9.40518744944818e-06,
"loss": 2.4744,
"loss_": 0.6401,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3996,
"step": 2772
},
{
"epoch": 0.53,
"learning_rate": 9.361645912568015e-06,
"loss": 2.4736,
"loss_": 0.8008,
"moe_loss": 0.1602,
"moe_loss_longrong": 1.3996,
"step": 2779
},
{
"epoch": 0.54,
"learning_rate": 9.318116524208198e-06,
"loss": 2.4719,
"loss_": 0.9666,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3997,
"step": 2786
},
{
"epoch": 0.54,
"learning_rate": 9.27460011277677e-06,
"loss": 2.4865,
"loss_": 1.0383,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3994,
"step": 2793
},
{
"epoch": 0.54,
"learning_rate": 9.231097506434808e-06,
"loss": 2.4683,
"loss_": 0.807,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3995,
"step": 2800
},
{
"epoch": 0.54,
"learning_rate": 9.187609533080668e-06,
"loss": 2.4738,
"loss_": 1.0131,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3992,
"step": 2807
},
{
"epoch": 0.54,
"learning_rate": 9.144137020334214e-06,
"loss": 2.4559,
"loss_": 0.9178,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3994,
"step": 2814
},
{
"epoch": 0.54,
"learning_rate": 9.100680795521104e-06,
"loss": 2.4832,
"loss_": 0.8958,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3993,
"step": 2821
},
{
"epoch": 0.54,
"learning_rate": 9.057241685656995e-06,
"loss": 2.4729,
"loss_": 0.8244,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.399,
"step": 2828
},
{
"epoch": 0.55,
"learning_rate": 9.013820517431841e-06,
"loss": 2.4458,
"loss_": 0.6857,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3989,
"step": 2835
},
{
"epoch": 0.55,
"learning_rate": 8.970418117194146e-06,
"loss": 2.4789,
"loss_": 0.8677,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3991,
"step": 2842
},
{
"epoch": 0.55,
"learning_rate": 8.927035310935241e-06,
"loss": 2.4633,
"loss_": 0.755,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.399,
"step": 2849
},
{
"epoch": 0.55,
"learning_rate": 8.883672924273566e-06,
"loss": 2.481,
"loss_": 0.9947,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.399,
"step": 2856
},
{
"epoch": 0.55,
"learning_rate": 8.840331782438954e-06,
"loss": 2.4821,
"loss_": 0.6639,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3988,
"step": 2863
},
{
"epoch": 0.55,
"learning_rate": 8.797012710256923e-06,
"loss": 2.4683,
"loss_": 1.2205,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3992,
"step": 2870
},
{
"epoch": 0.55,
"learning_rate": 8.753716532132992e-06,
"loss": 2.4611,
"loss_": 0.9415,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3989,
"step": 2877
},
{
"epoch": 0.55,
"learning_rate": 8.71044407203697e-06,
"loss": 2.491,
"loss_": 0.9864,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.399,
"step": 2884
},
{
"epoch": 0.56,
"learning_rate": 8.667196153487308e-06,
"loss": 2.4726,
"loss_": 0.865,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3986,
"step": 2891
},
{
"epoch": 0.56,
"learning_rate": 8.623973599535385e-06,
"loss": 2.457,
"loss_": 0.8526,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.399,
"step": 2898
},
{
"epoch": 0.56,
"learning_rate": 8.580777232749883e-06,
"loss": 2.4576,
"loss_": 1.0116,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3988,
"step": 2905
},
{
"epoch": 0.56,
"learning_rate": 8.537607875201106e-06,
"loss": 2.5113,
"loss_": 0.8642,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3987,
"step": 2912
},
{
"epoch": 0.56,
"learning_rate": 8.494466348445345e-06,
"loss": 2.4787,
"loss_": 1.0994,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3988,
"step": 2919
},
{
"epoch": 0.56,
"learning_rate": 8.451353473509254e-06,
"loss": 2.4797,
"loss_": 1.1009,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3983,
"step": 2926
},
{
"epoch": 0.56,
"learning_rate": 8.408270070874201e-06,
"loss": 2.4709,
"loss_": 0.8487,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3984,
"step": 2933
},
{
"epoch": 0.57,
"learning_rate": 8.365216960460675e-06,
"loss": 2.5019,
"loss_": 0.9758,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3987,
"step": 2940
},
{
"epoch": 0.57,
"learning_rate": 8.322194961612668e-06,
"loss": 2.4919,
"loss_": 0.9281,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3986,
"step": 2947
},
{
"epoch": 0.57,
"learning_rate": 8.279204893082083e-06,
"loss": 2.4788,
"loss_": 0.9675,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3986,
"step": 2954
},
{
"epoch": 0.57,
"learning_rate": 8.23624757301318e-06,
"loss": 2.4796,
"loss_": 0.9676,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3986,
"step": 2961
},
{
"epoch": 0.57,
"learning_rate": 8.193323818926955e-06,
"loss": 2.4471,
"loss_": 0.3893,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3986,
"step": 2968
},
{
"epoch": 0.57,
"learning_rate": 8.150434447705623e-06,
"loss": 2.4644,
"loss_": 1.0186,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3985,
"step": 2975
},
{
"epoch": 0.57,
"learning_rate": 8.107580275577059e-06,
"loss": 2.4517,
"loss_": 1.2343,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3985,
"step": 2982
},
{
"epoch": 0.58,
"learning_rate": 8.064762118099258e-06,
"loss": 2.4524,
"loss_": 0.7846,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3985,
"step": 2989
},
{
"epoch": 0.58,
"learning_rate": 8.021980790144828e-06,
"loss": 2.4626,
"loss_": 1.0468,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3988,
"step": 2996
},
{
"epoch": 0.58,
"learning_rate": 7.979237105885467e-06,
"loss": 2.4822,
"loss_": 0.5538,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3985,
"step": 3003
},
{
"epoch": 0.58,
"learning_rate": 7.936531878776484e-06,
"loss": 2.4753,
"loss_": 1.1616,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3983,
"step": 3010
},
{
"epoch": 0.58,
"learning_rate": 7.893865921541294e-06,
"loss": 2.4418,
"loss_": 0.5863,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3987,
"step": 3017
},
{
"epoch": 0.58,
"learning_rate": 7.85124004615598e-06,
"loss": 2.4724,
"loss_": 0.9406,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3982,
"step": 3024
},
{
"epoch": 0.58,
"learning_rate": 7.808655063833832e-06,
"loss": 2.4884,
"loss_": 1.0104,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3989,
"step": 3031
},
{
"epoch": 0.58,
"learning_rate": 7.766111785009888e-06,
"loss": 2.4676,
"loss_": 0.9396,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.398,
"step": 3038
},
{
"epoch": 0.59,
"learning_rate": 7.723611019325538e-06,
"loss": 2.4705,
"loss_": 0.9611,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3982,
"step": 3045
},
{
"epoch": 0.59,
"learning_rate": 7.681153575613098e-06,
"loss": 2.4555,
"loss_": 0.931,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3986,
"step": 3052
},
{
"epoch": 0.59,
"learning_rate": 7.638740261880423e-06,
"loss": 2.4369,
"loss_": 0.7901,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3985,
"step": 3059
},
{
"epoch": 0.59,
"learning_rate": 7.596371885295542e-06,
"loss": 2.4852,
"loss_": 0.9128,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3981,
"step": 3066
},
{
"epoch": 0.59,
"learning_rate": 7.55404925217127e-06,
"loss": 2.5004,
"loss_": 1.0571,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3983,
"step": 3073
},
{
"epoch": 0.59,
"learning_rate": 7.511773167949885e-06,
"loss": 2.4582,
"loss_": 1.0777,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3986,
"step": 3080
},
{
"epoch": 0.59,
"learning_rate": 7.46954443718779e-06,
"loss": 2.4644,
"loss_": 0.89,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3984,
"step": 3087
},
{
"epoch": 0.6,
"learning_rate": 7.427363863540202e-06,
"loss": 2.4668,
"loss_": 1.0102,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3986,
"step": 3094
},
{
"epoch": 0.6,
"learning_rate": 7.385232249745873e-06,
"loss": 2.4733,
"loss_": 0.6698,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3988,
"step": 3101
},
{
"epoch": 0.6,
"learning_rate": 7.343150397611782e-06,
"loss": 2.5122,
"loss_": 1.2655,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3979,
"step": 3108
},
{
"epoch": 0.6,
"learning_rate": 7.301119107997905e-06,
"loss": 2.461,
"loss_": 1.1851,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3978,
"step": 3115
},
{
"epoch": 0.6,
"learning_rate": 7.2591391808019555e-06,
"loss": 2.4727,
"loss_": 0.8541,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3979,
"step": 3122
},
{
"epoch": 0.6,
"learning_rate": 7.217211414944171e-06,
"loss": 2.4443,
"loss_": 1.0654,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3978,
"step": 3129
},
{
"epoch": 0.6,
"learning_rate": 7.175336608352113e-06,
"loss": 2.4922,
"loss_": 1.184,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3982,
"step": 3136
},
{
"epoch": 0.6,
"learning_rate": 7.133515557945463e-06,
"loss": 2.4643,
"loss_": 1.1851,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3981,
"step": 3143
},
{
"epoch": 0.61,
"learning_rate": 7.091749059620881e-06,
"loss": 2.4581,
"loss_": 1.328,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3982,
"step": 3150
},
{
"epoch": 0.61,
"learning_rate": 7.0500379082368305e-06,
"loss": 2.4708,
"loss_": 1.0728,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.398,
"step": 3157
},
{
"epoch": 0.61,
"learning_rate": 7.008382897598477e-06,
"loss": 2.4901,
"loss_": 0.9315,
"moe_loss": 0.1606,
"moe_loss_longrong": 1.4088,
"step": 3164
},
{
"epoch": 0.61,
"learning_rate": 6.9667848204425785e-06,
"loss": 2.4706,
"loss_": 1.1113,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3979,
"step": 3171
},
{
"epoch": 0.61,
"learning_rate": 6.9252444684223765e-06,
"loss": 2.4442,
"loss_": 0.7937,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3977,
"step": 3178
},
{
"epoch": 0.61,
"learning_rate": 6.88376263209255e-06,
"loss": 2.4406,
"loss_": 1.1277,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3982,
"step": 3185
},
{
"epoch": 0.61,
"learning_rate": 6.84234010089417e-06,
"loss": 2.4761,
"loss_": 0.9565,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3982,
"step": 3192
},
{
"epoch": 0.62,
"learning_rate": 6.800977663139666e-06,
"loss": 2.4832,
"loss_": 0.7461,
"moe_loss": 0.1605,
"moe_loss_longrong": 1.4073,
"step": 3199
},
{
"epoch": 0.62,
"learning_rate": 6.759676105997834e-06,
"loss": 2.4752,
"loss_": 1.1396,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3978,
"step": 3206
},
{
"epoch": 0.62,
"learning_rate": 6.718436215478849e-06,
"loss": 2.4594,
"loss_": 1.1075,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3978,
"step": 3213
},
{
"epoch": 0.62,
"learning_rate": 6.677258776419304e-06,
"loss": 2.4703,
"loss_": 0.9133,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3975,
"step": 3220
},
{
"epoch": 0.62,
"learning_rate": 6.63614457246728e-06,
"loss": 2.4534,
"loss_": 0.9049,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3978,
"step": 3227
},
{
"epoch": 0.62,
"learning_rate": 6.595094386067428e-06,
"loss": 2.4945,
"loss_": 1.1701,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3975,
"step": 3234
},
{
"epoch": 0.62,
"learning_rate": 6.554108998446096e-06,
"loss": 2.4832,
"loss_": 0.9606,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3977,
"step": 3241
},
{
"epoch": 0.62,
"learning_rate": 6.513189189596422e-06,
"loss": 2.4639,
"loss_": 1.0635,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3978,
"step": 3248
},
{
"epoch": 0.63,
"learning_rate": 6.472335738263534e-06,
"loss": 2.4609,
"loss_": 0.9759,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3982,
"step": 3255
},
{
"epoch": 0.63,
"learning_rate": 6.431549421929694e-06,
"loss": 2.4641,
"loss_": 0.7825,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.398,
"step": 3262
},
{
"epoch": 0.63,
"learning_rate": 6.390831016799527e-06,
"loss": 2.458,
"loss_": 0.8033,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3977,
"step": 3269
},
{
"epoch": 0.63,
"learning_rate": 6.350181297785242e-06,
"loss": 2.4584,
"loss_": 1.0825,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3974,
"step": 3276
},
{
"epoch": 0.63,
"learning_rate": 6.309601038491874e-06,
"loss": 2.4911,
"loss_": 0.7566,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3979,
"step": 3283
},
{
"epoch": 0.63,
"learning_rate": 6.269091011202576e-06,
"loss": 2.457,
"loss_": 0.6181,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3977,
"step": 3290
},
{
"epoch": 0.63,
"learning_rate": 6.2286519868639095e-06,
"loss": 2.4458,
"loss_": 1.0953,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3975,
"step": 3297
},
{
"epoch": 0.64,
"learning_rate": 6.188284735071177e-06,
"loss": 2.4848,
"loss_": 0.9802,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3976,
"step": 3304
},
{
"epoch": 0.64,
"learning_rate": 6.1479900240537956e-06,
"loss": 2.4815,
"loss_": 1.2048,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3977,
"step": 3311
},
{
"epoch": 0.64,
"learning_rate": 6.107768620660633e-06,
"loss": 2.4552,
"loss_": 1.039,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3976,
"step": 3318
},
{
"epoch": 0.64,
"learning_rate": 6.067621290345455e-06,
"loss": 2.4365,
"loss_": 1.2129,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3974,
"step": 3325
},
{
"epoch": 0.64,
"learning_rate": 6.027548797152336e-06,
"loss": 2.4546,
"loss_": 0.6566,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3973,
"step": 3332
},
{
"epoch": 0.64,
"learning_rate": 5.987551903701128e-06,
"loss": 2.4409,
"loss_": 1.0142,
"moe_loss": 0.1605,
"moe_loss_longrong": 1.408,
"step": 3339
},
{
"epoch": 0.64,
"learning_rate": 5.947631371172943e-06,
"loss": 2.4488,
"loss_": 0.9704,
"moe_loss": 0.1604,
"moe_loss_longrong": 1.408,
"step": 3346
},
{
"epoch": 0.65,
"learning_rate": 5.9077879592956675e-06,
"loss": 2.4569,
"loss_": 0.936,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3972,
"step": 3353
},
{
"epoch": 0.65,
"learning_rate": 5.8680224263295045e-06,
"loss": 2.4519,
"loss_": 0.9728,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3972,
"step": 3360
},
{
"epoch": 0.65,
"learning_rate": 5.828335529052541e-06,
"loss": 2.4757,
"loss_": 0.9242,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3972,
"step": 3367
},
{
"epoch": 0.65,
"learning_rate": 5.788728022746348e-06,
"loss": 2.4769,
"loss_": 0.8005,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3976,
"step": 3374
},
{
"epoch": 0.65,
"learning_rate": 5.749200661181611e-06,
"loss": 2.4548,
"loss_": 1.116,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3971,
"step": 3381
},
{
"epoch": 0.65,
"learning_rate": 5.709754196603781e-06,
"loss": 2.4687,
"loss_": 0.8613,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3971,
"step": 3388
},
{
"epoch": 0.65,
"learning_rate": 5.67038937971875e-06,
"loss": 2.437,
"loss_": 0.9275,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3974,
"step": 3395
},
{
"epoch": 0.65,
"learning_rate": 5.631106959678575e-06,
"loss": 2.4636,
"loss_": 1.1476,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3974,
"step": 3402
},
{
"epoch": 0.66,
"learning_rate": 5.5919076840672215e-06,
"loss": 2.449,
"loss_": 0.9428,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.397,
"step": 3409
},
{
"epoch": 0.66,
"learning_rate": 5.552792298886335e-06,
"loss": 2.4572,
"loss_": 0.8202,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3973,
"step": 3416
},
{
"epoch": 0.66,
"learning_rate": 5.513761548541032e-06,
"loss": 2.444,
"loss_": 0.905,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3973,
"step": 3423
},
{
"epoch": 0.66,
"learning_rate": 5.474816175825754e-06,
"loss": 2.4189,
"loss_": 1.1022,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3973,
"step": 3430
},
{
"epoch": 0.66,
"learning_rate": 5.4359569219101115e-06,
"loss": 2.5038,
"loss_": 1.1099,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3969,
"step": 3437
},
{
"epoch": 0.66,
"learning_rate": 5.397184526324792e-06,
"loss": 2.4885,
"loss_": 0.9227,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3973,
"step": 3444
},
{
"epoch": 0.66,
"learning_rate": 5.358499726947488e-06,
"loss": 2.4389,
"loss_": 0.9602,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3971,
"step": 3451
},
{
"epoch": 0.67,
"learning_rate": 5.31990325998883e-06,
"loss": 2.4275,
"loss_": 0.9191,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3969,
"step": 3458
},
{
"epoch": 0.67,
"learning_rate": 5.281395859978414e-06,
"loss": 2.4647,
"loss_": 1.0229,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.397,
"step": 3465
},
{
"epoch": 0.67,
"learning_rate": 5.24297825975079e-06,
"loss": 2.4649,
"loss_": 0.9973,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3973,
"step": 3472
},
{
"epoch": 0.67,
"learning_rate": 5.2046511904315265e-06,
"loss": 2.4409,
"loss_": 0.6513,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3969,
"step": 3479
},
{
"epoch": 0.67,
"learning_rate": 5.166415381423306e-06,
"loss": 2.4805,
"loss_": 1.1712,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3971,
"step": 3486
},
{
"epoch": 0.67,
"learning_rate": 5.128271560392037e-06,
"loss": 2.4496,
"loss_": 1.0721,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3971,
"step": 3493
},
{
"epoch": 0.67,
"learning_rate": 5.09022045325299e-06,
"loss": 2.473,
"loss_": 1.1122,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3971,
"step": 3500
},
{
"epoch": 0.67,
"learning_rate": 5.052262784157014e-06,
"loss": 2.4654,
"loss_": 1.0388,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.397,
"step": 3507
},
{
"epoch": 0.68,
"learning_rate": 5.014399275476721e-06,
"loss": 2.463,
"loss_": 1.0244,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3968,
"step": 3514
},
{
"epoch": 0.68,
"learning_rate": 4.976630647792771e-06,
"loss": 2.4481,
"loss_": 0.7509,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3972,
"step": 3521
},
{
"epoch": 0.68,
"learning_rate": 4.938957619880138e-06,
"loss": 2.4624,
"loss_": 1.0897,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.397,
"step": 3528
},
{
"epoch": 0.68,
"learning_rate": 4.901380908694434e-06,
"loss": 2.4236,
"loss_": 1.1599,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.397,
"step": 3535
},
{
"epoch": 0.68,
"learning_rate": 4.863901229358261e-06,
"loss": 2.4483,
"loss_": 0.8951,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3971,
"step": 3542
},
{
"epoch": 0.68,
"learning_rate": 4.8265192951476206e-06,
"loss": 2.4552,
"loss_": 0.9006,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3969,
"step": 3549
},
{
"epoch": 0.68,
"learning_rate": 4.789235817478322e-06,
"loss": 2.457,
"loss_": 1.0357,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3969,
"step": 3556
},
{
"epoch": 0.69,
"learning_rate": 4.752051505892438e-06,
"loss": 2.462,
"loss_": 1.031,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3968,
"step": 3563
},
{
"epoch": 0.69,
"learning_rate": 4.714967068044826e-06,
"loss": 2.459,
"loss_": 1.2418,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3969,
"step": 3570
},
{
"epoch": 0.69,
"learning_rate": 4.677983209689631e-06,
"loss": 2.4449,
"loss_": 0.7941,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3973,
"step": 3577
},
{
"epoch": 0.69,
"learning_rate": 4.641100634666877e-06,
"loss": 2.4528,
"loss_": 0.7962,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.397,
"step": 3584
},
{
"epoch": 0.69,
"learning_rate": 4.6043200448890724e-06,
"loss": 2.4674,
"loss_": 1.0349,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3967,
"step": 3591
},
{
"epoch": 0.69,
"learning_rate": 4.567642140327823e-06,
"loss": 2.4498,
"loss_": 0.9343,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3966,
"step": 3598
},
{
"epoch": 0.69,
"learning_rate": 4.531067619000553e-06,
"loss": 2.4711,
"loss_": 0.7285,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3967,
"step": 3605
},
{
"epoch": 0.69,
"learning_rate": 4.494597176957186e-06,
"loss": 2.4578,
"loss_": 0.6286,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3968,
"step": 3612
},
{
"epoch": 0.7,
"learning_rate": 4.458231508266912e-06,
"loss": 2.4736,
"loss_": 0.8458,
"moe_loss": 0.1605,
"moe_loss_longrong": 1.4062,
"step": 3619
},
{
"epoch": 0.7,
"learning_rate": 4.421971305004989e-06,
"loss": 2.4841,
"loss_": 0.7491,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3965,
"step": 3626
},
{
"epoch": 0.7,
"learning_rate": 4.385817257239556e-06,
"loss": 2.4332,
"loss_": 0.9237,
"moe_loss": 0.1604,
"moe_loss_longrong": 1.4068,
"step": 3633
},
{
"epoch": 0.7,
"learning_rate": 4.349770053018502e-06,
"loss": 2.4673,
"loss_": 0.9196,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3968,
"step": 3640
},
{
"epoch": 0.7,
"learning_rate": 4.313830378356384e-06,
"loss": 2.4438,
"loss_": 1.0917,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.397,
"step": 3647
},
{
"epoch": 0.7,
"learning_rate": 4.277998917221354e-06,
"loss": 2.4672,
"loss_": 0.8497,
"moe_loss": 0.1604,
"moe_loss_longrong": 1.4064,
"step": 3654
},
{
"epoch": 0.7,
"learning_rate": 4.242276351522161e-06,
"loss": 2.4468,
"loss_": 0.8331,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3969,
"step": 3661
},
{
"epoch": 0.71,
"learning_rate": 4.206663361095164e-06,
"loss": 2.4639,
"loss_": 0.9817,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3963,
"step": 3668
},
{
"epoch": 0.71,
"learning_rate": 4.171160623691384e-06,
"loss": 2.4403,
"loss_": 1.0819,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3967,
"step": 3675
},
{
"epoch": 0.71,
"learning_rate": 4.135768814963622e-06,
"loss": 2.4281,
"loss_": 0.9681,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3966,
"step": 3682
},
{
"epoch": 0.71,
"learning_rate": 4.100488608453599e-06,
"loss": 2.4383,
"loss_": 0.8748,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3966,
"step": 3689
},
{
"epoch": 0.71,
"learning_rate": 4.065320675579132e-06,
"loss": 2.4811,
"loss_": 1.2776,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3966,
"step": 3696
},
{
"epoch": 0.71,
"learning_rate": 4.03026568562135e-06,
"loss": 2.4559,
"loss_": 0.8804,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3967,
"step": 3703
},
{
"epoch": 0.71,
"learning_rate": 3.995324305711976e-06,
"loss": 2.4263,
"loss_": 0.9593,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3968,
"step": 3710
},
{
"epoch": 0.72,
"learning_rate": 3.9604972008206085e-06,
"loss": 2.4698,
"loss_": 1.2848,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3967,
"step": 3717
},
{
"epoch": 0.72,
"learning_rate": 3.9257850337420856e-06,
"loss": 2.4923,
"loss_": 1.0082,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3965,
"step": 3724
},
{
"epoch": 0.72,
"learning_rate": 3.891188465083865e-06,
"loss": 2.4502,
"loss_": 1.0253,
"moe_loss": 0.1604,
"moe_loss_longrong": 1.4059,
"step": 3731
},
{
"epoch": 0.72,
"learning_rate": 3.8567081532534374e-06,
"loss": 2.4543,
"loss_": 0.6744,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3962,
"step": 3738
},
{
"epoch": 0.72,
"learning_rate": 3.822344754445826e-06,
"loss": 2.4628,
"loss_": 1.0211,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3968,
"step": 3745
},
{
"epoch": 0.72,
"learning_rate": 3.788098922631067e-06,
"loss": 2.4765,
"loss_": 1.0228,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3968,
"step": 3752
},
{
"epoch": 0.72,
"learning_rate": 3.753971309541784e-06,
"loss": 2.4717,
"loss_": 0.846,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3967,
"step": 3759
},
{
"epoch": 0.72,
"learning_rate": 3.719962564660783e-06,
"loss": 2.4447,
"loss_": 0.4578,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3973,
"step": 3766
},
{
"epoch": 0.73,
"learning_rate": 3.6860733352086866e-06,
"loss": 2.4563,
"loss_": 0.8938,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3965,
"step": 3773
},
{
"epoch": 0.73,
"learning_rate": 3.652304266131612e-06,
"loss": 2.4641,
"loss_": 0.9597,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3966,
"step": 3780
},
{
"epoch": 0.73,
"learning_rate": 3.618656000088916e-06,
"loss": 2.4801,
"loss_": 0.7477,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3968,
"step": 3787
},
{
"epoch": 0.73,
"learning_rate": 3.585129177440938e-06,
"loss": 2.4649,
"loss_": 1.1009,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3965,
"step": 3794
},
{
"epoch": 0.73,
"learning_rate": 3.5517244362368363e-06,
"loss": 2.4828,
"loss_": 1.1634,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3966,
"step": 3801
},
{
"epoch": 0.73,
"learning_rate": 3.5184424122024406e-06,
"loss": 2.4532,
"loss_": 1.1849,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3965,
"step": 3808
},
{
"epoch": 0.73,
"learning_rate": 3.485283738728139e-06,
"loss": 2.4494,
"loss_": 0.8625,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3965,
"step": 3815
},
{
"epoch": 0.74,
"learning_rate": 3.452249046856836e-06,
"loss": 2.4715,
"loss_": 1.012,
"moe_loss": 0.1604,
"moe_loss_longrong": 1.4064,
"step": 3822
},
{
"epoch": 0.74,
"learning_rate": 3.4193389652719478e-06,
"loss": 2.4256,
"loss_": 1.0154,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3961,
"step": 3829
},
{
"epoch": 0.74,
"learning_rate": 3.3865541202854314e-06,
"loss": 2.4636,
"loss_": 1.1752,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3964,
"step": 3836
},
{
"epoch": 0.74,
"learning_rate": 3.353895135825854e-06,
"loss": 2.442,
"loss_": 0.8945,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3964,
"step": 3843
},
{
"epoch": 0.74,
"learning_rate": 3.321362633426547e-06,
"loss": 2.4677,
"loss_": 0.9853,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3963,
"step": 3850
},
{
"epoch": 0.74,
"learning_rate": 3.2889572322137454e-06,
"loss": 2.4633,
"loss_": 1.2634,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3961,
"step": 3857
},
{
"epoch": 0.74,
"learning_rate": 3.256679548894831e-06,
"loss": 2.4637,
"loss_": 0.8044,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3965,
"step": 3864
},
{
"epoch": 0.74,
"learning_rate": 3.224530197746587e-06,
"loss": 2.4343,
"loss_": 0.8018,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3968,
"step": 3871
},
{
"epoch": 0.75,
"learning_rate": 3.1925097906034962e-06,
"loss": 2.4328,
"loss_": 0.8425,
"moe_loss": 0.1604,
"moe_loss_longrong": 1.4055,
"step": 3878
},
{
"epoch": 0.75,
"learning_rate": 3.1606189368461117e-06,
"loss": 2.4648,
"loss_": 0.9644,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3964,
"step": 3885
},
{
"epoch": 0.75,
"learning_rate": 3.128858243389461e-06,
"loss": 2.4541,
"loss_": 0.6231,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3966,
"step": 3892
},
{
"epoch": 0.75,
"learning_rate": 3.097228314671481e-06,
"loss": 2.476,
"loss_": 0.9949,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3961,
"step": 3899
},
{
"epoch": 0.75,
"learning_rate": 3.065729752641532e-06,
"loss": 2.4229,
"loss_": 0.8875,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3962,
"step": 3906
},
{
"epoch": 0.75,
"learning_rate": 3.034363156748933e-06,
"loss": 2.4502,
"loss_": 0.9087,
"moe_loss": 0.1604,
"moe_loss_longrong": 1.4054,
"step": 3913
},
{
"epoch": 0.75,
"learning_rate": 3.0031291239315473e-06,
"loss": 2.4367,
"loss_": 0.8938,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3965,
"step": 3920
},
{
"epoch": 0.76,
"learning_rate": 2.9720282486044407e-06,
"loss": 2.471,
"loss_": 0.742,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3963,
"step": 3927
},
{
"epoch": 0.76,
"learning_rate": 2.941061122648545e-06,
"loss": 2.4598,
"loss_": 0.6142,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3966,
"step": 3934
},
{
"epoch": 0.76,
"learning_rate": 2.910228335399419e-06,
"loss": 2.4532,
"loss_": 0.9248,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3961,
"step": 3941
},
{
"epoch": 0.76,
"learning_rate": 2.8795304736360184e-06,
"loss": 2.4694,
"loss_": 0.876,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3961,
"step": 3948
},
{
"epoch": 0.76,
"learning_rate": 2.8489681215695242e-06,
"loss": 2.4464,
"loss_": 1.0146,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3961,
"step": 3955
},
{
"epoch": 0.76,
"learning_rate": 2.8185418608322344e-06,
"loss": 2.4632,
"loss_": 0.7415,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3964,
"step": 3962
},
{
"epoch": 0.76,
"learning_rate": 2.788252270466497e-06,
"loss": 2.4575,
"loss_": 1.1931,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3963,
"step": 3969
},
{
"epoch": 0.76,
"learning_rate": 2.7580999269136854e-06,
"loss": 2.4825,
"loss_": 0.9967,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3961,
"step": 3976
},
{
"epoch": 0.77,
"learning_rate": 2.728085404003217e-06,
"loss": 2.4658,
"loss_": 0.9402,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3961,
"step": 3983
},
{
"epoch": 0.77,
"learning_rate": 2.698209272941659e-06,
"loss": 2.4466,
"loss_": 1.1097,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3963,
"step": 3990
},
{
"epoch": 0.77,
"learning_rate": 2.668472102301829e-06,
"loss": 2.4544,
"loss_": 1.018,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3962,
"step": 3997
},
{
"epoch": 0.77,
"learning_rate": 2.6388744580119975e-06,
"loss": 2.4195,
"loss_": 0.9804,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3961,
"step": 4004
},
{
"epoch": 0.77,
"learning_rate": 2.6094169033451066e-06,
"loss": 2.4628,
"loss_": 0.7708,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3963,
"step": 4011
},
{
"epoch": 0.77,
"learning_rate": 2.580099998908049e-06,
"loss": 2.4624,
"loss_": 0.6729,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3962,
"step": 4018
},
{
"epoch": 0.77,
"learning_rate": 2.5509243026309983e-06,
"loss": 2.4753,
"loss_": 1.177,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3961,
"step": 4025
},
{
"epoch": 0.78,
"learning_rate": 2.5218903697568075e-06,
"loss": 2.4669,
"loss_": 1.1103,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.396,
"step": 4032
},
{
"epoch": 0.78,
"learning_rate": 2.4929987528304144e-06,
"loss": 2.4671,
"loss_": 1.3009,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3962,
"step": 4039
},
{
"epoch": 0.78,
"learning_rate": 2.4642500016883532e-06,
"loss": 2.4649,
"loss_": 0.4641,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3964,
"step": 4046
},
{
"epoch": 0.78,
"learning_rate": 2.4356446634482756e-06,
"loss": 2.4255,
"loss_": 0.7561,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3963,
"step": 4053
},
{
"epoch": 0.78,
"learning_rate": 2.407183282498534e-06,
"loss": 2.4512,
"loss_": 1.0891,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.396,
"step": 4060
},
{
"epoch": 0.78,
"learning_rate": 2.3788664004878405e-06,
"loss": 2.4548,
"loss_": 0.8427,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3964,
"step": 4067
},
{
"epoch": 0.78,
"learning_rate": 2.350694556314934e-06,
"loss": 2.4775,
"loss_": 1.1603,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.396,
"step": 4074
},
{
"epoch": 0.79,
"learning_rate": 2.32266828611835e-06,
"loss": 2.4762,
"loss_": 0.982,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.396,
"step": 4081
},
{
"epoch": 0.79,
"learning_rate": 2.2947881232662007e-06,
"loss": 2.4574,
"loss_": 0.6854,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3965,
"step": 4088
},
{
"epoch": 0.79,
"learning_rate": 2.2670545983460245e-06,
"loss": 2.4641,
"loss_": 1.1094,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.396,
"step": 4095
},
{
"epoch": 0.79,
"learning_rate": 2.2394682391546928e-06,
"loss": 2.4546,
"loss_": 0.8832,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3962,
"step": 4102
},
{
"epoch": 0.79,
"learning_rate": 2.2120295706883698e-06,
"loss": 2.4228,
"loss_": 0.534,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3966,
"step": 4109
},
{
"epoch": 0.79,
"learning_rate": 2.184739115132517e-06,
"loss": 2.4502,
"loss_": 0.6129,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3959,
"step": 4116
},
{
"epoch": 0.79,
"learning_rate": 2.157597391851949e-06,
"loss": 2.4514,
"loss_": 0.8927,
"moe_loss": 0.1604,
"moe_loss_longrong": 1.4045,
"step": 4123
},
{
"epoch": 0.79,
"learning_rate": 2.130604917380962e-06,
"loss": 2.4434,
"loss_": 0.87,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3958,
"step": 4130
},
{
"epoch": 0.8,
"learning_rate": 2.103762205413493e-06,
"loss": 2.4475,
"loss_": 1.1291,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3961,
"step": 4137
},
{
"epoch": 0.8,
"learning_rate": 2.0770697667933436e-06,
"loss": 2.4697,
"loss_": 0.8274,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3961,
"step": 4144
},
{
"epoch": 0.8,
"learning_rate": 2.0505281095044804e-06,
"loss": 2.4725,
"loss_": 1.0877,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.396,
"step": 4151
},
{
"epoch": 0.8,
"learning_rate": 2.024137738661329e-06,
"loss": 2.4757,
"loss_": 0.6894,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3962,
"step": 4158
},
{
"epoch": 0.8,
"learning_rate": 1.997899156499191e-06,
"loss": 2.4566,
"loss_": 0.7625,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3962,
"step": 4165
},
{
"epoch": 0.8,
"learning_rate": 1.9718128623646792e-06,
"loss": 2.514,
"loss_": 1.3132,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3958,
"step": 4172
},
{
"epoch": 0.8,
"learning_rate": 1.9458793527062035e-06,
"loss": 2.4659,
"loss_": 0.6083,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3961,
"step": 4179
},
{
"epoch": 0.81,
"learning_rate": 1.9200991210645394e-06,
"loss": 2.441,
"loss_": 0.8687,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3964,
"step": 4186
},
{
"epoch": 0.81,
"learning_rate": 1.8944726580634287e-06,
"loss": 2.4227,
"loss_": 0.863,
"moe_loss": 0.1604,
"moe_loss_longrong": 1.4043,
"step": 4193
},
{
"epoch": 0.81,
"learning_rate": 1.8690004514002314e-06,
"loss": 2.4488,
"loss_": 1.0513,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3962,
"step": 4200
},
{
"epoch": 0.81,
"learning_rate": 1.8436829858366655e-06,
"loss": 2.4269,
"loss_": 0.9285,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3959,
"step": 4207
},
{
"epoch": 0.81,
"learning_rate": 1.8185207431895613e-06,
"loss": 2.4577,
"loss_": 1.0791,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3958,
"step": 4214
},
{
"epoch": 0.81,
"learning_rate": 1.7935142023217056e-06,
"loss": 2.4565,
"loss_": 1.0052,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3958,
"step": 4221
},
{
"epoch": 0.81,
"learning_rate": 1.768663839132727e-06,
"loss": 2.4314,
"loss_": 0.7553,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3959,
"step": 4228
},
{
"epoch": 0.81,
"learning_rate": 1.7439701265500274e-06,
"loss": 2.432,
"loss_": 1.137,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3958,
"step": 4235
},
{
"epoch": 0.82,
"learning_rate": 1.7194335345197933e-06,
"loss": 2.466,
"loss_": 0.804,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3963,
"step": 4242
},
{
"epoch": 0.82,
"learning_rate": 1.6950545299980526e-06,
"loss": 2.4119,
"loss_": 0.8362,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3961,
"step": 4249
},
{
"epoch": 0.82,
"learning_rate": 1.6708335769417827e-06,
"loss": 2.4555,
"loss_": 0.8946,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3957,
"step": 4256
},
{
"epoch": 0.82,
"learning_rate": 1.6467711363000794e-06,
"loss": 2.434,
"loss_": 1.1374,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3958,
"step": 4263
},
{
"epoch": 0.82,
"learning_rate": 1.6228676660053932e-06,
"loss": 2.4705,
"loss_": 1.07,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3958,
"step": 4270
},
{
"epoch": 0.82,
"learning_rate": 1.5991236209648052e-06,
"loss": 2.4467,
"loss_": 0.5343,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3962,
"step": 4277
},
{
"epoch": 0.82,
"learning_rate": 1.575539453051369e-06,
"loss": 2.4617,
"loss_": 1.2505,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3958,
"step": 4284
},
{
"epoch": 0.83,
"learning_rate": 1.5521156110955293e-06,
"loss": 2.4389,
"loss_": 1.0836,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3959,
"step": 4291
},
{
"epoch": 0.83,
"learning_rate": 1.5288525408765564e-06,
"loss": 2.4877,
"loss_": 0.8473,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3959,
"step": 4298
},
{
"epoch": 0.83,
"learning_rate": 1.5057506851140701e-06,
"loss": 2.4786,
"loss_": 1.0259,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3958,
"step": 4305
},
{
"epoch": 0.83,
"learning_rate": 1.4828104834596268e-06,
"loss": 2.4086,
"loss_": 0.5643,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3965,
"step": 4312
},
{
"epoch": 0.83,
"learning_rate": 1.4600323724883337e-06,
"loss": 2.4481,
"loss_": 1.0485,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3959,
"step": 4319
},
{
"epoch": 0.83,
"learning_rate": 1.4374167856905542e-06,
"loss": 2.4386,
"loss_": 0.9296,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.396,
"step": 4326
},
{
"epoch": 0.83,
"learning_rate": 1.414964153463655e-06,
"loss": 2.4538,
"loss_": 0.7446,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3959,
"step": 4333
},
{
"epoch": 0.83,
"learning_rate": 1.3926749031038055e-06,
"loss": 2.4252,
"loss_": 1.0624,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3963,
"step": 4340
},
{
"epoch": 0.84,
"learning_rate": 1.370549458797863e-06,
"loss": 2.4477,
"loss_": 1.0075,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3957,
"step": 4347
},
{
"epoch": 0.84,
"learning_rate": 1.3485882416152819e-06,
"loss": 2.4224,
"loss_": 0.9794,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3959,
"step": 4354
},
{
"epoch": 0.84,
"learning_rate": 1.3267916695001172e-06,
"loss": 2.4571,
"loss_": 0.9473,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3954,
"step": 4361
},
{
"epoch": 0.84,
"learning_rate": 1.3051601572630611e-06,
"loss": 2.449,
"loss_": 1.1259,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3957,
"step": 4368
},
{
"epoch": 0.84,
"learning_rate": 1.283694116573546e-06,
"loss": 2.4477,
"loss_": 1.0313,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3959,
"step": 4375
},
{
"epoch": 0.84,
"learning_rate": 1.2623939559519161e-06,
"loss": 2.46,
"loss_": 0.888,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3958,
"step": 4382
},
{
"epoch": 0.84,
"learning_rate": 1.2412600807616526e-06,
"loss": 2.4559,
"loss_": 0.9206,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3957,
"step": 4389
},
{
"epoch": 0.85,
"learning_rate": 1.2202928932016588e-06,
"loss": 2.4259,
"loss_": 0.8758,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3958,
"step": 4396
},
{
"epoch": 0.85,
"learning_rate": 1.1994927922985999e-06,
"loss": 2.4477,
"loss_": 0.8513,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3961,
"step": 4403
},
{
"epoch": 0.85,
"learning_rate": 1.178860173899321e-06,
"loss": 2.4408,
"loss_": 1.0152,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3955,
"step": 4410
},
{
"epoch": 0.85,
"learning_rate": 1.1583954306633004e-06,
"loss": 2.4442,
"loss_": 1.1666,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.396,
"step": 4417
},
{
"epoch": 0.85,
"learning_rate": 1.138098952055181e-06,
"loss": 2.4404,
"loss_": 0.6781,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3959,
"step": 4424
},
{
"epoch": 0.85,
"learning_rate": 1.1179711243373736e-06,
"loss": 2.4439,
"loss_": 0.8599,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3962,
"step": 4431
},
{
"epoch": 0.85,
"learning_rate": 1.0980123305626812e-06,
"loss": 2.4635,
"loss_": 1.0466,
"moe_loss": 0.1605,
"moe_loss_longrong": 1.4055,
"step": 4438
},
{
"epoch": 0.86,
"learning_rate": 1.0782229505670195e-06,
"loss": 2.4436,
"loss_": 1.1018,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3958,
"step": 4445
},
{
"epoch": 0.86,
"learning_rate": 1.0586033609622004e-06,
"loss": 2.4521,
"loss_": 0.9168,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3954,
"step": 4452
},
{
"epoch": 0.86,
"learning_rate": 1.039153935128744e-06,
"loss": 2.4435,
"loss_": 1.1978,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3955,
"step": 4459
},
{
"epoch": 0.86,
"learning_rate": 1.0198750432087855e-06,
"loss": 2.4683,
"loss_": 1.2032,
"moe_loss": 0.16,
"moe_loss_longrong": 1.3955,
"step": 4466
},
{
"epoch": 0.86,
"learning_rate": 1.0007670520990331e-06,
"loss": 2.4688,
"loss_": 1.0949,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3958,
"step": 4473
},
{
"epoch": 0.86,
"learning_rate": 9.818303254437723e-07,
"loss": 2.459,
"loss_": 1.3033,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3958,
"step": 4480
},
{
"epoch": 0.86,
"learning_rate": 9.630652236279626e-07,
"loss": 2.4758,
"loss_": 0.8537,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3958,
"step": 4487
},
{
"epoch": 0.86,
"learning_rate": 9.444721037703597e-07,
"loss": 2.4579,
"loss_": 0.6901,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3959,
"step": 4494
},
{
"epoch": 0.87,
"learning_rate": 9.260513197167398e-07,
"loss": 2.4456,
"loss_": 1.0315,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3957,
"step": 4501
},
{
"epoch": 0.87,
"learning_rate": 9.078032220331523e-07,
"loss": 2.4451,
"loss_": 1.1487,
"moe_loss": 0.16,
"moe_loss_longrong": 1.3958,
"step": 4508
},
{
"epoch": 0.87,
"learning_rate": 8.897281579992467e-07,
"loss": 2.4204,
"loss_": 1.1135,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3958,
"step": 4515
},
{
"epoch": 0.87,
"learning_rate": 8.718264716016722e-07,
"loss": 2.4218,
"loss_": 0.8637,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3955,
"step": 4522
},
{
"epoch": 0.87,
"learning_rate": 8.540985035275273e-07,
"loss": 2.447,
"loss_": 0.8818,
"moe_loss": 0.16,
"moe_loss_longrong": 1.3955,
"step": 4529
},
{
"epoch": 0.87,
"learning_rate": 8.365445911578785e-07,
"loss": 2.4654,
"loss_": 0.9987,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3955,
"step": 4536
},
{
"epoch": 0.87,
"learning_rate": 8.191650685613273e-07,
"loss": 2.4603,
"loss_": 1.1883,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3956,
"step": 4543
},
{
"epoch": 0.88,
"learning_rate": 8.019602664876758e-07,
"loss": 2.4475,
"loss_": 1.0646,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3958,
"step": 4550
},
{
"epoch": 0.88,
"learning_rate": 7.849305123616091e-07,
"loss": 2.4486,
"loss_": 0.8589,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3957,
"step": 4557
},
{
"epoch": 0.88,
"learning_rate": 7.680761302764727e-07,
"loss": 2.4336,
"loss_": 1.0525,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3957,
"step": 4564
},
{
"epoch": 0.88,
"learning_rate": 7.513974409881186e-07,
"loss": 2.4663,
"loss_": 1.0924,
"moe_loss": 0.16,
"moe_loss_longrong": 1.3957,
"step": 4571
},
{
"epoch": 0.88,
"learning_rate": 7.348947619087754e-07,
"loss": 2.4417,
"loss_": 1.0197,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3958,
"step": 4578
},
{
"epoch": 0.88,
"learning_rate": 7.185684071010224e-07,
"loss": 2.4364,
"loss_": 0.9028,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3956,
"step": 4585
},
{
"epoch": 0.88,
"learning_rate": 7.024186872718164e-07,
"loss": 2.4733,
"loss_": 0.5258,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3961,
"step": 4592
},
{
"epoch": 0.88,
"learning_rate": 6.864459097665654e-07,
"loss": 2.4453,
"loss_": 0.9338,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3957,
"step": 4599
},
{
"epoch": 0.89,
"learning_rate": 6.706503785632934e-07,
"loss": 2.432,
"loss_": 1.1129,
"moe_loss": 0.16,
"moe_loss_longrong": 1.3956,
"step": 4606
},
{
"epoch": 0.89,
"learning_rate": 6.550323942668469e-07,
"loss": 2.4297,
"loss_": 0.6761,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3963,
"step": 4613
},
{
"epoch": 0.89,
"learning_rate": 6.395922541031741e-07,
"loss": 2.4152,
"loss_": 0.8792,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3956,
"step": 4620
},
{
"epoch": 0.89,
"learning_rate": 6.24330251913674e-07,
"loss": 2.4526,
"loss_": 1.1836,
"moe_loss": 0.16,
"moe_loss_longrong": 1.396,
"step": 4627
},
{
"epoch": 0.89,
"learning_rate": 6.092466781495976e-07,
"loss": 2.4362,
"loss_": 0.9499,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3958,
"step": 4634
},
{
"epoch": 0.89,
"learning_rate": 5.943418198665251e-07,
"loss": 2.4439,
"loss_": 1.1622,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3958,
"step": 4641
},
{
"epoch": 0.89,
"learning_rate": 5.796159607189001e-07,
"loss": 2.4273,
"loss_": 0.9876,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3956,
"step": 4648
},
{
"epoch": 0.9,
"learning_rate": 5.650693809546348e-07,
"loss": 2.4735,
"loss_": 1.1284,
"moe_loss": 0.16,
"moe_loss_longrong": 1.3956,
"step": 4655
},
{
"epoch": 0.9,
"learning_rate": 5.507023574097725e-07,
"loss": 2.4393,
"loss_": 0.8675,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3958,
"step": 4662
},
{
"epoch": 0.9,
"learning_rate": 5.365151635032218e-07,
"loss": 2.4482,
"loss_": 0.901,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3955,
"step": 4669
},
{
"epoch": 0.9,
"learning_rate": 5.225080692315532e-07,
"loss": 2.441,
"loss_": 1.0355,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3956,
"step": 4676
},
{
"epoch": 0.9,
"learning_rate": 5.086813411638581e-07,
"loss": 2.4277,
"loss_": 0.9478,
"moe_loss": 0.16,
"moe_loss_longrong": 1.3955,
"step": 4683
},
{
"epoch": 0.9,
"learning_rate": 4.9503524243668e-07,
"loss": 2.444,
"loss_": 0.8901,
"moe_loss": 0.1604,
"moe_loss_longrong": 1.4048,
"step": 4690
},
{
"epoch": 0.9,
"learning_rate": 4.815700327490014e-07,
"loss": 2.4286,
"loss_": 0.8906,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3957,
"step": 4697
},
{
"epoch": 0.91,
"learning_rate": 4.6828596835730487e-07,
"loss": 2.4475,
"loss_": 1.028,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3957,
"step": 4704
},
{
"epoch": 0.91,
"learning_rate": 4.551833020707008e-07,
"loss": 2.4281,
"loss_": 0.6545,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3958,
"step": 4711
},
{
"epoch": 0.91,
"learning_rate": 4.4226228324610544e-07,
"loss": 2.4677,
"loss_": 0.9228,
"moe_loss": 0.16,
"moe_loss_longrong": 1.3954,
"step": 4718
},
{
"epoch": 0.91,
"learning_rate": 4.295231577835024e-07,
"loss": 2.443,
"loss_": 0.8677,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3957,
"step": 4725
},
{
"epoch": 0.91,
"learning_rate": 4.1696616812126333e-07,
"loss": 2.4452,
"loss_": 0.8619,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3958,
"step": 4732
},
{
"epoch": 0.91,
"learning_rate": 4.0459155323153034e-07,
"loss": 2.4501,
"loss_": 0.5721,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3959,
"step": 4739
},
{
"epoch": 0.91,
"learning_rate": 3.9239954861567177e-07,
"loss": 2.4452,
"loss_": 1.2849,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3957,
"step": 4746
},
{
"epoch": 0.91,
"learning_rate": 3.803903862998004e-07,
"loss": 2.4681,
"loss_": 1.0272,
"moe_loss": 0.1605,
"moe_loss_longrong": 1.4052,
"step": 4753
},
{
"epoch": 0.92,
"learning_rate": 3.685642948303503e-07,
"loss": 2.4437,
"loss_": 0.929,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3957,
"step": 4760
},
{
"epoch": 0.92,
"learning_rate": 3.5692149926974006e-07,
"loss": 2.4502,
"loss_": 1.1455,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3958,
"step": 4767
},
{
"epoch": 0.92,
"learning_rate": 3.454622211920766e-07,
"loss": 2.4262,
"loss_": 0.6494,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3955,
"step": 4774
},
{
"epoch": 0.92,
"learning_rate": 3.341866786789505e-07,
"loss": 2.4259,
"loss_": 1.113,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3959,
"step": 4781
},
{
"epoch": 0.92,
"learning_rate": 3.2309508631527486e-07,
"loss": 2.4309,
"loss_": 0.8977,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3957,
"step": 4788
},
{
"epoch": 0.92,
"learning_rate": 3.121876551852099e-07,
"loss": 2.4311,
"loss_": 1.0739,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.396,
"step": 4795
},
{
"epoch": 0.92,
"learning_rate": 3.0146459286813924e-07,
"loss": 2.4515,
"loss_": 0.9781,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3959,
"step": 4802
},
{
"epoch": 0.93,
"learning_rate": 2.909261034347255e-07,
"loss": 2.4553,
"loss_": 0.9123,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3959,
"step": 4809
},
{
"epoch": 0.93,
"learning_rate": 2.8057238744301994e-07,
"loss": 2.4516,
"loss_": 1.131,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.396,
"step": 4816
},
{
"epoch": 0.93,
"learning_rate": 2.704036419346534e-07,
"loss": 2.4628,
"loss_": 1.0138,
"moe_loss": 0.1605,
"moe_loss_longrong": 1.4051,
"step": 4823
},
{
"epoch": 0.93,
"learning_rate": 2.604200604310825e-07,
"loss": 2.4657,
"loss_": 1.0133,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3955,
"step": 4830
},
{
"epoch": 0.93,
"learning_rate": 2.506218329299026e-07,
"loss": 2.4311,
"loss_": 1.1114,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.396,
"step": 4837
},
{
"epoch": 0.93,
"learning_rate": 2.410091459012376e-07,
"loss": 2.4529,
"loss_": 1.0407,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.396,
"step": 4844
},
{
"epoch": 0.93,
"learning_rate": 2.3158218228419127e-07,
"loss": 2.4564,
"loss_": 1.061,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3955,
"step": 4851
},
{
"epoch": 0.93,
"learning_rate": 2.2234112148336373e-07,
"loss": 2.4584,
"loss_": 0.735,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3956,
"step": 4858
},
{
"epoch": 0.94,
"learning_rate": 2.1328613936543396e-07,
"loss": 2.425,
"loss_": 0.9476,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3962,
"step": 4865
},
{
"epoch": 0.94,
"learning_rate": 2.0441740825582258e-07,
"loss": 2.4643,
"loss_": 1.0806,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3956,
"step": 4872
},
{
"epoch": 0.94,
"learning_rate": 1.9573509693540104e-07,
"loss": 2.4676,
"loss_": 0.825,
"moe_loss": 0.16,
"moe_loss_longrong": 1.3954,
"step": 4879
},
{
"epoch": 0.94,
"learning_rate": 1.872393706372866e-07,
"loss": 2.4485,
"loss_": 0.8625,
"moe_loss": 0.1605,
"moe_loss_longrong": 1.4046,
"step": 4886
},
{
"epoch": 0.94,
"learning_rate": 1.789303910436968e-07,
"loss": 2.4413,
"loss_": 0.7278,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3958,
"step": 4893
},
{
"epoch": 0.94,
"learning_rate": 1.7080831628286886e-07,
"loss": 2.4452,
"loss_": 0.812,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3961,
"step": 4900
},
{
"epoch": 0.94,
"learning_rate": 1.6287330092605525e-07,
"loss": 2.4596,
"loss_": 0.6445,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3958,
"step": 4907
},
{
"epoch": 0.95,
"learning_rate": 1.551254959845805e-07,
"loss": 2.4541,
"loss_": 1.0014,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3957,
"step": 4914
},
{
"epoch": 0.95,
"learning_rate": 1.4756504890696466e-07,
"loss": 2.4342,
"loss_": 1.1963,
"moe_loss": 0.16,
"moe_loss_longrong": 1.3957,
"step": 4921
},
{
"epoch": 0.95,
"learning_rate": 1.401921035761189e-07,
"loss": 2.4346,
"loss_": 0.8071,
"moe_loss": 0.1604,
"moe_loss_longrong": 1.4051,
"step": 4928
},
{
"epoch": 0.95,
"learning_rate": 1.3300680030661096e-07,
"loss": 2.464,
"loss_": 1.1756,
"moe_loss": 0.16,
"moe_loss_longrong": 1.3957,
"step": 4935
},
{
"epoch": 0.95,
"learning_rate": 1.2600927584198618e-07,
"loss": 2.4334,
"loss_": 1.0679,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3954,
"step": 4942
},
{
"epoch": 0.95,
"learning_rate": 1.1919966335217636e-07,
"loss": 2.4779,
"loss_": 0.8872,
"moe_loss": 0.16,
"moe_loss_longrong": 1.3956,
"step": 4949
},
{
"epoch": 0.95,
"learning_rate": 1.1257809243095385e-07,
"loss": 2.4339,
"loss_": 0.8956,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3956,
"step": 4956
},
{
"epoch": 0.95,
"learning_rate": 1.0614468909347476e-07,
"loss": 2.4414,
"loss_": 1.1397,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3956,
"step": 4963
},
{
"epoch": 0.96,
"learning_rate": 9.989957577387521e-08,
"loss": 2.4253,
"loss_": 0.8755,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3955,
"step": 4970
},
{
"epoch": 0.96,
"learning_rate": 9.384287132294223e-08,
"loss": 2.4599,
"loss_": 0.9577,
"moe_loss": 0.16,
"moe_loss_longrong": 1.3954,
"step": 4977
},
{
"epoch": 0.96,
"learning_rate": 8.797469100585432e-08,
"loss": 2.4615,
"loss_": 0.7768,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3957,
"step": 4984
},
{
"epoch": 0.96,
"learning_rate": 8.229514649998438e-08,
"loss": 2.4414,
"loss_": 1.0058,
"moe_loss": 0.16,
"moe_loss_longrong": 1.3956,
"step": 4991
},
{
"epoch": 0.96,
"learning_rate": 7.680434589277696e-08,
"loss": 2.4587,
"loss_": 1.0013,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3957,
"step": 4998
},
{
"epoch": 0.96,
"learning_rate": 7.150239367969102e-08,
"loss": 2.4314,
"loss_": 0.9539,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3959,
"step": 5005
},
{
"epoch": 0.96,
"learning_rate": 6.63893907622104e-08,
"loss": 2.4711,
"loss_": 0.9778,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3954,
"step": 5012
},
{
"epoch": 0.97,
"learning_rate": 6.14654344459209e-08,
"loss": 2.4558,
"loss_": 0.9942,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3958,
"step": 5019
},
{
"epoch": 0.97,
"learning_rate": 5.673061843866623e-08,
"loss": 2.4748,
"loss_": 0.7836,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3959,
"step": 5026
},
{
"epoch": 0.97,
"learning_rate": 5.218503284875609e-08,
"loss": 2.4418,
"loss_": 1.0962,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3955,
"step": 5033
},
{
"epoch": 0.97,
"learning_rate": 4.7828764183257545e-08,
"loss": 2.4128,
"loss_": 0.9561,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3958,
"step": 5040
},
{
"epoch": 0.97,
"learning_rate": 4.366189534634191e-08,
"loss": 2.4604,
"loss_": 0.9591,
"moe_loss": 0.16,
"moe_loss_longrong": 1.3955,
"step": 5047
},
{
"epoch": 0.97,
"learning_rate": 3.9684505637718194e-08,
"loss": 2.4619,
"loss_": 1.1709,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3959,
"step": 5054
},
{
"epoch": 0.97,
"learning_rate": 3.589667075110992e-08,
"loss": 2.4199,
"loss_": 1.0163,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3959,
"step": 5061
},
{
"epoch": 0.98,
"learning_rate": 3.229846277282511e-08,
"loss": 2.4621,
"loss_": 0.9223,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3959,
"step": 5068
},
{
"epoch": 0.98,
"learning_rate": 2.8889950180382985e-08,
"loss": 2.4625,
"loss_": 0.4562,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3959,
"step": 5075
},
{
"epoch": 0.98,
"learning_rate": 2.5671197841203867e-08,
"loss": 2.4386,
"loss_": 1.0035,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3958,
"step": 5082
},
{
"epoch": 0.98,
"learning_rate": 2.264226701138461e-08,
"loss": 2.4681,
"loss_": 1.218,
"moe_loss": 0.16,
"moe_loss_longrong": 1.3955,
"step": 5089
},
{
"epoch": 0.98,
"learning_rate": 1.9803215334522895e-08,
"loss": 2.427,
"loss_": 0.7696,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3961,
"step": 5096
},
{
"epoch": 0.98,
"learning_rate": 1.7154096840629186e-08,
"loss": 2.4851,
"loss_": 1.054,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3956,
"step": 5103
},
{
"epoch": 0.98,
"learning_rate": 1.4694961945093122e-08,
"loss": 2.4448,
"loss_": 0.8593,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3959,
"step": 5110
},
{
"epoch": 0.98,
"learning_rate": 1.2425857447725397e-08,
"loss": 2.454,
"loss_": 0.8875,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3956,
"step": 5117
},
{
"epoch": 0.99,
"learning_rate": 1.0346826531865139e-08,
"loss": 2.4289,
"loss_": 0.9578,
"moe_loss": 0.16,
"moe_loss_longrong": 1.3957,
"step": 5124
},
{
"epoch": 0.99,
"learning_rate": 8.457908763562783e-09,
"loss": 2.4436,
"loss_": 1.026,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3956,
"step": 5131
},
{
"epoch": 0.99,
"learning_rate": 6.759140090824012e-09,
"loss": 2.4605,
"loss_": 0.6569,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3954,
"step": 5138
},
{
"epoch": 0.99,
"learning_rate": 5.25055284292475e-09,
"loss": 2.4668,
"loss_": 1.2459,
"moe_loss": 0.16,
"moe_loss_longrong": 1.3959,
"step": 5145
},
{
"epoch": 0.99,
"learning_rate": 3.932175729797205e-09,
"loss": 2.4129,
"loss_": 0.9565,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3959,
"step": 5152
},
{
"epoch": 0.99,
"learning_rate": 2.8040338414847545e-09,
"loss": 2.4469,
"loss_": 0.8875,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3957,
"step": 5159
},
{
"epoch": 0.99,
"learning_rate": 1.8661486476612144e-09,
"loss": 2.4186,
"loss_": 0.7829,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.396,
"step": 5166
},
{
"epoch": 1.0,
"learning_rate": 1.1185379972256105e-09,
"loss": 2.4351,
"loss_": 0.7496,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3956,
"step": 5173
},
{
"epoch": 1.0,
"learning_rate": 5.612161179613385e-10,
"loss": 2.4242,
"loss_": 0.9366,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3956,
"step": 5180
},
{
"epoch": 1.0,
"learning_rate": 1.9419361626416e-10,
"loss": 2.4684,
"loss_": 1.0884,
"moe_loss": 0.16,
"moe_loss_longrong": 1.3956,
"step": 5187
},
{
"epoch": 1.0,
"learning_rate": 1.7477476940142013e-11,
"loss": 2.4801,
"loss_": 1.0033,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3959,
"step": 5194
},
{
"epoch": 1.0,
"step": 5197,
"total_flos": 8.818772994970092e+18,
"train_loss": 2.4979199135512236,
"train_runtime": 95362.7285,
"train_samples_per_second": 6.976,
"train_steps_per_second": 0.054
}
],
"logging_steps": 7,
"max_steps": 5197,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 1000,
"total_flos": 8.818772994970092e+18,
"train_batch_size": 2,
"trial_name": null,
"trial_params": null
}