diff --git "a/trainer_state.json" "b/trainer_state.json" new file mode 100644--- /dev/null +++ "b/trainer_state.json" @@ -0,0 +1,20954 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 1.0, + "eval_steps": 500, + "global_step": 17429, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 1.1474469305794606e-07, + "loss": 1.1705, + "step": 1 + }, + { + "epoch": 0.0, + "learning_rate": 5.737234652897304e-07, + "loss": 1.0623, + "step": 5 + }, + { + "epoch": 0.0, + "learning_rate": 1.1474469305794607e-06, + "loss": 1.0881, + "step": 10 + }, + { + "epoch": 0.0, + "learning_rate": 1.721170395869191e-06, + "loss": 1.1118, + "step": 15 + }, + { + "epoch": 0.0, + "learning_rate": 2.2948938611589215e-06, + "loss": 1.1156, + "step": 20 + }, + { + "epoch": 0.0, + "learning_rate": 2.868617326448652e-06, + "loss": 1.0686, + "step": 25 + }, + { + "epoch": 0.0, + "learning_rate": 3.442340791738382e-06, + "loss": 1.1194, + "step": 30 + }, + { + "epoch": 0.0, + "learning_rate": 4.016064257028113e-06, + "loss": 1.1242, + "step": 35 + }, + { + "epoch": 0.0, + "learning_rate": 4.589787722317843e-06, + "loss": 1.0856, + "step": 40 + }, + { + "epoch": 0.0, + "learning_rate": 5.163511187607573e-06, + "loss": 1.083, + "step": 45 + }, + { + "epoch": 0.0, + "learning_rate": 5.737234652897304e-06, + "loss": 0.9754, + "step": 50 + }, + { + "epoch": 0.0, + "learning_rate": 6.310958118187034e-06, + "loss": 1.1019, + "step": 55 + }, + { + "epoch": 0.0, + "learning_rate": 6.884681583476764e-06, + "loss": 1.0793, + "step": 60 + }, + { + "epoch": 0.0, + "learning_rate": 7.4584050487664955e-06, + "loss": 1.1459, + "step": 65 + }, + { + "epoch": 0.0, + "learning_rate": 8.032128514056226e-06, + "loss": 1.0241, + "step": 70 + }, + { + "epoch": 0.0, + "learning_rate": 8.605851979345956e-06, + "loss": 1.0542, + "step": 75 + }, + { + "epoch": 0.0, + "learning_rate": 9.179575444635686e-06, + "loss": 1.0993, + "step": 80 + }, + { + "epoch": 0.0, + "learning_rate": 9.753298909925416e-06, + "loss": 1.0939, + "step": 85 + }, + { + "epoch": 0.01, + "learning_rate": 1.0327022375215146e-05, + "loss": 1.0606, + "step": 90 + }, + { + "epoch": 0.01, + "learning_rate": 1.0900745840504876e-05, + "loss": 1.0591, + "step": 95 + }, + { + "epoch": 0.01, + "learning_rate": 1.1474469305794608e-05, + "loss": 1.0768, + "step": 100 + }, + { + "epoch": 0.01, + "learning_rate": 1.2048192771084338e-05, + "loss": 1.0601, + "step": 105 + }, + { + "epoch": 0.01, + "learning_rate": 1.2621916236374069e-05, + "loss": 1.0676, + "step": 110 + }, + { + "epoch": 0.01, + "learning_rate": 1.3195639701663797e-05, + "loss": 1.0468, + "step": 115 + }, + { + "epoch": 0.01, + "learning_rate": 1.3769363166953527e-05, + "loss": 1.0915, + "step": 120 + }, + { + "epoch": 0.01, + "learning_rate": 1.434308663224326e-05, + "loss": 1.0004, + "step": 125 + }, + { + "epoch": 0.01, + "learning_rate": 1.4916810097532991e-05, + "loss": 1.0573, + "step": 130 + }, + { + "epoch": 0.01, + "learning_rate": 1.549053356282272e-05, + "loss": 1.0377, + "step": 135 + }, + { + "epoch": 0.01, + "learning_rate": 1.606425702811245e-05, + "loss": 1.0571, + "step": 140 + }, + { + "epoch": 0.01, + "learning_rate": 1.663798049340218e-05, + "loss": 1.0273, + "step": 145 + }, + { + "epoch": 0.01, + "learning_rate": 1.721170395869191e-05, + "loss": 0.9969, + "step": 150 + }, + { + "epoch": 0.01, + "learning_rate": 1.7785427423981642e-05, + "loss": 0.9618, + "step": 155 + }, + { + "epoch": 0.01, + "learning_rate": 1.8359150889271372e-05, + "loss": 1.0193, + "step": 160 + }, + { + "epoch": 0.01, + "learning_rate": 1.8932874354561102e-05, + "loss": 0.9667, + "step": 165 + }, + { + "epoch": 0.01, + "learning_rate": 1.9506597819850832e-05, + "loss": 0.9475, + "step": 170 + }, + { + "epoch": 0.01, + "learning_rate": 2.0080321285140562e-05, + "loss": 1.0302, + "step": 175 + }, + { + "epoch": 0.01, + "learning_rate": 2.0654044750430293e-05, + "loss": 1.0012, + "step": 180 + }, + { + "epoch": 0.01, + "learning_rate": 2.1227768215720023e-05, + "loss": 0.9659, + "step": 185 + }, + { + "epoch": 0.01, + "learning_rate": 2.1801491681009753e-05, + "loss": 1.0251, + "step": 190 + }, + { + "epoch": 0.01, + "learning_rate": 2.2375215146299486e-05, + "loss": 0.9829, + "step": 195 + }, + { + "epoch": 0.01, + "learning_rate": 2.2948938611589217e-05, + "loss": 0.9823, + "step": 200 + }, + { + "epoch": 0.01, + "learning_rate": 2.3522662076878947e-05, + "loss": 0.9471, + "step": 205 + }, + { + "epoch": 0.01, + "learning_rate": 2.4096385542168677e-05, + "loss": 0.9049, + "step": 210 + }, + { + "epoch": 0.01, + "learning_rate": 2.4670109007458407e-05, + "loss": 0.9992, + "step": 215 + }, + { + "epoch": 0.01, + "learning_rate": 2.5243832472748137e-05, + "loss": 0.9511, + "step": 220 + }, + { + "epoch": 0.01, + "learning_rate": 2.5817555938037867e-05, + "loss": 0.9359, + "step": 225 + }, + { + "epoch": 0.01, + "learning_rate": 2.6391279403327594e-05, + "loss": 0.9657, + "step": 230 + }, + { + "epoch": 0.01, + "learning_rate": 2.6965002868617328e-05, + "loss": 1.0469, + "step": 235 + }, + { + "epoch": 0.01, + "learning_rate": 2.7538726333907055e-05, + "loss": 0.9768, + "step": 240 + }, + { + "epoch": 0.01, + "learning_rate": 2.8112449799196788e-05, + "loss": 1.0059, + "step": 245 + }, + { + "epoch": 0.01, + "learning_rate": 2.868617326448652e-05, + "loss": 0.982, + "step": 250 + }, + { + "epoch": 0.01, + "learning_rate": 2.925989672977625e-05, + "loss": 0.941, + "step": 255 + }, + { + "epoch": 0.01, + "learning_rate": 2.9833620195065982e-05, + "loss": 0.9555, + "step": 260 + }, + { + "epoch": 0.02, + "learning_rate": 3.040734366035571e-05, + "loss": 1.0077, + "step": 265 + }, + { + "epoch": 0.02, + "learning_rate": 3.098106712564544e-05, + "loss": 1.0177, + "step": 270 + }, + { + "epoch": 0.02, + "learning_rate": 3.155479059093517e-05, + "loss": 1.0121, + "step": 275 + }, + { + "epoch": 0.02, + "learning_rate": 3.21285140562249e-05, + "loss": 0.9461, + "step": 280 + }, + { + "epoch": 0.02, + "learning_rate": 3.2702237521514636e-05, + "loss": 0.9811, + "step": 285 + }, + { + "epoch": 0.02, + "learning_rate": 3.327596098680436e-05, + "loss": 0.9625, + "step": 290 + }, + { + "epoch": 0.02, + "learning_rate": 3.3849684452094096e-05, + "loss": 0.9899, + "step": 295 + }, + { + "epoch": 0.02, + "learning_rate": 3.442340791738382e-05, + "loss": 0.9944, + "step": 300 + }, + { + "epoch": 0.02, + "learning_rate": 3.499713138267356e-05, + "loss": 1.0502, + "step": 305 + }, + { + "epoch": 0.02, + "learning_rate": 3.5570854847963284e-05, + "loss": 0.9586, + "step": 310 + }, + { + "epoch": 0.02, + "learning_rate": 3.614457831325301e-05, + "loss": 0.9985, + "step": 315 + }, + { + "epoch": 0.02, + "learning_rate": 3.6718301778542744e-05, + "loss": 1.0447, + "step": 320 + }, + { + "epoch": 0.02, + "learning_rate": 3.729202524383247e-05, + "loss": 1.0216, + "step": 325 + }, + { + "epoch": 0.02, + "learning_rate": 3.7865748709122204e-05, + "loss": 1.0232, + "step": 330 + }, + { + "epoch": 0.02, + "learning_rate": 3.843947217441193e-05, + "loss": 0.9247, + "step": 335 + }, + { + "epoch": 0.02, + "learning_rate": 3.9013195639701665e-05, + "loss": 0.9981, + "step": 340 + }, + { + "epoch": 0.02, + "learning_rate": 3.958691910499139e-05, + "loss": 0.9576, + "step": 345 + }, + { + "epoch": 0.02, + "learning_rate": 4.0160642570281125e-05, + "loss": 0.9793, + "step": 350 + }, + { + "epoch": 0.02, + "learning_rate": 4.073436603557086e-05, + "loss": 0.9646, + "step": 355 + }, + { + "epoch": 0.02, + "learning_rate": 4.1308089500860585e-05, + "loss": 0.9432, + "step": 360 + }, + { + "epoch": 0.02, + "learning_rate": 4.188181296615032e-05, + "loss": 1.0045, + "step": 365 + }, + { + "epoch": 0.02, + "learning_rate": 4.2455536431440046e-05, + "loss": 0.9872, + "step": 370 + }, + { + "epoch": 0.02, + "learning_rate": 4.302925989672978e-05, + "loss": 0.9436, + "step": 375 + }, + { + "epoch": 0.02, + "learning_rate": 4.3602983362019506e-05, + "loss": 1.0294, + "step": 380 + }, + { + "epoch": 0.02, + "learning_rate": 4.417670682730924e-05, + "loss": 0.9198, + "step": 385 + }, + { + "epoch": 0.02, + "learning_rate": 4.475043029259897e-05, + "loss": 0.9835, + "step": 390 + }, + { + "epoch": 0.02, + "learning_rate": 4.53241537578887e-05, + "loss": 0.9656, + "step": 395 + }, + { + "epoch": 0.02, + "learning_rate": 4.589787722317843e-05, + "loss": 0.9951, + "step": 400 + }, + { + "epoch": 0.02, + "learning_rate": 4.647160068846816e-05, + "loss": 0.9385, + "step": 405 + }, + { + "epoch": 0.02, + "learning_rate": 4.7045324153757894e-05, + "loss": 0.9489, + "step": 410 + }, + { + "epoch": 0.02, + "learning_rate": 4.761904761904762e-05, + "loss": 0.9516, + "step": 415 + }, + { + "epoch": 0.02, + "learning_rate": 4.8192771084337354e-05, + "loss": 0.9873, + "step": 420 + }, + { + "epoch": 0.02, + "learning_rate": 4.876649454962709e-05, + "loss": 1.01, + "step": 425 + }, + { + "epoch": 0.02, + "learning_rate": 4.9340218014916814e-05, + "loss": 0.9336, + "step": 430 + }, + { + "epoch": 0.02, + "learning_rate": 4.991394148020654e-05, + "loss": 0.8962, + "step": 435 + }, + { + "epoch": 0.03, + "learning_rate": 5.0487664945496275e-05, + "loss": 0.9802, + "step": 440 + }, + { + "epoch": 0.03, + "learning_rate": 5.1061388410786e-05, + "loss": 0.9845, + "step": 445 + }, + { + "epoch": 0.03, + "learning_rate": 5.1635111876075735e-05, + "loss": 0.9281, + "step": 450 + }, + { + "epoch": 0.03, + "learning_rate": 5.220883534136547e-05, + "loss": 0.9978, + "step": 455 + }, + { + "epoch": 0.03, + "learning_rate": 5.278255880665519e-05, + "loss": 0.957, + "step": 460 + }, + { + "epoch": 0.03, + "learning_rate": 5.335628227194492e-05, + "loss": 0.8935, + "step": 465 + }, + { + "epoch": 0.03, + "learning_rate": 5.3930005737234656e-05, + "loss": 0.9418, + "step": 470 + }, + { + "epoch": 0.03, + "learning_rate": 5.450372920252439e-05, + "loss": 1.0086, + "step": 475 + }, + { + "epoch": 0.03, + "learning_rate": 5.507745266781411e-05, + "loss": 0.9691, + "step": 480 + }, + { + "epoch": 0.03, + "learning_rate": 5.565117613310384e-05, + "loss": 0.9682, + "step": 485 + }, + { + "epoch": 0.03, + "learning_rate": 5.6224899598393576e-05, + "loss": 0.9255, + "step": 490 + }, + { + "epoch": 0.03, + "learning_rate": 5.679862306368331e-05, + "loss": 1.0181, + "step": 495 + }, + { + "epoch": 0.03, + "learning_rate": 5.737234652897304e-05, + "loss": 0.9934, + "step": 500 + }, + { + "epoch": 0.03, + "learning_rate": 5.794606999426276e-05, + "loss": 0.9871, + "step": 505 + }, + { + "epoch": 0.03, + "learning_rate": 5.85197934595525e-05, + "loss": 0.9117, + "step": 510 + }, + { + "epoch": 0.03, + "learning_rate": 5.909351692484223e-05, + "loss": 1.0144, + "step": 515 + }, + { + "epoch": 0.03, + "learning_rate": 5.9667240390131964e-05, + "loss": 0.9958, + "step": 520 + }, + { + "epoch": 0.03, + "learning_rate": 6.02409638554217e-05, + "loss": 0.9979, + "step": 525 + }, + { + "epoch": 0.03, + "learning_rate": 6.081468732071142e-05, + "loss": 0.9258, + "step": 530 + }, + { + "epoch": 0.03, + "learning_rate": 6.138841078600115e-05, + "loss": 0.9732, + "step": 535 + }, + { + "epoch": 0.03, + "learning_rate": 6.196213425129088e-05, + "loss": 0.9912, + "step": 540 + }, + { + "epoch": 0.03, + "learning_rate": 6.253585771658062e-05, + "loss": 0.906, + "step": 545 + }, + { + "epoch": 0.03, + "learning_rate": 6.310958118187034e-05, + "loss": 0.9822, + "step": 550 + }, + { + "epoch": 0.03, + "learning_rate": 6.368330464716007e-05, + "loss": 0.9325, + "step": 555 + }, + { + "epoch": 0.03, + "learning_rate": 6.42570281124498e-05, + "loss": 0.8918, + "step": 560 + }, + { + "epoch": 0.03, + "learning_rate": 6.483075157773954e-05, + "loss": 0.961, + "step": 565 + }, + { + "epoch": 0.03, + "learning_rate": 6.540447504302927e-05, + "loss": 0.9948, + "step": 570 + }, + { + "epoch": 0.03, + "learning_rate": 6.597819850831899e-05, + "loss": 0.9441, + "step": 575 + }, + { + "epoch": 0.03, + "learning_rate": 6.655192197360873e-05, + "loss": 0.9356, + "step": 580 + }, + { + "epoch": 0.03, + "learning_rate": 6.712564543889846e-05, + "loss": 1.0858, + "step": 585 + }, + { + "epoch": 0.03, + "learning_rate": 6.769936890418819e-05, + "loss": 0.9111, + "step": 590 + }, + { + "epoch": 0.03, + "learning_rate": 6.827309236947793e-05, + "loss": 0.9252, + "step": 595 + }, + { + "epoch": 0.03, + "learning_rate": 6.884681583476765e-05, + "loss": 1.0092, + "step": 600 + }, + { + "epoch": 0.03, + "learning_rate": 6.942053930005738e-05, + "loss": 0.9494, + "step": 605 + }, + { + "epoch": 0.03, + "learning_rate": 6.999426276534711e-05, + "loss": 0.939, + "step": 610 + }, + { + "epoch": 0.04, + "learning_rate": 7.056798623063683e-05, + "loss": 0.9906, + "step": 615 + }, + { + "epoch": 0.04, + "learning_rate": 7.114170969592657e-05, + "loss": 1.0128, + "step": 620 + }, + { + "epoch": 0.04, + "learning_rate": 7.17154331612163e-05, + "loss": 1.0403, + "step": 625 + }, + { + "epoch": 0.04, + "learning_rate": 7.228915662650602e-05, + "loss": 0.9104, + "step": 630 + }, + { + "epoch": 0.04, + "learning_rate": 7.286288009179575e-05, + "loss": 1.0033, + "step": 635 + }, + { + "epoch": 0.04, + "learning_rate": 7.343660355708549e-05, + "loss": 0.9468, + "step": 640 + }, + { + "epoch": 0.04, + "learning_rate": 7.401032702237521e-05, + "loss": 0.9881, + "step": 645 + }, + { + "epoch": 0.04, + "learning_rate": 7.458405048766494e-05, + "loss": 0.9598, + "step": 650 + }, + { + "epoch": 0.04, + "learning_rate": 7.515777395295467e-05, + "loss": 0.9252, + "step": 655 + }, + { + "epoch": 0.04, + "learning_rate": 7.573149741824441e-05, + "loss": 0.8959, + "step": 660 + }, + { + "epoch": 0.04, + "learning_rate": 7.630522088353414e-05, + "loss": 0.983, + "step": 665 + }, + { + "epoch": 0.04, + "learning_rate": 7.687894434882386e-05, + "loss": 1.0582, + "step": 670 + }, + { + "epoch": 0.04, + "learning_rate": 7.74526678141136e-05, + "loss": 0.9976, + "step": 675 + }, + { + "epoch": 0.04, + "learning_rate": 7.802639127940333e-05, + "loss": 1.0383, + "step": 680 + }, + { + "epoch": 0.04, + "learning_rate": 7.860011474469306e-05, + "loss": 0.9541, + "step": 685 + }, + { + "epoch": 0.04, + "learning_rate": 7.917383820998278e-05, + "loss": 1.0002, + "step": 690 + }, + { + "epoch": 0.04, + "learning_rate": 7.974756167527252e-05, + "loss": 0.998, + "step": 695 + }, + { + "epoch": 0.04, + "learning_rate": 8.032128514056225e-05, + "loss": 0.9839, + "step": 700 + }, + { + "epoch": 0.04, + "learning_rate": 8.089500860585198e-05, + "loss": 0.9201, + "step": 705 + }, + { + "epoch": 0.04, + "learning_rate": 8.146873207114172e-05, + "loss": 1.0262, + "step": 710 + }, + { + "epoch": 0.04, + "learning_rate": 8.204245553643144e-05, + "loss": 0.9774, + "step": 715 + }, + { + "epoch": 0.04, + "learning_rate": 8.261617900172117e-05, + "loss": 0.9659, + "step": 720 + }, + { + "epoch": 0.04, + "learning_rate": 8.31899024670109e-05, + "loss": 0.9591, + "step": 725 + }, + { + "epoch": 0.04, + "learning_rate": 8.376362593230064e-05, + "loss": 0.9, + "step": 730 + }, + { + "epoch": 0.04, + "learning_rate": 8.433734939759037e-05, + "loss": 1.0239, + "step": 735 + }, + { + "epoch": 0.04, + "learning_rate": 8.491107286288009e-05, + "loss": 1.0009, + "step": 740 + }, + { + "epoch": 0.04, + "learning_rate": 8.548479632816982e-05, + "loss": 0.9494, + "step": 745 + }, + { + "epoch": 0.04, + "learning_rate": 8.605851979345956e-05, + "loss": 0.9904, + "step": 750 + }, + { + "epoch": 0.04, + "learning_rate": 8.663224325874929e-05, + "loss": 1.0126, + "step": 755 + }, + { + "epoch": 0.04, + "learning_rate": 8.720596672403901e-05, + "loss": 1.0262, + "step": 760 + }, + { + "epoch": 0.04, + "learning_rate": 8.777969018932875e-05, + "loss": 1.0356, + "step": 765 + }, + { + "epoch": 0.04, + "learning_rate": 8.835341365461848e-05, + "loss": 0.957, + "step": 770 + }, + { + "epoch": 0.04, + "learning_rate": 8.892713711990821e-05, + "loss": 1.0135, + "step": 775 + }, + { + "epoch": 0.04, + "learning_rate": 8.950086058519795e-05, + "loss": 0.94, + "step": 780 + }, + { + "epoch": 0.05, + "learning_rate": 9.007458405048767e-05, + "loss": 0.9753, + "step": 785 + }, + { + "epoch": 0.05, + "learning_rate": 9.06483075157774e-05, + "loss": 0.9569, + "step": 790 + }, + { + "epoch": 0.05, + "learning_rate": 9.122203098106713e-05, + "loss": 0.9542, + "step": 795 + }, + { + "epoch": 0.05, + "learning_rate": 9.179575444635687e-05, + "loss": 0.9389, + "step": 800 + }, + { + "epoch": 0.05, + "learning_rate": 9.23694779116466e-05, + "loss": 0.9925, + "step": 805 + }, + { + "epoch": 0.05, + "learning_rate": 9.294320137693632e-05, + "loss": 0.9401, + "step": 810 + }, + { + "epoch": 0.05, + "learning_rate": 9.351692484222605e-05, + "loss": 1.0296, + "step": 815 + }, + { + "epoch": 0.05, + "learning_rate": 9.409064830751579e-05, + "loss": 0.9979, + "step": 820 + }, + { + "epoch": 0.05, + "learning_rate": 9.466437177280552e-05, + "loss": 0.9868, + "step": 825 + }, + { + "epoch": 0.05, + "learning_rate": 9.523809523809524e-05, + "loss": 0.9528, + "step": 830 + }, + { + "epoch": 0.05, + "learning_rate": 9.581181870338497e-05, + "loss": 0.9407, + "step": 835 + }, + { + "epoch": 0.05, + "learning_rate": 9.638554216867471e-05, + "loss": 0.93, + "step": 840 + }, + { + "epoch": 0.05, + "learning_rate": 9.695926563396444e-05, + "loss": 0.9299, + "step": 845 + }, + { + "epoch": 0.05, + "learning_rate": 9.753298909925417e-05, + "loss": 0.9063, + "step": 850 + }, + { + "epoch": 0.05, + "learning_rate": 9.81067125645439e-05, + "loss": 1.0166, + "step": 855 + }, + { + "epoch": 0.05, + "learning_rate": 9.868043602983363e-05, + "loss": 1.0016, + "step": 860 + }, + { + "epoch": 0.05, + "learning_rate": 9.925415949512336e-05, + "loss": 0.9345, + "step": 865 + }, + { + "epoch": 0.05, + "learning_rate": 9.982788296041308e-05, + "loss": 0.9582, + "step": 870 + }, + { + "epoch": 0.05, + "learning_rate": 0.00010040160642570282, + "loss": 1.0, + "step": 875 + }, + { + "epoch": 0.05, + "learning_rate": 0.00010097532989099255, + "loss": 0.9176, + "step": 880 + }, + { + "epoch": 0.05, + "learning_rate": 0.00010154905335628228, + "loss": 0.9909, + "step": 885 + }, + { + "epoch": 0.05, + "learning_rate": 0.000102122776821572, + "loss": 0.9756, + "step": 890 + }, + { + "epoch": 0.05, + "learning_rate": 0.00010269650028686174, + "loss": 1.0205, + "step": 895 + }, + { + "epoch": 0.05, + "learning_rate": 0.00010327022375215147, + "loss": 1.0203, + "step": 900 + }, + { + "epoch": 0.05, + "learning_rate": 0.00010384394721744119, + "loss": 1.0004, + "step": 905 + }, + { + "epoch": 0.05, + "learning_rate": 0.00010441767068273094, + "loss": 0.9371, + "step": 910 + }, + { + "epoch": 0.05, + "learning_rate": 0.00010499139414802066, + "loss": 0.9558, + "step": 915 + }, + { + "epoch": 0.05, + "learning_rate": 0.00010556511761331038, + "loss": 1.0647, + "step": 920 + }, + { + "epoch": 0.05, + "learning_rate": 0.00010613884107860012, + "loss": 1.0336, + "step": 925 + }, + { + "epoch": 0.05, + "learning_rate": 0.00010671256454388984, + "loss": 0.96, + "step": 930 + }, + { + "epoch": 0.05, + "learning_rate": 0.00010728628800917956, + "loss": 0.9684, + "step": 935 + }, + { + "epoch": 0.05, + "learning_rate": 0.00010786001147446931, + "loss": 1.0326, + "step": 940 + }, + { + "epoch": 0.05, + "learning_rate": 0.00010843373493975903, + "loss": 1.0274, + "step": 945 + }, + { + "epoch": 0.05, + "learning_rate": 0.00010900745840504878, + "loss": 1.0132, + "step": 950 + }, + { + "epoch": 0.05, + "learning_rate": 0.0001095811818703385, + "loss": 0.9982, + "step": 955 + }, + { + "epoch": 0.06, + "learning_rate": 0.00011015490533562822, + "loss": 0.9481, + "step": 960 + }, + { + "epoch": 0.06, + "learning_rate": 0.00011072862880091797, + "loss": 0.9013, + "step": 965 + }, + { + "epoch": 0.06, + "learning_rate": 0.00011130235226620769, + "loss": 0.9961, + "step": 970 + }, + { + "epoch": 0.06, + "learning_rate": 0.00011187607573149743, + "loss": 0.9674, + "step": 975 + }, + { + "epoch": 0.06, + "learning_rate": 0.00011244979919678715, + "loss": 0.9243, + "step": 980 + }, + { + "epoch": 0.06, + "learning_rate": 0.00011302352266207687, + "loss": 0.9551, + "step": 985 + }, + { + "epoch": 0.06, + "learning_rate": 0.00011359724612736662, + "loss": 1.0446, + "step": 990 + }, + { + "epoch": 0.06, + "learning_rate": 0.00011417096959265634, + "loss": 0.9859, + "step": 995 + }, + { + "epoch": 0.06, + "learning_rate": 0.00011474469305794609, + "loss": 0.9351, + "step": 1000 + }, + { + "epoch": 0.06, + "learning_rate": 0.0001153184165232358, + "loss": 0.9987, + "step": 1005 + }, + { + "epoch": 0.06, + "learning_rate": 0.00011589213998852553, + "loss": 1.0441, + "step": 1010 + }, + { + "epoch": 0.06, + "learning_rate": 0.00011646586345381527, + "loss": 0.9965, + "step": 1015 + }, + { + "epoch": 0.06, + "learning_rate": 0.000117039586919105, + "loss": 1.0476, + "step": 1020 + }, + { + "epoch": 0.06, + "learning_rate": 0.00011761331038439474, + "loss": 0.978, + "step": 1025 + }, + { + "epoch": 0.06, + "learning_rate": 0.00011818703384968446, + "loss": 0.9991, + "step": 1030 + }, + { + "epoch": 0.06, + "learning_rate": 0.00011876075731497418, + "loss": 0.9742, + "step": 1035 + }, + { + "epoch": 0.06, + "learning_rate": 0.00011933448078026393, + "loss": 0.9523, + "step": 1040 + }, + { + "epoch": 0.06, + "learning_rate": 0.00011990820424555365, + "loss": 0.9462, + "step": 1045 + }, + { + "epoch": 0.06, + "learning_rate": 0.0001204819277108434, + "loss": 0.9774, + "step": 1050 + }, + { + "epoch": 0.06, + "learning_rate": 0.00012105565117613311, + "loss": 0.9817, + "step": 1055 + }, + { + "epoch": 0.06, + "learning_rate": 0.00012162937464142283, + "loss": 0.9062, + "step": 1060 + }, + { + "epoch": 0.06, + "learning_rate": 0.00012220309810671257, + "loss": 0.9564, + "step": 1065 + }, + { + "epoch": 0.06, + "learning_rate": 0.0001227768215720023, + "loss": 0.9818, + "step": 1070 + }, + { + "epoch": 0.06, + "learning_rate": 0.000123350545037292, + "loss": 0.9531, + "step": 1075 + }, + { + "epoch": 0.06, + "learning_rate": 0.00012392426850258177, + "loss": 0.9669, + "step": 1080 + }, + { + "epoch": 0.06, + "learning_rate": 0.00012449799196787148, + "loss": 0.9073, + "step": 1085 + }, + { + "epoch": 0.06, + "learning_rate": 0.00012507171543316124, + "loss": 1.0106, + "step": 1090 + }, + { + "epoch": 0.06, + "learning_rate": 0.00012564543889845094, + "loss": 0.951, + "step": 1095 + }, + { + "epoch": 0.06, + "learning_rate": 0.00012621916236374068, + "loss": 1.0042, + "step": 1100 + }, + { + "epoch": 0.06, + "learning_rate": 0.0001267928858290304, + "loss": 1.0158, + "step": 1105 + }, + { + "epoch": 0.06, + "learning_rate": 0.00012736660929432014, + "loss": 1.0665, + "step": 1110 + }, + { + "epoch": 0.06, + "learning_rate": 0.00012794033275960988, + "loss": 0.9769, + "step": 1115 + }, + { + "epoch": 0.06, + "learning_rate": 0.0001285140562248996, + "loss": 0.9897, + "step": 1120 + }, + { + "epoch": 0.06, + "learning_rate": 0.00012908777969018932, + "loss": 1.0277, + "step": 1125 + }, + { + "epoch": 0.06, + "learning_rate": 0.00012966150315547908, + "loss": 0.999, + "step": 1130 + }, + { + "epoch": 0.07, + "learning_rate": 0.00013023522662076878, + "loss": 0.9439, + "step": 1135 + }, + { + "epoch": 0.07, + "learning_rate": 0.00013080895008605854, + "loss": 1.0495, + "step": 1140 + }, + { + "epoch": 0.07, + "learning_rate": 0.00013138267355134825, + "loss": 0.9381, + "step": 1145 + }, + { + "epoch": 0.07, + "learning_rate": 0.00013195639701663798, + "loss": 0.9482, + "step": 1150 + }, + { + "epoch": 0.07, + "learning_rate": 0.00013253012048192772, + "loss": 0.9986, + "step": 1155 + }, + { + "epoch": 0.07, + "learning_rate": 0.00013310384394721745, + "loss": 1.0784, + "step": 1160 + }, + { + "epoch": 0.07, + "learning_rate": 0.00013367756741250719, + "loss": 0.961, + "step": 1165 + }, + { + "epoch": 0.07, + "learning_rate": 0.00013425129087779692, + "loss": 0.9253, + "step": 1170 + }, + { + "epoch": 0.07, + "learning_rate": 0.00013482501434308663, + "loss": 0.9941, + "step": 1175 + }, + { + "epoch": 0.07, + "learning_rate": 0.00013539873780837639, + "loss": 0.9823, + "step": 1180 + }, + { + "epoch": 0.07, + "learning_rate": 0.0001359724612736661, + "loss": 0.9685, + "step": 1185 + }, + { + "epoch": 0.07, + "learning_rate": 0.00013654618473895585, + "loss": 0.9845, + "step": 1190 + }, + { + "epoch": 0.07, + "learning_rate": 0.00013711990820424556, + "loss": 0.9238, + "step": 1195 + }, + { + "epoch": 0.07, + "learning_rate": 0.0001376936316695353, + "loss": 0.971, + "step": 1200 + }, + { + "epoch": 0.07, + "learning_rate": 0.00013826735513482503, + "loss": 0.9483, + "step": 1205 + }, + { + "epoch": 0.07, + "learning_rate": 0.00013884107860011476, + "loss": 0.933, + "step": 1210 + }, + { + "epoch": 0.07, + "learning_rate": 0.00013941480206540447, + "loss": 1.0371, + "step": 1215 + }, + { + "epoch": 0.07, + "learning_rate": 0.00013998852553069423, + "loss": 0.9498, + "step": 1220 + }, + { + "epoch": 0.07, + "learning_rate": 0.00014056224899598393, + "loss": 0.9524, + "step": 1225 + }, + { + "epoch": 0.07, + "learning_rate": 0.00014113597246127367, + "loss": 0.9567, + "step": 1230 + }, + { + "epoch": 0.07, + "learning_rate": 0.0001417096959265634, + "loss": 0.9739, + "step": 1235 + }, + { + "epoch": 0.07, + "learning_rate": 0.00014228341939185313, + "loss": 0.9551, + "step": 1240 + }, + { + "epoch": 0.07, + "learning_rate": 0.00014285714285714287, + "loss": 0.9942, + "step": 1245 + }, + { + "epoch": 0.07, + "learning_rate": 0.0001434308663224326, + "loss": 0.9211, + "step": 1250 + }, + { + "epoch": 0.07, + "learning_rate": 0.00014400458978772233, + "loss": 1.0062, + "step": 1255 + }, + { + "epoch": 0.07, + "learning_rate": 0.00014457831325301204, + "loss": 0.9997, + "step": 1260 + }, + { + "epoch": 0.07, + "learning_rate": 0.00014515203671830177, + "loss": 0.9647, + "step": 1265 + }, + { + "epoch": 0.07, + "learning_rate": 0.0001457257601835915, + "loss": 1.0235, + "step": 1270 + }, + { + "epoch": 0.07, + "learning_rate": 0.00014629948364888124, + "loss": 1.0429, + "step": 1275 + }, + { + "epoch": 0.07, + "learning_rate": 0.00014687320711417098, + "loss": 0.9401, + "step": 1280 + }, + { + "epoch": 0.07, + "learning_rate": 0.0001474469305794607, + "loss": 0.9661, + "step": 1285 + }, + { + "epoch": 0.07, + "learning_rate": 0.00014802065404475042, + "loss": 0.9528, + "step": 1290 + }, + { + "epoch": 0.07, + "learning_rate": 0.00014859437751004018, + "loss": 0.9837, + "step": 1295 + }, + { + "epoch": 0.07, + "learning_rate": 0.00014916810097532988, + "loss": 1.0043, + "step": 1300 + }, + { + "epoch": 0.07, + "learning_rate": 0.00014974182444061964, + "loss": 1.0775, + "step": 1305 + }, + { + "epoch": 0.08, + "learning_rate": 0.00015031554790590935, + "loss": 0.9867, + "step": 1310 + }, + { + "epoch": 0.08, + "learning_rate": 0.00015088927137119908, + "loss": 0.9846, + "step": 1315 + }, + { + "epoch": 0.08, + "learning_rate": 0.00015146299483648882, + "loss": 0.9796, + "step": 1320 + }, + { + "epoch": 0.08, + "learning_rate": 0.00015203671830177855, + "loss": 1.053, + "step": 1325 + }, + { + "epoch": 0.08, + "learning_rate": 0.00015261044176706828, + "loss": 0.944, + "step": 1330 + }, + { + "epoch": 0.08, + "learning_rate": 0.00015318416523235802, + "loss": 0.9963, + "step": 1335 + }, + { + "epoch": 0.08, + "learning_rate": 0.00015375788869764772, + "loss": 0.9996, + "step": 1340 + }, + { + "epoch": 0.08, + "learning_rate": 0.00015433161216293748, + "loss": 1.0571, + "step": 1345 + }, + { + "epoch": 0.08, + "learning_rate": 0.0001549053356282272, + "loss": 0.9715, + "step": 1350 + }, + { + "epoch": 0.08, + "learning_rate": 0.00015547905909351695, + "loss": 0.9664, + "step": 1355 + }, + { + "epoch": 0.08, + "learning_rate": 0.00015605278255880666, + "loss": 1.0288, + "step": 1360 + }, + { + "epoch": 0.08, + "learning_rate": 0.0001566265060240964, + "loss": 0.9653, + "step": 1365 + }, + { + "epoch": 0.08, + "learning_rate": 0.00015720022948938613, + "loss": 0.9467, + "step": 1370 + }, + { + "epoch": 0.08, + "learning_rate": 0.00015777395295467586, + "loss": 0.9892, + "step": 1375 + }, + { + "epoch": 0.08, + "learning_rate": 0.00015834767641996557, + "loss": 1.0086, + "step": 1380 + }, + { + "epoch": 0.08, + "learning_rate": 0.00015892139988525533, + "loss": 1.0353, + "step": 1385 + }, + { + "epoch": 0.08, + "learning_rate": 0.00015949512335054503, + "loss": 0.9849, + "step": 1390 + }, + { + "epoch": 0.08, + "learning_rate": 0.0001600688468158348, + "loss": 0.9907, + "step": 1395 + }, + { + "epoch": 0.08, + "learning_rate": 0.0001606425702811245, + "loss": 1.0342, + "step": 1400 + }, + { + "epoch": 0.08, + "learning_rate": 0.00016121629374641423, + "loss": 1.0026, + "step": 1405 + }, + { + "epoch": 0.08, + "learning_rate": 0.00016179001721170397, + "loss": 0.9064, + "step": 1410 + }, + { + "epoch": 0.08, + "learning_rate": 0.0001623637406769937, + "loss": 0.9871, + "step": 1415 + }, + { + "epoch": 0.08, + "learning_rate": 0.00016293746414228343, + "loss": 1.0059, + "step": 1420 + }, + { + "epoch": 0.08, + "learning_rate": 0.00016351118760757317, + "loss": 0.9875, + "step": 1425 + }, + { + "epoch": 0.08, + "learning_rate": 0.00016408491107286287, + "loss": 0.9565, + "step": 1430 + }, + { + "epoch": 0.08, + "learning_rate": 0.00016465863453815263, + "loss": 0.945, + "step": 1435 + }, + { + "epoch": 0.08, + "learning_rate": 0.00016523235800344234, + "loss": 0.9648, + "step": 1440 + }, + { + "epoch": 0.08, + "learning_rate": 0.0001658060814687321, + "loss": 1.0576, + "step": 1445 + }, + { + "epoch": 0.08, + "learning_rate": 0.0001663798049340218, + "loss": 0.9933, + "step": 1450 + }, + { + "epoch": 0.08, + "learning_rate": 0.00016695352839931154, + "loss": 1.0253, + "step": 1455 + }, + { + "epoch": 0.08, + "learning_rate": 0.00016752725186460127, + "loss": 0.9739, + "step": 1460 + }, + { + "epoch": 0.08, + "learning_rate": 0.000168100975329891, + "loss": 1.0493, + "step": 1465 + }, + { + "epoch": 0.08, + "learning_rate": 0.00016867469879518074, + "loss": 1.0177, + "step": 1470 + }, + { + "epoch": 0.08, + "learning_rate": 0.00016924842226047048, + "loss": 1.0153, + "step": 1475 + }, + { + "epoch": 0.08, + "learning_rate": 0.00016982214572576018, + "loss": 0.9422, + "step": 1480 + }, + { + "epoch": 0.09, + "learning_rate": 0.00017039586919104992, + "loss": 1.0439, + "step": 1485 + }, + { + "epoch": 0.09, + "learning_rate": 0.00017096959265633965, + "loss": 0.9329, + "step": 1490 + }, + { + "epoch": 0.09, + "learning_rate": 0.00017154331612162938, + "loss": 0.9185, + "step": 1495 + }, + { + "epoch": 0.09, + "learning_rate": 0.00017211703958691912, + "loss": 0.9594, + "step": 1500 + }, + { + "epoch": 0.09, + "learning_rate": 0.00017269076305220885, + "loss": 0.9732, + "step": 1505 + }, + { + "epoch": 0.09, + "learning_rate": 0.00017326448651749858, + "loss": 1.0135, + "step": 1510 + }, + { + "epoch": 0.09, + "learning_rate": 0.0001738382099827883, + "loss": 1.0635, + "step": 1515 + }, + { + "epoch": 0.09, + "learning_rate": 0.00017441193344807802, + "loss": 1.0554, + "step": 1520 + }, + { + "epoch": 0.09, + "learning_rate": 0.00017498565691336776, + "loss": 1.0567, + "step": 1525 + }, + { + "epoch": 0.09, + "learning_rate": 0.0001755593803786575, + "loss": 1.0248, + "step": 1530 + }, + { + "epoch": 0.09, + "learning_rate": 0.00017613310384394722, + "loss": 1.0039, + "step": 1535 + }, + { + "epoch": 0.09, + "learning_rate": 0.00017670682730923696, + "loss": 0.9948, + "step": 1540 + }, + { + "epoch": 0.09, + "learning_rate": 0.00017728055077452666, + "loss": 1.0919, + "step": 1545 + }, + { + "epoch": 0.09, + "learning_rate": 0.00017785427423981642, + "loss": 1.0281, + "step": 1550 + }, + { + "epoch": 0.09, + "learning_rate": 0.00017842799770510613, + "loss": 0.9439, + "step": 1555 + }, + { + "epoch": 0.09, + "learning_rate": 0.0001790017211703959, + "loss": 1.1149, + "step": 1560 + }, + { + "epoch": 0.09, + "learning_rate": 0.0001795754446356856, + "loss": 1.0071, + "step": 1565 + }, + { + "epoch": 0.09, + "learning_rate": 0.00018014916810097533, + "loss": 1.1358, + "step": 1570 + }, + { + "epoch": 0.09, + "learning_rate": 0.00018072289156626507, + "loss": 1.389, + "step": 1575 + }, + { + "epoch": 0.09, + "learning_rate": 0.0001812966150315548, + "loss": 1.8714, + "step": 1580 + }, + { + "epoch": 0.09, + "learning_rate": 0.00018187033849684453, + "loss": 2.5047, + "step": 1585 + }, + { + "epoch": 0.09, + "learning_rate": 0.00018244406196213427, + "loss": 2.6965, + "step": 1590 + }, + { + "epoch": 0.09, + "learning_rate": 0.00018301778542742397, + "loss": 3.029, + "step": 1595 + }, + { + "epoch": 0.09, + "learning_rate": 0.00018359150889271373, + "loss": 3.8631, + "step": 1600 + }, + { + "epoch": 0.09, + "learning_rate": 0.00018416523235800344, + "loss": 3.9482, + "step": 1605 + }, + { + "epoch": 0.09, + "learning_rate": 0.0001847389558232932, + "loss": 4.1312, + "step": 1610 + }, + { + "epoch": 0.09, + "learning_rate": 0.0001853126792885829, + "loss": 3.8592, + "step": 1615 + }, + { + "epoch": 0.09, + "learning_rate": 0.00018588640275387264, + "loss": 3.7334, + "step": 1620 + }, + { + "epoch": 0.09, + "learning_rate": 0.00018646012621916237, + "loss": 3.9385, + "step": 1625 + }, + { + "epoch": 0.09, + "learning_rate": 0.0001870338496844521, + "loss": 3.7088, + "step": 1630 + }, + { + "epoch": 0.09, + "learning_rate": 0.00018760757314974184, + "loss": 3.9882, + "step": 1635 + }, + { + "epoch": 0.09, + "learning_rate": 0.00018818129661503157, + "loss": 3.6985, + "step": 1640 + }, + { + "epoch": 0.09, + "learning_rate": 0.00018875502008032128, + "loss": 3.9489, + "step": 1645 + }, + { + "epoch": 0.09, + "learning_rate": 0.00018932874354561104, + "loss": 3.7395, + "step": 1650 + }, + { + "epoch": 0.09, + "learning_rate": 0.00018990246701090075, + "loss": 3.8121, + "step": 1655 + }, + { + "epoch": 0.1, + "learning_rate": 0.00019047619047619048, + "loss": 3.6156, + "step": 1660 + }, + { + "epoch": 0.1, + "learning_rate": 0.00019104991394148021, + "loss": 3.6947, + "step": 1665 + }, + { + "epoch": 0.1, + "learning_rate": 0.00019162363740676995, + "loss": 3.7272, + "step": 1670 + }, + { + "epoch": 0.1, + "learning_rate": 0.00019219736087205968, + "loss": 3.4795, + "step": 1675 + }, + { + "epoch": 0.1, + "learning_rate": 0.00019277108433734942, + "loss": 3.5508, + "step": 1680 + }, + { + "epoch": 0.1, + "learning_rate": 0.00019334480780263912, + "loss": 3.4919, + "step": 1685 + }, + { + "epoch": 0.1, + "learning_rate": 0.00019391853126792888, + "loss": 3.4661, + "step": 1690 + }, + { + "epoch": 0.1, + "learning_rate": 0.0001944922547332186, + "loss": 3.7751, + "step": 1695 + }, + { + "epoch": 0.1, + "learning_rate": 0.00019506597819850835, + "loss": 3.5772, + "step": 1700 + }, + { + "epoch": 0.1, + "learning_rate": 0.00019563970166379806, + "loss": 4.0097, + "step": 1705 + }, + { + "epoch": 0.1, + "learning_rate": 0.0001962134251290878, + "loss": 4.098, + "step": 1710 + }, + { + "epoch": 0.1, + "learning_rate": 0.00019678714859437752, + "loss": 3.6938, + "step": 1715 + }, + { + "epoch": 0.1, + "learning_rate": 0.00019736087205966726, + "loss": 4.0025, + "step": 1720 + }, + { + "epoch": 0.1, + "learning_rate": 0.000197934595524957, + "loss": 3.8794, + "step": 1725 + }, + { + "epoch": 0.1, + "learning_rate": 0.00019850831899024672, + "loss": 4.074, + "step": 1730 + }, + { + "epoch": 0.1, + "learning_rate": 0.00019908204245553643, + "loss": 3.9138, + "step": 1735 + }, + { + "epoch": 0.1, + "learning_rate": 0.00019965576592082616, + "loss": 3.8634, + "step": 1740 + }, + { + "epoch": 0.1, + "learning_rate": 0.0001999999919775815, + "loss": 4.0523, + "step": 1745 + }, + { + "epoch": 0.1, + "learning_rate": 0.00019999990172538815, + "loss": 4.1861, + "step": 1750 + }, + { + "epoch": 0.1, + "learning_rate": 0.00019999971119306908, + "loss": 4.1269, + "step": 1755 + }, + { + "epoch": 0.1, + "learning_rate": 0.0001999994203808154, + "loss": 4.0752, + "step": 1760 + }, + { + "epoch": 0.1, + "learning_rate": 0.00019999902928891875, + "loss": 4.4602, + "step": 1765 + }, + { + "epoch": 0.1, + "learning_rate": 0.00019999853791777126, + "loss": 4.5566, + "step": 1770 + }, + { + "epoch": 0.1, + "learning_rate": 0.00019999794626786573, + "loss": 4.4079, + "step": 1775 + }, + { + "epoch": 0.1, + "learning_rate": 0.00019999725433979544, + "loss": 4.2193, + "step": 1780 + }, + { + "epoch": 0.1, + "learning_rate": 0.00019999646213425426, + "loss": 4.1227, + "step": 1785 + }, + { + "epoch": 0.1, + "learning_rate": 0.00019999556965203663, + "loss": 4.1697, + "step": 1790 + }, + { + "epoch": 0.1, + "learning_rate": 0.00019999457689403753, + "loss": 4.1337, + "step": 1795 + }, + { + "epoch": 0.1, + "learning_rate": 0.0001999934838612525, + "loss": 3.9798, + "step": 1800 + }, + { + "epoch": 0.1, + "learning_rate": 0.0001999922905547776, + "loss": 3.9761, + "step": 1805 + }, + { + "epoch": 0.1, + "learning_rate": 0.00019999099697580954, + "loss": 3.7486, + "step": 1810 + }, + { + "epoch": 0.1, + "learning_rate": 0.00019998960312564548, + "loss": 3.9724, + "step": 1815 + }, + { + "epoch": 0.1, + "learning_rate": 0.0001999881090056832, + "loss": 3.9185, + "step": 1820 + }, + { + "epoch": 0.1, + "learning_rate": 0.000199986514617421, + "loss": 3.6141, + "step": 1825 + }, + { + "epoch": 0.1, + "learning_rate": 0.00019998481996245772, + "loss": 3.9031, + "step": 1830 + }, + { + "epoch": 0.11, + "learning_rate": 0.00019998302504249278, + "loss": 3.765, + "step": 1835 + }, + { + "epoch": 0.11, + "learning_rate": 0.00019998112985932613, + "loss": 3.6993, + "step": 1840 + }, + { + "epoch": 0.11, + "learning_rate": 0.00019997913441485826, + "loss": 3.7082, + "step": 1845 + }, + { + "epoch": 0.11, + "learning_rate": 0.00019997703871109021, + "loss": 3.9119, + "step": 1850 + }, + { + "epoch": 0.11, + "learning_rate": 0.00019997484275012357, + "loss": 3.915, + "step": 1855 + }, + { + "epoch": 0.11, + "learning_rate": 0.00019997254653416043, + "loss": 3.9133, + "step": 1860 + }, + { + "epoch": 0.11, + "learning_rate": 0.00019997015006550342, + "loss": 3.8751, + "step": 1865 + }, + { + "epoch": 0.11, + "learning_rate": 0.0001999676533465558, + "loss": 4.5563, + "step": 1870 + }, + { + "epoch": 0.11, + "learning_rate": 0.00019996505637982122, + "loss": 4.1213, + "step": 1875 + }, + { + "epoch": 0.11, + "learning_rate": 0.00019996235916790392, + "loss": 3.7805, + "step": 1880 + }, + { + "epoch": 0.11, + "learning_rate": 0.0001999595617135087, + "loss": 4.1168, + "step": 1885 + }, + { + "epoch": 0.11, + "learning_rate": 0.00019995666401944085, + "loss": 4.1894, + "step": 1890 + }, + { + "epoch": 0.11, + "learning_rate": 0.0001999536660886062, + "loss": 3.9056, + "step": 1895 + }, + { + "epoch": 0.11, + "learning_rate": 0.00019995056792401105, + "loss": 3.721, + "step": 1900 + }, + { + "epoch": 0.11, + "learning_rate": 0.00019994736952876226, + "loss": 3.7291, + "step": 1905 + }, + { + "epoch": 0.11, + "learning_rate": 0.0001999440709060672, + "loss": 3.9405, + "step": 1910 + }, + { + "epoch": 0.11, + "learning_rate": 0.0001999406720592337, + "loss": 4.2803, + "step": 1915 + }, + { + "epoch": 0.11, + "learning_rate": 0.00019993717299167014, + "loss": 4.0277, + "step": 1920 + }, + { + "epoch": 0.11, + "learning_rate": 0.00019993357370688542, + "loss": 3.6311, + "step": 1925 + }, + { + "epoch": 0.11, + "learning_rate": 0.00019992987420848891, + "loss": 3.598, + "step": 1930 + }, + { + "epoch": 0.11, + "learning_rate": 0.00019992607450019048, + "loss": 4.0497, + "step": 1935 + }, + { + "epoch": 0.11, + "learning_rate": 0.00019992217458580043, + "loss": 4.165, + "step": 1940 + }, + { + "epoch": 0.11, + "learning_rate": 0.00019991817446922964, + "loss": 3.7754, + "step": 1945 + }, + { + "epoch": 0.11, + "learning_rate": 0.00019991407415448947, + "loss": 3.8028, + "step": 1950 + }, + { + "epoch": 0.11, + "learning_rate": 0.0001999098736456917, + "loss": 3.8109, + "step": 1955 + }, + { + "epoch": 0.11, + "learning_rate": 0.00019990557294704856, + "loss": 3.9275, + "step": 1960 + }, + { + "epoch": 0.11, + "learning_rate": 0.00019990117206287287, + "loss": 3.8176, + "step": 1965 + }, + { + "epoch": 0.11, + "learning_rate": 0.0001998966709975778, + "loss": 3.817, + "step": 1970 + }, + { + "epoch": 0.11, + "learning_rate": 0.00019989206975567708, + "loss": 3.6848, + "step": 1975 + }, + { + "epoch": 0.11, + "learning_rate": 0.0001998873683417848, + "loss": 3.7611, + "step": 1980 + }, + { + "epoch": 0.11, + "learning_rate": 0.00019988256676061554, + "loss": 3.6979, + "step": 1985 + }, + { + "epoch": 0.11, + "learning_rate": 0.00019987766501698438, + "loss": 3.7871, + "step": 1990 + }, + { + "epoch": 0.11, + "learning_rate": 0.00019987266311580676, + "loss": 3.9897, + "step": 1995 + }, + { + "epoch": 0.11, + "learning_rate": 0.00019986756106209864, + "loss": 3.9645, + "step": 2000 + }, + { + "epoch": 0.12, + "learning_rate": 0.0001998623588609763, + "loss": 3.8128, + "step": 2005 + }, + { + "epoch": 0.12, + "learning_rate": 0.0001998570565176566, + "loss": 4.0431, + "step": 2010 + }, + { + "epoch": 0.12, + "learning_rate": 0.0001998516540374567, + "loss": 4.1457, + "step": 2015 + }, + { + "epoch": 0.12, + "learning_rate": 0.00019984615142579424, + "loss": 3.8534, + "step": 2020 + }, + { + "epoch": 0.12, + "learning_rate": 0.00019984054868818724, + "loss": 3.7402, + "step": 2025 + }, + { + "epoch": 0.12, + "learning_rate": 0.0001998348458302541, + "loss": 4.2085, + "step": 2030 + }, + { + "epoch": 0.12, + "learning_rate": 0.0001998290428577137, + "loss": 4.1647, + "step": 2035 + }, + { + "epoch": 0.12, + "learning_rate": 0.00019982313977638528, + "loss": 3.9169, + "step": 2040 + }, + { + "epoch": 0.12, + "learning_rate": 0.00019981713659218846, + "loss": 3.7777, + "step": 2045 + }, + { + "epoch": 0.12, + "learning_rate": 0.00019981103331114322, + "loss": 3.7297, + "step": 2050 + }, + { + "epoch": 0.12, + "learning_rate": 0.00019980482993936995, + "loss": 3.9071, + "step": 2055 + }, + { + "epoch": 0.12, + "learning_rate": 0.00019979852648308947, + "loss": 3.863, + "step": 2060 + }, + { + "epoch": 0.12, + "learning_rate": 0.0001997921229486228, + "loss": 3.8292, + "step": 2065 + }, + { + "epoch": 0.12, + "learning_rate": 0.00019978561934239149, + "loss": 3.9396, + "step": 2070 + }, + { + "epoch": 0.12, + "learning_rate": 0.0001997790156709173, + "loss": 3.9623, + "step": 2075 + }, + { + "epoch": 0.12, + "learning_rate": 0.00019977231194082248, + "loss": 3.9638, + "step": 2080 + }, + { + "epoch": 0.12, + "learning_rate": 0.00019976550815882952, + "loss": 4.0928, + "step": 2085 + }, + { + "epoch": 0.12, + "learning_rate": 0.00019975860433176128, + "loss": 4.1301, + "step": 2090 + }, + { + "epoch": 0.12, + "learning_rate": 0.0001997516004665409, + "loss": 3.9469, + "step": 2095 + }, + { + "epoch": 0.12, + "learning_rate": 0.0001997444965701919, + "loss": 3.8513, + "step": 2100 + }, + { + "epoch": 0.12, + "learning_rate": 0.00019973729264983808, + "loss": 3.9729, + "step": 2105 + }, + { + "epoch": 0.12, + "learning_rate": 0.00019972998871270353, + "loss": 3.9228, + "step": 2110 + }, + { + "epoch": 0.12, + "learning_rate": 0.0001997225847661127, + "loss": 3.9859, + "step": 2115 + }, + { + "epoch": 0.12, + "learning_rate": 0.00019971508081749023, + "loss": 3.7867, + "step": 2120 + }, + { + "epoch": 0.12, + "learning_rate": 0.0001997074768743611, + "loss": 4.0779, + "step": 2125 + }, + { + "epoch": 0.12, + "learning_rate": 0.00019969977294435057, + "loss": 4.2388, + "step": 2130 + }, + { + "epoch": 0.12, + "learning_rate": 0.0001996919690351842, + "loss": 3.9913, + "step": 2135 + }, + { + "epoch": 0.12, + "learning_rate": 0.0001996840651546877, + "loss": 3.9429, + "step": 2140 + }, + { + "epoch": 0.12, + "learning_rate": 0.00019967606131078718, + "loss": 3.7879, + "step": 2145 + }, + { + "epoch": 0.12, + "learning_rate": 0.00019966795751150885, + "loss": 3.6277, + "step": 2150 + }, + { + "epoch": 0.12, + "learning_rate": 0.00019965975376497918, + "loss": 3.7193, + "step": 2155 + }, + { + "epoch": 0.12, + "learning_rate": 0.000199651450079425, + "loss": 4.1212, + "step": 2160 + }, + { + "epoch": 0.12, + "learning_rate": 0.00019964304646317323, + "loss": 4.3442, + "step": 2165 + }, + { + "epoch": 0.12, + "learning_rate": 0.00019963454292465103, + "loss": 4.1223, + "step": 2170 + }, + { + "epoch": 0.12, + "learning_rate": 0.00019962593947238578, + "loss": 3.9412, + "step": 2175 + }, + { + "epoch": 0.13, + "learning_rate": 0.000199617236115005, + "loss": 4.104, + "step": 2180 + }, + { + "epoch": 0.13, + "learning_rate": 0.00019960843286123648, + "loss": 4.1006, + "step": 2185 + }, + { + "epoch": 0.13, + "learning_rate": 0.0001995995297199081, + "loss": 4.1352, + "step": 2190 + }, + { + "epoch": 0.13, + "learning_rate": 0.000199590526699948, + "loss": 4.1343, + "step": 2195 + }, + { + "epoch": 0.13, + "learning_rate": 0.0001995814238103844, + "loss": 3.9857, + "step": 2200 + }, + { + "epoch": 0.13, + "learning_rate": 0.00019957222106034572, + "loss": 4.125, + "step": 2205 + }, + { + "epoch": 0.13, + "learning_rate": 0.00019956291845906046, + "loss": 4.1851, + "step": 2210 + }, + { + "epoch": 0.13, + "learning_rate": 0.00019955351601585731, + "loss": 3.9971, + "step": 2215 + }, + { + "epoch": 0.13, + "learning_rate": 0.0001995440137401651, + "loss": 4.009, + "step": 2220 + }, + { + "epoch": 0.13, + "learning_rate": 0.00019953441164151264, + "loss": 3.8323, + "step": 2225 + }, + { + "epoch": 0.13, + "learning_rate": 0.00019952470972952902, + "loss": 3.6549, + "step": 2230 + }, + { + "epoch": 0.13, + "learning_rate": 0.0001995149080139433, + "loss": 3.6998, + "step": 2235 + }, + { + "epoch": 0.13, + "learning_rate": 0.0001995050065045847, + "loss": 3.9666, + "step": 2240 + }, + { + "epoch": 0.13, + "learning_rate": 0.00019949500521138243, + "loss": 4.2325, + "step": 2245 + }, + { + "epoch": 0.13, + "learning_rate": 0.00019948490414436584, + "loss": 4.2426, + "step": 2250 + }, + { + "epoch": 0.13, + "learning_rate": 0.00019947470331366427, + "loss": 4.0235, + "step": 2255 + }, + { + "epoch": 0.13, + "learning_rate": 0.00019946440272950716, + "loss": 3.9708, + "step": 2260 + }, + { + "epoch": 0.13, + "learning_rate": 0.00019945400240222396, + "loss": 3.9884, + "step": 2265 + }, + { + "epoch": 0.13, + "learning_rate": 0.00019944350234224416, + "loss": 3.9914, + "step": 2270 + }, + { + "epoch": 0.13, + "learning_rate": 0.0001994329025600972, + "loss": 3.9556, + "step": 2275 + }, + { + "epoch": 0.13, + "learning_rate": 0.00019942220306641258, + "loss": 3.8435, + "step": 2280 + }, + { + "epoch": 0.13, + "learning_rate": 0.0001994114038719198, + "loss": 3.8276, + "step": 2285 + }, + { + "epoch": 0.13, + "learning_rate": 0.00019940050498744828, + "loss": 3.797, + "step": 2290 + }, + { + "epoch": 0.13, + "learning_rate": 0.00019938950642392746, + "loss": 3.8611, + "step": 2295 + }, + { + "epoch": 0.13, + "learning_rate": 0.00019937840819238677, + "loss": 3.9601, + "step": 2300 + }, + { + "epoch": 0.13, + "learning_rate": 0.00019936721030395547, + "loss": 4.0263, + "step": 2305 + }, + { + "epoch": 0.13, + "learning_rate": 0.00019935591276986286, + "loss": 4.1813, + "step": 2310 + }, + { + "epoch": 0.13, + "learning_rate": 0.00019934451560143815, + "loss": 4.0571, + "step": 2315 + }, + { + "epoch": 0.13, + "learning_rate": 0.0001993330188101104, + "loss": 3.9657, + "step": 2320 + }, + { + "epoch": 0.13, + "learning_rate": 0.00019932142240740866, + "loss": 4.0322, + "step": 2325 + }, + { + "epoch": 0.13, + "learning_rate": 0.0001993097264049618, + "loss": 3.8831, + "step": 2330 + }, + { + "epoch": 0.13, + "learning_rate": 0.0001992979308144986, + "loss": 3.817, + "step": 2335 + }, + { + "epoch": 0.13, + "learning_rate": 0.00019928603564784773, + "loss": 3.8604, + "step": 2340 + }, + { + "epoch": 0.13, + "learning_rate": 0.0001992740409169377, + "loss": 3.9535, + "step": 2345 + }, + { + "epoch": 0.13, + "learning_rate": 0.00019926194663379677, + "loss": 4.0421, + "step": 2350 + }, + { + "epoch": 0.14, + "learning_rate": 0.00019924975281055324, + "loss": 4.0244, + "step": 2355 + }, + { + "epoch": 0.14, + "learning_rate": 0.00019923745945943502, + "loss": 3.9142, + "step": 2360 + }, + { + "epoch": 0.14, + "learning_rate": 0.0001992250665927699, + "loss": 3.7527, + "step": 2365 + }, + { + "epoch": 0.14, + "learning_rate": 0.0001992125742229855, + "loss": 3.6311, + "step": 2370 + }, + { + "epoch": 0.14, + "learning_rate": 0.00019919998236260923, + "loss": 3.6715, + "step": 2375 + }, + { + "epoch": 0.14, + "learning_rate": 0.00019918729102426816, + "loss": 4.1127, + "step": 2380 + }, + { + "epoch": 0.14, + "learning_rate": 0.00019917450022068927, + "loss": 4.0694, + "step": 2385 + }, + { + "epoch": 0.14, + "learning_rate": 0.00019916160996469914, + "loss": 4.0454, + "step": 2390 + }, + { + "epoch": 0.14, + "learning_rate": 0.0001991486202692242, + "loss": 3.9685, + "step": 2395 + }, + { + "epoch": 0.14, + "learning_rate": 0.00019913553114729053, + "loss": 4.0583, + "step": 2400 + }, + { + "epoch": 0.14, + "learning_rate": 0.0001991223426120239, + "loss": 3.898, + "step": 2405 + }, + { + "epoch": 0.14, + "learning_rate": 0.00019910905467664987, + "loss": 3.7952, + "step": 2410 + }, + { + "epoch": 0.14, + "learning_rate": 0.00019909566735449354, + "loss": 3.7981, + "step": 2415 + }, + { + "epoch": 0.14, + "learning_rate": 0.00019908218065897978, + "loss": 3.8415, + "step": 2420 + }, + { + "epoch": 0.14, + "learning_rate": 0.00019906859460363307, + "loss": 3.7588, + "step": 2425 + }, + { + "epoch": 0.14, + "learning_rate": 0.00019905490920207755, + "loss": 3.7436, + "step": 2430 + }, + { + "epoch": 0.14, + "learning_rate": 0.00019904112446803699, + "loss": 3.7062, + "step": 2435 + }, + { + "epoch": 0.14, + "learning_rate": 0.0001990272404153347, + "loss": 3.8589, + "step": 2440 + }, + { + "epoch": 0.14, + "learning_rate": 0.00019901325705789366, + "loss": 3.8012, + "step": 2445 + }, + { + "epoch": 0.14, + "learning_rate": 0.0001989991744097364, + "loss": 3.8096, + "step": 2450 + }, + { + "epoch": 0.14, + "learning_rate": 0.0001989849924849851, + "loss": 3.7199, + "step": 2455 + }, + { + "epoch": 0.14, + "learning_rate": 0.00019897071129786132, + "loss": 3.7549, + "step": 2460 + }, + { + "epoch": 0.14, + "learning_rate": 0.00019895633086268637, + "loss": 3.822, + "step": 2465 + }, + { + "epoch": 0.14, + "learning_rate": 0.0001989418511938809, + "loss": 3.808, + "step": 2470 + }, + { + "epoch": 0.14, + "learning_rate": 0.00019892727230596519, + "loss": 3.7525, + "step": 2475 + }, + { + "epoch": 0.14, + "learning_rate": 0.00019891259421355895, + "loss": 3.7883, + "step": 2480 + }, + { + "epoch": 0.14, + "learning_rate": 0.0001988978169313815, + "loss": 3.7333, + "step": 2485 + }, + { + "epoch": 0.14, + "learning_rate": 0.00019888294047425143, + "loss": 3.727, + "step": 2490 + }, + { + "epoch": 0.14, + "learning_rate": 0.00019886796485708692, + "loss": 3.7949, + "step": 2495 + }, + { + "epoch": 0.14, + "learning_rate": 0.00019885289009490556, + "loss": 3.7852, + "step": 2500 + }, + { + "epoch": 0.14, + "learning_rate": 0.0001988377162028243, + "loss": 3.7526, + "step": 2505 + }, + { + "epoch": 0.14, + "learning_rate": 0.00019882244319605966, + "loss": 3.6963, + "step": 2510 + }, + { + "epoch": 0.14, + "learning_rate": 0.00019880707108992738, + "loss": 3.5769, + "step": 2515 + }, + { + "epoch": 0.14, + "learning_rate": 0.0001987915998998426, + "loss": 3.8725, + "step": 2520 + }, + { + "epoch": 0.14, + "learning_rate": 0.00019877602964131995, + "loss": 3.8018, + "step": 2525 + }, + { + "epoch": 0.15, + "learning_rate": 0.0001987603603299733, + "loss": 3.9893, + "step": 2530 + }, + { + "epoch": 0.15, + "learning_rate": 0.00019874459198151583, + "loss": 3.9219, + "step": 2535 + }, + { + "epoch": 0.15, + "learning_rate": 0.0001987287246117601, + "loss": 3.8392, + "step": 2540 + }, + { + "epoch": 0.15, + "learning_rate": 0.00019871275823661795, + "loss": 3.8697, + "step": 2545 + }, + { + "epoch": 0.15, + "learning_rate": 0.00019869669287210046, + "loss": 3.9055, + "step": 2550 + }, + { + "epoch": 0.15, + "learning_rate": 0.00019868052853431808, + "loss": 3.8216, + "step": 2555 + }, + { + "epoch": 0.15, + "learning_rate": 0.00019866426523948037, + "loss": 3.7507, + "step": 2560 + }, + { + "epoch": 0.15, + "learning_rate": 0.00019864790300389625, + "loss": 3.8071, + "step": 2565 + }, + { + "epoch": 0.15, + "learning_rate": 0.00019863144184397376, + "loss": 3.7521, + "step": 2570 + }, + { + "epoch": 0.15, + "learning_rate": 0.0001986148817762203, + "loss": 3.6759, + "step": 2575 + }, + { + "epoch": 0.15, + "learning_rate": 0.0001985982228172422, + "loss": 3.7664, + "step": 2580 + }, + { + "epoch": 0.15, + "learning_rate": 0.0001985814649837452, + "loss": 3.835, + "step": 2585 + }, + { + "epoch": 0.15, + "learning_rate": 0.000198564608292534, + "loss": 4.0195, + "step": 2590 + }, + { + "epoch": 0.15, + "learning_rate": 0.00019854765276051264, + "loss": 3.8679, + "step": 2595 + }, + { + "epoch": 0.15, + "learning_rate": 0.00019853059840468408, + "loss": 3.9831, + "step": 2600 + }, + { + "epoch": 0.15, + "learning_rate": 0.0001985134452421505, + "loss": 3.8295, + "step": 2605 + }, + { + "epoch": 0.15, + "learning_rate": 0.00019849619329011315, + "loss": 3.8679, + "step": 2610 + }, + { + "epoch": 0.15, + "learning_rate": 0.0001984788425658723, + "loss": 3.7208, + "step": 2615 + }, + { + "epoch": 0.15, + "learning_rate": 0.00019846139308682729, + "loss": 3.906, + "step": 2620 + }, + { + "epoch": 0.15, + "learning_rate": 0.0001984438448704765, + "loss": 3.8421, + "step": 2625 + }, + { + "epoch": 0.15, + "learning_rate": 0.0001984261979344173, + "loss": 3.8865, + "step": 2630 + }, + { + "epoch": 0.15, + "learning_rate": 0.00019840845229634612, + "loss": 3.8744, + "step": 2635 + }, + { + "epoch": 0.15, + "learning_rate": 0.00019839060797405833, + "loss": 3.6772, + "step": 2640 + }, + { + "epoch": 0.15, + "learning_rate": 0.0001983726649854482, + "loss": 3.5666, + "step": 2645 + }, + { + "epoch": 0.15, + "learning_rate": 0.0001983546233485091, + "loss": 3.625, + "step": 2650 + }, + { + "epoch": 0.15, + "learning_rate": 0.0001983364830813331, + "loss": 3.9214, + "step": 2655 + }, + { + "epoch": 0.15, + "learning_rate": 0.00019831824420211137, + "loss": 3.8779, + "step": 2660 + }, + { + "epoch": 0.15, + "learning_rate": 0.00019829990672913387, + "loss": 3.693, + "step": 2665 + }, + { + "epoch": 0.15, + "learning_rate": 0.0001982814706807895, + "loss": 3.5565, + "step": 2670 + }, + { + "epoch": 0.15, + "learning_rate": 0.00019826293607556593, + "loss": 3.5689, + "step": 2675 + }, + { + "epoch": 0.15, + "learning_rate": 0.00019824430293204973, + "loss": 3.8407, + "step": 2680 + }, + { + "epoch": 0.15, + "learning_rate": 0.00019822557126892627, + "loss": 3.9356, + "step": 2685 + }, + { + "epoch": 0.15, + "learning_rate": 0.00019820674110497966, + "loss": 3.947, + "step": 2690 + }, + { + "epoch": 0.15, + "learning_rate": 0.00019818781245909285, + "loss": 4.0446, + "step": 2695 + }, + { + "epoch": 0.15, + "learning_rate": 0.00019816878535024754, + "loss": 4.0271, + "step": 2700 + }, + { + "epoch": 0.16, + "learning_rate": 0.00019814965979752413, + "loss": 3.9889, + "step": 2705 + }, + { + "epoch": 0.16, + "learning_rate": 0.0001981304358201018, + "loss": 3.849, + "step": 2710 + }, + { + "epoch": 0.16, + "learning_rate": 0.00019811111343725842, + "loss": 3.8934, + "step": 2715 + }, + { + "epoch": 0.16, + "learning_rate": 0.00019809169266837043, + "loss": 3.6373, + "step": 2720 + }, + { + "epoch": 0.16, + "learning_rate": 0.0001980721735329131, + "loss": 3.8044, + "step": 2725 + }, + { + "epoch": 0.16, + "learning_rate": 0.0001980525560504602, + "loss": 3.9025, + "step": 2730 + }, + { + "epoch": 0.16, + "learning_rate": 0.00019803284024068427, + "loss": 3.9514, + "step": 2735 + }, + { + "epoch": 0.16, + "learning_rate": 0.00019801302612335628, + "loss": 4.0759, + "step": 2740 + }, + { + "epoch": 0.16, + "learning_rate": 0.00019799311371834595, + "loss": 4.0086, + "step": 2745 + }, + { + "epoch": 0.16, + "learning_rate": 0.00019797310304562143, + "loss": 3.9306, + "step": 2750 + }, + { + "epoch": 0.16, + "learning_rate": 0.00019795299412524945, + "loss": 3.7961, + "step": 2755 + }, + { + "epoch": 0.16, + "learning_rate": 0.00019793278697739533, + "loss": 3.8123, + "step": 2760 + }, + { + "epoch": 0.16, + "learning_rate": 0.00019791248162232285, + "loss": 3.8225, + "step": 2765 + }, + { + "epoch": 0.16, + "learning_rate": 0.00019789207808039425, + "loss": 3.8726, + "step": 2770 + }, + { + "epoch": 0.16, + "learning_rate": 0.0001978715763720702, + "loss": 3.9609, + "step": 2775 + }, + { + "epoch": 0.16, + "learning_rate": 0.00019785097651790992, + "loss": 4.164, + "step": 2780 + }, + { + "epoch": 0.16, + "learning_rate": 0.00019783027853857097, + "loss": 4.1816, + "step": 2785 + }, + { + "epoch": 0.16, + "learning_rate": 0.00019780948245480933, + "loss": 4.0187, + "step": 2790 + }, + { + "epoch": 0.16, + "learning_rate": 0.00019778858828747934, + "loss": 3.8095, + "step": 2795 + }, + { + "epoch": 0.16, + "learning_rate": 0.00019776759605753377, + "loss": 3.9314, + "step": 2800 + }, + { + "epoch": 0.16, + "learning_rate": 0.0001977465057860236, + "loss": 3.9116, + "step": 2805 + }, + { + "epoch": 0.16, + "learning_rate": 0.00019772531749409828, + "loss": 3.8993, + "step": 2810 + }, + { + "epoch": 0.16, + "learning_rate": 0.0001977040312030054, + "loss": 3.9434, + "step": 2815 + }, + { + "epoch": 0.16, + "learning_rate": 0.00019768264693409098, + "loss": 4.136, + "step": 2820 + }, + { + "epoch": 0.16, + "learning_rate": 0.00019766116470879913, + "loss": 4.0453, + "step": 2825 + }, + { + "epoch": 0.16, + "learning_rate": 0.00019763958454867235, + "loss": 3.9698, + "step": 2830 + }, + { + "epoch": 0.16, + "learning_rate": 0.00019761790647535124, + "loss": 3.9778, + "step": 2835 + }, + { + "epoch": 0.16, + "learning_rate": 0.00019759613051057462, + "loss": 3.8023, + "step": 2840 + }, + { + "epoch": 0.16, + "learning_rate": 0.00019757425667617945, + "loss": 3.8562, + "step": 2845 + }, + { + "epoch": 0.16, + "learning_rate": 0.00019755228499410092, + "loss": 3.7574, + "step": 2850 + }, + { + "epoch": 0.16, + "learning_rate": 0.00019753021548637222, + "loss": 3.9049, + "step": 2855 + }, + { + "epoch": 0.16, + "learning_rate": 0.00019750804817512477, + "loss": 4.0198, + "step": 2860 + }, + { + "epoch": 0.16, + "learning_rate": 0.0001974857830825879, + "loss": 4.0176, + "step": 2865 + }, + { + "epoch": 0.16, + "learning_rate": 0.0001974634202310892, + "loss": 3.9994, + "step": 2870 + }, + { + "epoch": 0.16, + "learning_rate": 0.00019744095964305413, + "loss": 3.8939, + "step": 2875 + }, + { + "epoch": 0.17, + "learning_rate": 0.00019741840134100623, + "loss": 3.9773, + "step": 2880 + }, + { + "epoch": 0.17, + "learning_rate": 0.000197395745347567, + "loss": 3.8911, + "step": 2885 + }, + { + "epoch": 0.17, + "learning_rate": 0.00019737299168545597, + "loss": 3.9289, + "step": 2890 + }, + { + "epoch": 0.17, + "learning_rate": 0.00019735014037749053, + "loss": 3.8102, + "step": 2895 + }, + { + "epoch": 0.17, + "learning_rate": 0.000197327191446586, + "loss": 3.7984, + "step": 2900 + }, + { + "epoch": 0.17, + "learning_rate": 0.00019730414491575564, + "loss": 4.0121, + "step": 2905 + }, + { + "epoch": 0.17, + "learning_rate": 0.00019728100080811057, + "loss": 3.9728, + "step": 2910 + }, + { + "epoch": 0.17, + "learning_rate": 0.00019725775914685977, + "loss": 3.9335, + "step": 2915 + }, + { + "epoch": 0.17, + "learning_rate": 0.00019723441995531, + "loss": 3.8701, + "step": 2920 + }, + { + "epoch": 0.17, + "learning_rate": 0.00019721098325686584, + "loss": 3.8718, + "step": 2925 + }, + { + "epoch": 0.17, + "learning_rate": 0.0001971874490750297, + "loss": 3.8089, + "step": 2930 + }, + { + "epoch": 0.17, + "learning_rate": 0.0001971638174334017, + "loss": 3.9738, + "step": 2935 + }, + { + "epoch": 0.17, + "learning_rate": 0.0001971400883556797, + "loss": 4.1796, + "step": 2940 + }, + { + "epoch": 0.17, + "learning_rate": 0.00019711626186565929, + "loss": 4.2039, + "step": 2945 + }, + { + "epoch": 0.17, + "learning_rate": 0.0001970923379872337, + "loss": 4.0764, + "step": 2950 + }, + { + "epoch": 0.17, + "learning_rate": 0.00019706831674439382, + "loss": 3.9673, + "step": 2955 + }, + { + "epoch": 0.17, + "learning_rate": 0.00019704419816122826, + "loss": 3.9997, + "step": 2960 + }, + { + "epoch": 0.17, + "learning_rate": 0.00019701998226192319, + "loss": 3.9886, + "step": 2965 + }, + { + "epoch": 0.17, + "learning_rate": 0.00019699566907076236, + "loss": 3.975, + "step": 2970 + }, + { + "epoch": 0.17, + "learning_rate": 0.00019697125861212707, + "loss": 3.8967, + "step": 2975 + }, + { + "epoch": 0.17, + "learning_rate": 0.00019694675091049617, + "loss": 3.9401, + "step": 2980 + }, + { + "epoch": 0.17, + "learning_rate": 0.0001969221459904461, + "loss": 3.8775, + "step": 2985 + }, + { + "epoch": 0.17, + "learning_rate": 0.0001968974438766507, + "loss": 4.0269, + "step": 2990 + }, + { + "epoch": 0.17, + "learning_rate": 0.0001968726445938813, + "loss": 4.1525, + "step": 2995 + }, + { + "epoch": 0.17, + "learning_rate": 0.00019684774816700664, + "loss": 4.1041, + "step": 3000 + }, + { + "epoch": 0.17, + "learning_rate": 0.00019682275462099298, + "loss": 3.9867, + "step": 3005 + }, + { + "epoch": 0.17, + "learning_rate": 0.00019679766398090383, + "loss": 3.9841, + "step": 3010 + }, + { + "epoch": 0.17, + "learning_rate": 0.00019677247627190026, + "loss": 4.1187, + "step": 3015 + }, + { + "epoch": 0.17, + "learning_rate": 0.00019674719151924043, + "loss": 4.0399, + "step": 3020 + }, + { + "epoch": 0.17, + "learning_rate": 0.00019672180974828, + "loss": 4.1619, + "step": 3025 + }, + { + "epoch": 0.17, + "learning_rate": 0.0001966963309844719, + "loss": 4.0161, + "step": 3030 + }, + { + "epoch": 0.17, + "learning_rate": 0.00019667075525336622, + "loss": 4.1612, + "step": 3035 + }, + { + "epoch": 0.17, + "learning_rate": 0.00019664508258061044, + "loss": 4.1662, + "step": 3040 + }, + { + "epoch": 0.17, + "learning_rate": 0.0001966193129919491, + "loss": 4.1021, + "step": 3045 + }, + { + "epoch": 0.17, + "learning_rate": 0.00019659344651322405, + "loss": 4.0468, + "step": 3050 + }, + { + "epoch": 0.18, + "learning_rate": 0.00019656748317037424, + "loss": 4.0429, + "step": 3055 + }, + { + "epoch": 0.18, + "learning_rate": 0.00019654142298943574, + "loss": 4.1886, + "step": 3060 + }, + { + "epoch": 0.18, + "learning_rate": 0.00019651526599654182, + "loss": 4.1133, + "step": 3065 + }, + { + "epoch": 0.18, + "learning_rate": 0.0001964890122179227, + "loss": 3.9713, + "step": 3070 + }, + { + "epoch": 0.18, + "learning_rate": 0.00019646266167990578, + "loss": 3.9225, + "step": 3075 + }, + { + "epoch": 0.18, + "learning_rate": 0.00019643621440891543, + "loss": 3.8559, + "step": 3080 + }, + { + "epoch": 0.18, + "learning_rate": 0.00019640967043147302, + "loss": 3.9145, + "step": 3085 + }, + { + "epoch": 0.18, + "learning_rate": 0.0001963830297741969, + "loss": 4.2188, + "step": 3090 + }, + { + "epoch": 0.18, + "learning_rate": 0.0001963562924638024, + "loss": 4.0723, + "step": 3095 + }, + { + "epoch": 0.18, + "learning_rate": 0.00019632945852710173, + "loss": 4.0658, + "step": 3100 + }, + { + "epoch": 0.18, + "learning_rate": 0.00019630252799100409, + "loss": 4.0304, + "step": 3105 + }, + { + "epoch": 0.18, + "learning_rate": 0.0001962755008825154, + "loss": 3.9481, + "step": 3110 + }, + { + "epoch": 0.18, + "learning_rate": 0.00019624837722873856, + "loss": 3.9624, + "step": 3115 + }, + { + "epoch": 0.18, + "learning_rate": 0.00019622115705687318, + "loss": 3.8568, + "step": 3120 + }, + { + "epoch": 0.18, + "learning_rate": 0.00019619384039421575, + "loss": 3.9446, + "step": 3125 + }, + { + "epoch": 0.18, + "learning_rate": 0.00019616642726815947, + "loss": 3.8899, + "step": 3130 + }, + { + "epoch": 0.18, + "learning_rate": 0.0001961389177061943, + "loss": 3.9193, + "step": 3135 + }, + { + "epoch": 0.18, + "learning_rate": 0.00019611131173590687, + "loss": 4.005, + "step": 3140 + }, + { + "epoch": 0.18, + "learning_rate": 0.0001960836093849805, + "loss": 3.972, + "step": 3145 + }, + { + "epoch": 0.18, + "learning_rate": 0.00019605581068119518, + "loss": 3.9895, + "step": 3150 + }, + { + "epoch": 0.18, + "learning_rate": 0.00019602791565242754, + "loss": 3.8762, + "step": 3155 + }, + { + "epoch": 0.18, + "learning_rate": 0.00019599992432665073, + "loss": 3.8782, + "step": 3160 + }, + { + "epoch": 0.18, + "learning_rate": 0.00019597183673193452, + "loss": 3.7442, + "step": 3165 + }, + { + "epoch": 0.18, + "learning_rate": 0.00019594365289644529, + "loss": 3.87, + "step": 3170 + }, + { + "epoch": 0.18, + "learning_rate": 0.00019591537284844573, + "loss": 3.7133, + "step": 3175 + }, + { + "epoch": 0.18, + "learning_rate": 0.00019588699661629523, + "loss": 3.6664, + "step": 3180 + }, + { + "epoch": 0.18, + "learning_rate": 0.0001958585242284495, + "loss": 3.6013, + "step": 3185 + }, + { + "epoch": 0.18, + "learning_rate": 0.00019582995571346072, + "loss": 3.4943, + "step": 3190 + }, + { + "epoch": 0.18, + "learning_rate": 0.0001958012910999775, + "loss": 3.4814, + "step": 3195 + }, + { + "epoch": 0.18, + "learning_rate": 0.0001957725304167447, + "loss": 3.5685, + "step": 3200 + }, + { + "epoch": 0.18, + "learning_rate": 0.00019574367369260364, + "loss": 3.8341, + "step": 3205 + }, + { + "epoch": 0.18, + "learning_rate": 0.00019571472095649192, + "loss": 3.8938, + "step": 3210 + }, + { + "epoch": 0.18, + "learning_rate": 0.00019568567223744339, + "loss": 3.8136, + "step": 3215 + }, + { + "epoch": 0.18, + "learning_rate": 0.00019565652756458818, + "loss": 3.6728, + "step": 3220 + }, + { + "epoch": 0.19, + "learning_rate": 0.00019562728696715263, + "loss": 3.7193, + "step": 3225 + }, + { + "epoch": 0.19, + "learning_rate": 0.00019559795047445927, + "loss": 3.7583, + "step": 3230 + }, + { + "epoch": 0.19, + "learning_rate": 0.0001955685181159268, + "loss": 3.7125, + "step": 3235 + }, + { + "epoch": 0.19, + "learning_rate": 0.0001955389899210701, + "loss": 3.7783, + "step": 3240 + }, + { + "epoch": 0.19, + "learning_rate": 0.00019550936591950006, + "loss": 3.704, + "step": 3245 + }, + { + "epoch": 0.19, + "learning_rate": 0.0001954796461409237, + "loss": 3.8314, + "step": 3250 + }, + { + "epoch": 0.19, + "learning_rate": 0.0001954498306151441, + "loss": 3.8665, + "step": 3255 + }, + { + "epoch": 0.19, + "learning_rate": 0.0001954199193720603, + "loss": 4.0657, + "step": 3260 + }, + { + "epoch": 0.19, + "learning_rate": 0.00019538991244166738, + "loss": 3.8345, + "step": 3265 + }, + { + "epoch": 0.19, + "learning_rate": 0.00019535980985405639, + "loss": 3.7016, + "step": 3270 + }, + { + "epoch": 0.19, + "learning_rate": 0.00019532961163941422, + "loss": 3.8221, + "step": 3275 + }, + { + "epoch": 0.19, + "learning_rate": 0.00019529931782802376, + "loss": 3.9244, + "step": 3280 + }, + { + "epoch": 0.19, + "learning_rate": 0.00019526892845026365, + "loss": 3.9155, + "step": 3285 + }, + { + "epoch": 0.19, + "learning_rate": 0.00019523844353660849, + "loss": 3.9713, + "step": 3290 + }, + { + "epoch": 0.19, + "learning_rate": 0.0001952078631176286, + "loss": 3.784, + "step": 3295 + }, + { + "epoch": 0.19, + "learning_rate": 0.00019517718722399002, + "loss": 4.0003, + "step": 3300 + }, + { + "epoch": 0.19, + "learning_rate": 0.00019514641588645471, + "loss": 4.0561, + "step": 3305 + }, + { + "epoch": 0.19, + "learning_rate": 0.0001951155491358802, + "loss": 4.0978, + "step": 3310 + }, + { + "epoch": 0.19, + "learning_rate": 0.00019508458700321973, + "loss": 4.0968, + "step": 3315 + }, + { + "epoch": 0.19, + "learning_rate": 0.00019505352951952221, + "loss": 3.9513, + "step": 3320 + }, + { + "epoch": 0.19, + "learning_rate": 0.00019502237671593212, + "loss": 3.8549, + "step": 3325 + }, + { + "epoch": 0.19, + "learning_rate": 0.0001949911286236896, + "loss": 3.8865, + "step": 3330 + }, + { + "epoch": 0.19, + "learning_rate": 0.00019495978527413028, + "loss": 4.0474, + "step": 3335 + }, + { + "epoch": 0.19, + "learning_rate": 0.00019492834669868536, + "loss": 4.0087, + "step": 3340 + }, + { + "epoch": 0.19, + "learning_rate": 0.00019489681292888148, + "loss": 4.0238, + "step": 3345 + }, + { + "epoch": 0.19, + "learning_rate": 0.00019486518399634083, + "loss": 3.9486, + "step": 3350 + }, + { + "epoch": 0.19, + "learning_rate": 0.00019483345993278093, + "loss": 3.8412, + "step": 3355 + }, + { + "epoch": 0.19, + "learning_rate": 0.00019480164077001475, + "loss": 3.9113, + "step": 3360 + }, + { + "epoch": 0.19, + "learning_rate": 0.00019476972653995062, + "loss": 3.7728, + "step": 3365 + }, + { + "epoch": 0.19, + "learning_rate": 0.00019473771727459224, + "loss": 3.9642, + "step": 3370 + }, + { + "epoch": 0.19, + "learning_rate": 0.00019470561300603852, + "loss": 3.9977, + "step": 3375 + }, + { + "epoch": 0.19, + "learning_rate": 0.00019467341376648372, + "loss": 3.8893, + "step": 3380 + }, + { + "epoch": 0.19, + "learning_rate": 0.00019464111958821727, + "loss": 3.8033, + "step": 3385 + }, + { + "epoch": 0.19, + "learning_rate": 0.0001946087305036239, + "loss": 3.6589, + "step": 3390 + }, + { + "epoch": 0.19, + "learning_rate": 0.00019457624654518343, + "loss": 3.5628, + "step": 3395 + }, + { + "epoch": 0.2, + "learning_rate": 0.00019454366774547083, + "loss": 3.5085, + "step": 3400 + }, + { + "epoch": 0.2, + "learning_rate": 0.00019451099413715626, + "loss": 3.5625, + "step": 3405 + }, + { + "epoch": 0.2, + "learning_rate": 0.0001944782257530048, + "loss": 3.747, + "step": 3410 + }, + { + "epoch": 0.2, + "learning_rate": 0.00019444536262587669, + "loss": 3.8478, + "step": 3415 + }, + { + "epoch": 0.2, + "learning_rate": 0.00019441240478872718, + "loss": 3.8523, + "step": 3420 + }, + { + "epoch": 0.2, + "learning_rate": 0.0001943793522746064, + "loss": 3.8604, + "step": 3425 + }, + { + "epoch": 0.2, + "learning_rate": 0.00019434620511665958, + "loss": 3.9138, + "step": 3430 + }, + { + "epoch": 0.2, + "learning_rate": 0.0001943129633481267, + "loss": 3.7584, + "step": 3435 + }, + { + "epoch": 0.2, + "learning_rate": 0.00019427962700234268, + "loss": 3.8157, + "step": 3440 + }, + { + "epoch": 0.2, + "learning_rate": 0.00019424619611273727, + "loss": 3.9208, + "step": 3445 + }, + { + "epoch": 0.2, + "learning_rate": 0.0001942126707128351, + "loss": 3.8077, + "step": 3450 + }, + { + "epoch": 0.2, + "learning_rate": 0.00019417905083625545, + "loss": 3.9654, + "step": 3455 + }, + { + "epoch": 0.2, + "learning_rate": 0.00019414533651671242, + "loss": 3.8997, + "step": 3460 + }, + { + "epoch": 0.2, + "learning_rate": 0.00019411152778801486, + "loss": 3.7784, + "step": 3465 + }, + { + "epoch": 0.2, + "learning_rate": 0.00019407762468406619, + "loss": 3.7086, + "step": 3470 + }, + { + "epoch": 0.2, + "learning_rate": 0.00019404362723886452, + "loss": 3.7156, + "step": 3475 + }, + { + "epoch": 0.2, + "learning_rate": 0.00019400953548650258, + "loss": 3.7288, + "step": 3480 + }, + { + "epoch": 0.2, + "learning_rate": 0.00019397534946116762, + "loss": 3.9152, + "step": 3485 + }, + { + "epoch": 0.2, + "learning_rate": 0.00019394106919714155, + "loss": 4.0103, + "step": 3490 + }, + { + "epoch": 0.2, + "learning_rate": 0.0001939066947288006, + "loss": 3.9918, + "step": 3495 + }, + { + "epoch": 0.2, + "learning_rate": 0.0001938722260906156, + "loss": 3.8941, + "step": 3500 + }, + { + "epoch": 0.2, + "learning_rate": 0.00019383766331715178, + "loss": 3.8285, + "step": 3505 + }, + { + "epoch": 0.2, + "learning_rate": 0.00019380300644306878, + "loss": 3.7736, + "step": 3510 + }, + { + "epoch": 0.2, + "learning_rate": 0.00019376825550312057, + "loss": 3.7274, + "step": 3515 + }, + { + "epoch": 0.2, + "learning_rate": 0.00019373341053215547, + "loss": 3.7859, + "step": 3520 + }, + { + "epoch": 0.2, + "learning_rate": 0.0001936984715651161, + "loss": 3.7412, + "step": 3525 + }, + { + "epoch": 0.2, + "learning_rate": 0.00019366343863703932, + "loss": 3.6731, + "step": 3530 + }, + { + "epoch": 0.2, + "learning_rate": 0.0001936283117830563, + "loss": 3.7049, + "step": 3535 + }, + { + "epoch": 0.2, + "learning_rate": 0.00019359309103839225, + "loss": 3.7449, + "step": 3540 + }, + { + "epoch": 0.2, + "learning_rate": 0.0001935577764383666, + "loss": 3.6854, + "step": 3545 + }, + { + "epoch": 0.2, + "learning_rate": 0.00019352236801839298, + "loss": 3.7409, + "step": 3550 + }, + { + "epoch": 0.2, + "learning_rate": 0.000193486865813979, + "loss": 3.7686, + "step": 3555 + }, + { + "epoch": 0.2, + "learning_rate": 0.00019345126986072635, + "loss": 3.6891, + "step": 3560 + }, + { + "epoch": 0.2, + "learning_rate": 0.0001934155801943307, + "loss": 3.7046, + "step": 3565 + }, + { + "epoch": 0.2, + "learning_rate": 0.0001933797968505818, + "loss": 3.6589, + "step": 3570 + }, + { + "epoch": 0.21, + "learning_rate": 0.0001933439198653632, + "loss": 3.7049, + "step": 3575 + }, + { + "epoch": 0.21, + "learning_rate": 0.00019330794927465247, + "loss": 3.5971, + "step": 3580 + }, + { + "epoch": 0.21, + "learning_rate": 0.00019327188511452094, + "loss": 3.6787, + "step": 3585 + }, + { + "epoch": 0.21, + "learning_rate": 0.00019323572742113387, + "loss": 3.9087, + "step": 3590 + }, + { + "epoch": 0.21, + "learning_rate": 0.0001931994762307503, + "loss": 3.7466, + "step": 3595 + }, + { + "epoch": 0.21, + "learning_rate": 0.00019316313157972297, + "loss": 3.7719, + "step": 3600 + }, + { + "epoch": 0.21, + "learning_rate": 0.00019312669350449836, + "loss": 3.7041, + "step": 3605 + }, + { + "epoch": 0.21, + "learning_rate": 0.0001930901620416167, + "loss": 3.6416, + "step": 3610 + }, + { + "epoch": 0.21, + "learning_rate": 0.0001930535372277118, + "loss": 3.7735, + "step": 3615 + }, + { + "epoch": 0.21, + "learning_rate": 0.0001930168190995111, + "loss": 3.6501, + "step": 3620 + }, + { + "epoch": 0.21, + "learning_rate": 0.00019298000769383565, + "loss": 3.5963, + "step": 3625 + }, + { + "epoch": 0.21, + "learning_rate": 0.00019294310304759994, + "loss": 3.6151, + "step": 3630 + }, + { + "epoch": 0.21, + "learning_rate": 0.00019290610519781212, + "loss": 3.5001, + "step": 3635 + }, + { + "epoch": 0.21, + "learning_rate": 0.00019286901418157367, + "loss": 3.622, + "step": 3640 + }, + { + "epoch": 0.21, + "learning_rate": 0.00019283183003607955, + "loss": 3.4281, + "step": 3645 + }, + { + "epoch": 0.21, + "learning_rate": 0.0001927945527986181, + "loss": 3.5798, + "step": 3650 + }, + { + "epoch": 0.21, + "learning_rate": 0.000192757182506571, + "loss": 3.6014, + "step": 3655 + }, + { + "epoch": 0.21, + "learning_rate": 0.00019271971919741332, + "loss": 3.8189, + "step": 3660 + }, + { + "epoch": 0.21, + "learning_rate": 0.0001926821629087133, + "loss": 3.6664, + "step": 3665 + }, + { + "epoch": 0.21, + "learning_rate": 0.0001926445136781325, + "loss": 3.7436, + "step": 3670 + }, + { + "epoch": 0.21, + "learning_rate": 0.00019260677154342564, + "loss": 3.7009, + "step": 3675 + }, + { + "epoch": 0.21, + "learning_rate": 0.0001925689365424406, + "loss": 3.7745, + "step": 3680 + }, + { + "epoch": 0.21, + "learning_rate": 0.00019253100871311843, + "loss": 3.637, + "step": 3685 + }, + { + "epoch": 0.21, + "learning_rate": 0.00019249298809349323, + "loss": 3.649, + "step": 3690 + }, + { + "epoch": 0.21, + "learning_rate": 0.00019245487472169216, + "loss": 3.6066, + "step": 3695 + }, + { + "epoch": 0.21, + "learning_rate": 0.0001924166686359354, + "loss": 3.6039, + "step": 3700 + }, + { + "epoch": 0.21, + "learning_rate": 0.00019237836987453613, + "loss": 3.484, + "step": 3705 + }, + { + "epoch": 0.21, + "learning_rate": 0.00019233997847590035, + "loss": 3.7251, + "step": 3710 + }, + { + "epoch": 0.21, + "learning_rate": 0.00019230149447852714, + "loss": 3.6939, + "step": 3715 + }, + { + "epoch": 0.21, + "learning_rate": 0.00019226291792100826, + "loss": 3.7109, + "step": 3720 + }, + { + "epoch": 0.21, + "learning_rate": 0.0001922242488420284, + "loss": 3.687, + "step": 3725 + }, + { + "epoch": 0.21, + "learning_rate": 0.00019218548728036503, + "loss": 3.6643, + "step": 3730 + }, + { + "epoch": 0.21, + "learning_rate": 0.00019214663327488828, + "loss": 3.8078, + "step": 3735 + }, + { + "epoch": 0.21, + "learning_rate": 0.00019210768686456106, + "loss": 3.6718, + "step": 3740 + }, + { + "epoch": 0.21, + "learning_rate": 0.00019206864808843892, + "loss": 3.748, + "step": 3745 + }, + { + "epoch": 0.22, + "learning_rate": 0.00019202951698566999, + "loss": 3.6618, + "step": 3750 + }, + { + "epoch": 0.22, + "learning_rate": 0.0001919902935954951, + "loss": 3.6457, + "step": 3755 + }, + { + "epoch": 0.22, + "learning_rate": 0.00019195097795724747, + "loss": 3.9062, + "step": 3760 + }, + { + "epoch": 0.22, + "learning_rate": 0.000191911570110353, + "loss": 3.8633, + "step": 3765 + }, + { + "epoch": 0.22, + "learning_rate": 0.0001918720700943299, + "loss": 3.8972, + "step": 3770 + }, + { + "epoch": 0.22, + "learning_rate": 0.0001918324779487889, + "loss": 3.869, + "step": 3775 + }, + { + "epoch": 0.22, + "learning_rate": 0.0001917927937134331, + "loss": 3.8311, + "step": 3780 + }, + { + "epoch": 0.22, + "learning_rate": 0.00019175301742805793, + "loss": 3.6895, + "step": 3785 + }, + { + "epoch": 0.22, + "learning_rate": 0.00019171314913255113, + "loss": 3.7514, + "step": 3790 + }, + { + "epoch": 0.22, + "learning_rate": 0.00019167318886689273, + "loss": 3.7494, + "step": 3795 + }, + { + "epoch": 0.22, + "learning_rate": 0.00019163313667115497, + "loss": 3.7804, + "step": 3800 + }, + { + "epoch": 0.22, + "learning_rate": 0.00019159299258550227, + "loss": 3.7613, + "step": 3805 + }, + { + "epoch": 0.22, + "learning_rate": 0.0001915527566501912, + "loss": 3.7294, + "step": 3810 + }, + { + "epoch": 0.22, + "learning_rate": 0.0001915124289055705, + "loss": 3.7854, + "step": 3815 + }, + { + "epoch": 0.22, + "learning_rate": 0.00019147200939208088, + "loss": 3.7894, + "step": 3820 + }, + { + "epoch": 0.22, + "learning_rate": 0.0001914314981502551, + "loss": 3.8074, + "step": 3825 + }, + { + "epoch": 0.22, + "learning_rate": 0.000191390895220718, + "loss": 3.8708, + "step": 3830 + }, + { + "epoch": 0.22, + "learning_rate": 0.0001913502006441862, + "loss": 3.7719, + "step": 3835 + }, + { + "epoch": 0.22, + "learning_rate": 0.00019130941446146837, + "loss": 3.7068, + "step": 3840 + }, + { + "epoch": 0.22, + "learning_rate": 0.00019126853671346496, + "loss": 3.609, + "step": 3845 + }, + { + "epoch": 0.22, + "learning_rate": 0.00019122756744116828, + "loss": 3.4527, + "step": 3850 + }, + { + "epoch": 0.22, + "learning_rate": 0.0001911865066856624, + "loss": 3.6164, + "step": 3855 + }, + { + "epoch": 0.22, + "learning_rate": 0.00019114535448812311, + "loss": 3.7123, + "step": 3860 + }, + { + "epoch": 0.22, + "learning_rate": 0.00019110411088981802, + "loss": 3.7241, + "step": 3865 + }, + { + "epoch": 0.22, + "learning_rate": 0.0001910627759321062, + "loss": 3.7562, + "step": 3870 + }, + { + "epoch": 0.22, + "learning_rate": 0.00019102134965643847, + "loss": 3.726, + "step": 3875 + }, + { + "epoch": 0.22, + "learning_rate": 0.0001909798321043572, + "loss": 3.7099, + "step": 3880 + }, + { + "epoch": 0.22, + "learning_rate": 0.00019093822331749634, + "loss": 3.7649, + "step": 3885 + }, + { + "epoch": 0.22, + "learning_rate": 0.00019089652333758114, + "loss": 3.7754, + "step": 3890 + }, + { + "epoch": 0.22, + "learning_rate": 0.00019085473220642856, + "loss": 3.7369, + "step": 3895 + }, + { + "epoch": 0.22, + "learning_rate": 0.0001908128499659468, + "loss": 3.6229, + "step": 3900 + }, + { + "epoch": 0.22, + "learning_rate": 0.00019077087665813545, + "loss": 3.6502, + "step": 3905 + }, + { + "epoch": 0.22, + "learning_rate": 0.0001907288123250854, + "loss": 3.6347, + "step": 3910 + }, + { + "epoch": 0.22, + "learning_rate": 0.00019068665700897896, + "loss": 3.7046, + "step": 3915 + }, + { + "epoch": 0.22, + "learning_rate": 0.0001906444107520895, + "loss": 3.6719, + "step": 3920 + }, + { + "epoch": 0.23, + "learning_rate": 0.00019060207359678164, + "loss": 3.6832, + "step": 3925 + }, + { + "epoch": 0.23, + "learning_rate": 0.00019055964558551124, + "loss": 3.7177, + "step": 3930 + }, + { + "epoch": 0.23, + "learning_rate": 0.00019051712676082522, + "loss": 3.741, + "step": 3935 + }, + { + "epoch": 0.23, + "learning_rate": 0.00019047451716536147, + "loss": 3.65, + "step": 3940 + }, + { + "epoch": 0.23, + "learning_rate": 0.000190431816841849, + "loss": 3.7518, + "step": 3945 + }, + { + "epoch": 0.23, + "learning_rate": 0.00019038902583310785, + "loss": 3.7061, + "step": 3950 + }, + { + "epoch": 0.23, + "learning_rate": 0.00019034614418204893, + "loss": 3.8555, + "step": 3955 + }, + { + "epoch": 0.23, + "learning_rate": 0.000190303171931674, + "loss": 3.9051, + "step": 3960 + }, + { + "epoch": 0.23, + "learning_rate": 0.00019026010912507577, + "loss": 3.7699, + "step": 3965 + }, + { + "epoch": 0.23, + "learning_rate": 0.00019021695580543772, + "loss": 3.584, + "step": 3970 + }, + { + "epoch": 0.23, + "learning_rate": 0.00019017371201603407, + "loss": 3.6451, + "step": 3975 + }, + { + "epoch": 0.23, + "learning_rate": 0.00019013037780022982, + "loss": 3.5583, + "step": 3980 + }, + { + "epoch": 0.23, + "learning_rate": 0.00019008695320148062, + "loss": 3.5669, + "step": 3985 + }, + { + "epoch": 0.23, + "learning_rate": 0.0001900434382633327, + "loss": 3.3989, + "step": 3990 + }, + { + "epoch": 0.23, + "learning_rate": 0.00018999983302942302, + "loss": 3.4922, + "step": 3995 + }, + { + "epoch": 0.23, + "learning_rate": 0.00018995613754347893, + "loss": 3.4463, + "step": 4000 + }, + { + "epoch": 0.23, + "learning_rate": 0.00018991235184931843, + "loss": 3.4361, + "step": 4005 + }, + { + "epoch": 0.23, + "learning_rate": 0.00018986847599084986, + "loss": 3.5724, + "step": 4010 + }, + { + "epoch": 0.23, + "learning_rate": 0.00018982451001207205, + "loss": 3.5376, + "step": 4015 + }, + { + "epoch": 0.23, + "learning_rate": 0.00018978045395707418, + "loss": 3.6229, + "step": 4020 + }, + { + "epoch": 0.23, + "learning_rate": 0.00018973630787003575, + "loss": 3.6468, + "step": 4025 + }, + { + "epoch": 0.23, + "learning_rate": 0.0001896920717952266, + "loss": 3.4721, + "step": 4030 + }, + { + "epoch": 0.23, + "learning_rate": 0.00018964774577700667, + "loss": 3.5094, + "step": 4035 + }, + { + "epoch": 0.23, + "learning_rate": 0.00018960332985982627, + "loss": 3.5052, + "step": 4040 + }, + { + "epoch": 0.23, + "learning_rate": 0.0001895588240882258, + "loss": 3.5885, + "step": 4045 + }, + { + "epoch": 0.23, + "learning_rate": 0.0001895142285068357, + "loss": 3.5499, + "step": 4050 + }, + { + "epoch": 0.23, + "learning_rate": 0.00018946954316037648, + "loss": 3.5257, + "step": 4055 + }, + { + "epoch": 0.23, + "learning_rate": 0.0001894247680936588, + "loss": 3.6446, + "step": 4060 + }, + { + "epoch": 0.23, + "learning_rate": 0.00018937990335158312, + "loss": 3.6611, + "step": 4065 + }, + { + "epoch": 0.23, + "learning_rate": 0.00018933494897913997, + "loss": 3.7228, + "step": 4070 + }, + { + "epoch": 0.23, + "learning_rate": 0.00018928990502140963, + "loss": 3.6856, + "step": 4075 + }, + { + "epoch": 0.23, + "learning_rate": 0.00018924477152356233, + "loss": 3.6701, + "step": 4080 + }, + { + "epoch": 0.23, + "learning_rate": 0.00018919954853085803, + "loss": 3.6045, + "step": 4085 + }, + { + "epoch": 0.23, + "learning_rate": 0.0001891542360886464, + "loss": 3.5751, + "step": 4090 + }, + { + "epoch": 0.23, + "learning_rate": 0.00018910883424236695, + "loss": 3.3857, + "step": 4095 + }, + { + "epoch": 0.24, + "learning_rate": 0.0001890633430375487, + "loss": 3.498, + "step": 4100 + }, + { + "epoch": 0.24, + "learning_rate": 0.00018901776251981032, + "loss": 3.4611, + "step": 4105 + }, + { + "epoch": 0.24, + "learning_rate": 0.0001889720927348601, + "loss": 3.6429, + "step": 4110 + }, + { + "epoch": 0.24, + "learning_rate": 0.00018892633372849575, + "loss": 3.6317, + "step": 4115 + }, + { + "epoch": 0.24, + "learning_rate": 0.00018888048554660454, + "loss": 3.6851, + "step": 4120 + }, + { + "epoch": 0.24, + "learning_rate": 0.00018883454823516313, + "loss": 3.5615, + "step": 4125 + }, + { + "epoch": 0.24, + "learning_rate": 0.0001887885218402375, + "loss": 3.5977, + "step": 4130 + }, + { + "epoch": 0.24, + "learning_rate": 0.00018874240640798316, + "loss": 3.5784, + "step": 4135 + }, + { + "epoch": 0.24, + "learning_rate": 0.0001886962019846446, + "loss": 3.6911, + "step": 4140 + }, + { + "epoch": 0.24, + "learning_rate": 0.00018864990861655584, + "loss": 3.6698, + "step": 4145 + }, + { + "epoch": 0.24, + "learning_rate": 0.0001886035263501399, + "loss": 3.6395, + "step": 4150 + }, + { + "epoch": 0.24, + "learning_rate": 0.00018855705523190908, + "loss": 3.6592, + "step": 4155 + }, + { + "epoch": 0.24, + "learning_rate": 0.0001885104953084647, + "loss": 3.5498, + "step": 4160 + }, + { + "epoch": 0.24, + "learning_rate": 0.00018846384662649714, + "loss": 3.6721, + "step": 4165 + }, + { + "epoch": 0.24, + "learning_rate": 0.00018841710923278582, + "loss": 3.5488, + "step": 4170 + }, + { + "epoch": 0.24, + "learning_rate": 0.00018837028317419908, + "loss": 3.539, + "step": 4175 + }, + { + "epoch": 0.24, + "learning_rate": 0.00018832336849769425, + "loss": 3.6718, + "step": 4180 + }, + { + "epoch": 0.24, + "learning_rate": 0.0001882763652503174, + "loss": 3.6152, + "step": 4185 + }, + { + "epoch": 0.24, + "learning_rate": 0.00018822927347920355, + "loss": 3.6229, + "step": 4190 + }, + { + "epoch": 0.24, + "learning_rate": 0.00018818209323157638, + "loss": 3.5397, + "step": 4195 + }, + { + "epoch": 0.24, + "learning_rate": 0.0001881348245547484, + "loss": 3.518, + "step": 4200 + }, + { + "epoch": 0.24, + "learning_rate": 0.00018808746749612073, + "loss": 3.6075, + "step": 4205 + }, + { + "epoch": 0.24, + "learning_rate": 0.0001880400221031831, + "loss": 3.597, + "step": 4210 + }, + { + "epoch": 0.24, + "learning_rate": 0.00018799248842351393, + "loss": 3.56, + "step": 4215 + }, + { + "epoch": 0.24, + "learning_rate": 0.00018794486650478004, + "loss": 3.5637, + "step": 4220 + }, + { + "epoch": 0.24, + "learning_rate": 0.0001878971563947368, + "loss": 3.5983, + "step": 4225 + }, + { + "epoch": 0.24, + "learning_rate": 0.00018784935814122804, + "loss": 3.6155, + "step": 4230 + }, + { + "epoch": 0.24, + "learning_rate": 0.0001878014717921859, + "loss": 3.6629, + "step": 4235 + }, + { + "epoch": 0.24, + "learning_rate": 0.000187753497395631, + "loss": 3.6388, + "step": 4240 + }, + { + "epoch": 0.24, + "learning_rate": 0.00018770543499967217, + "loss": 3.6751, + "step": 4245 + }, + { + "epoch": 0.24, + "learning_rate": 0.00018765728465250644, + "loss": 3.715, + "step": 4250 + }, + { + "epoch": 0.24, + "learning_rate": 0.00018760904640241906, + "loss": 3.6934, + "step": 4255 + }, + { + "epoch": 0.24, + "learning_rate": 0.00018756072029778352, + "loss": 3.6022, + "step": 4260 + }, + { + "epoch": 0.24, + "learning_rate": 0.00018751230638706131, + "loss": 3.6856, + "step": 4265 + }, + { + "epoch": 0.24, + "learning_rate": 0.00018746380471880203, + "loss": 3.5337, + "step": 4270 + }, + { + "epoch": 0.25, + "learning_rate": 0.00018741521534164325, + "loss": 3.6901, + "step": 4275 + }, + { + "epoch": 0.25, + "learning_rate": 0.00018736653830431048, + "loss": 3.6717, + "step": 4280 + }, + { + "epoch": 0.25, + "learning_rate": 0.0001873177736556172, + "loss": 3.6712, + "step": 4285 + }, + { + "epoch": 0.25, + "learning_rate": 0.00018726892144446474, + "loss": 3.5113, + "step": 4290 + }, + { + "epoch": 0.25, + "learning_rate": 0.0001872199817198421, + "loss": 3.6584, + "step": 4295 + }, + { + "epoch": 0.25, + "learning_rate": 0.00018717095453082627, + "loss": 3.7496, + "step": 4300 + }, + { + "epoch": 0.25, + "learning_rate": 0.00018712183992658174, + "loss": 3.751, + "step": 4305 + }, + { + "epoch": 0.25, + "learning_rate": 0.00018707263795636077, + "loss": 3.8571, + "step": 4310 + }, + { + "epoch": 0.25, + "learning_rate": 0.0001870233486695032, + "loss": 3.8003, + "step": 4315 + }, + { + "epoch": 0.25, + "learning_rate": 0.0001869739721154365, + "loss": 3.7638, + "step": 4320 + }, + { + "epoch": 0.25, + "learning_rate": 0.00018692450834367546, + "loss": 3.5772, + "step": 4325 + }, + { + "epoch": 0.25, + "learning_rate": 0.00018687495740382258, + "loss": 3.5074, + "step": 4330 + }, + { + "epoch": 0.25, + "learning_rate": 0.00018682531934556757, + "loss": 3.4393, + "step": 4335 + }, + { + "epoch": 0.25, + "learning_rate": 0.00018677559421868766, + "loss": 3.5662, + "step": 4340 + }, + { + "epoch": 0.25, + "learning_rate": 0.00018672578207304727, + "loss": 3.5174, + "step": 4345 + }, + { + "epoch": 0.25, + "learning_rate": 0.00018667588295859816, + "loss": 3.5552, + "step": 4350 + }, + { + "epoch": 0.25, + "learning_rate": 0.0001866258969253792, + "loss": 3.6251, + "step": 4355 + }, + { + "epoch": 0.25, + "learning_rate": 0.00018657582402351663, + "loss": 3.7857, + "step": 4360 + }, + { + "epoch": 0.25, + "learning_rate": 0.00018652566430322356, + "loss": 3.6927, + "step": 4365 + }, + { + "epoch": 0.25, + "learning_rate": 0.0001864754178148003, + "loss": 3.7058, + "step": 4370 + }, + { + "epoch": 0.25, + "learning_rate": 0.00018642508460863416, + "loss": 3.5257, + "step": 4375 + }, + { + "epoch": 0.25, + "learning_rate": 0.00018637466473519937, + "loss": 3.5633, + "step": 4380 + }, + { + "epoch": 0.25, + "learning_rate": 0.0001863241582450571, + "loss": 3.4822, + "step": 4385 + }, + { + "epoch": 0.25, + "learning_rate": 0.00018627356518885536, + "loss": 3.4217, + "step": 4390 + }, + { + "epoch": 0.25, + "learning_rate": 0.000186222885617329, + "loss": 3.4629, + "step": 4395 + }, + { + "epoch": 0.25, + "learning_rate": 0.00018617211958129958, + "loss": 3.5844, + "step": 4400 + }, + { + "epoch": 0.25, + "learning_rate": 0.00018612126713167542, + "loss": 3.6745, + "step": 4405 + }, + { + "epoch": 0.25, + "learning_rate": 0.0001860703283194515, + "loss": 3.7324, + "step": 4410 + }, + { + "epoch": 0.25, + "learning_rate": 0.00018601930319570934, + "loss": 3.7076, + "step": 4415 + }, + { + "epoch": 0.25, + "learning_rate": 0.000185968191811617, + "loss": 3.6037, + "step": 4420 + }, + { + "epoch": 0.25, + "learning_rate": 0.00018591699421842916, + "loss": 3.5516, + "step": 4425 + }, + { + "epoch": 0.25, + "learning_rate": 0.00018586571046748685, + "loss": 3.4967, + "step": 4430 + }, + { + "epoch": 0.25, + "learning_rate": 0.00018581434061021754, + "loss": 3.6028, + "step": 4435 + }, + { + "epoch": 0.25, + "learning_rate": 0.00018576288469813505, + "loss": 3.358, + "step": 4440 + }, + { + "epoch": 0.26, + "learning_rate": 0.00018571134278283946, + "loss": 3.3776, + "step": 4445 + }, + { + "epoch": 0.26, + "learning_rate": 0.0001856597149160171, + "loss": 3.4153, + "step": 4450 + }, + { + "epoch": 0.26, + "learning_rate": 0.00018560800114944063, + "loss": 3.5318, + "step": 4455 + }, + { + "epoch": 0.26, + "learning_rate": 0.00018555620153496856, + "loss": 3.5411, + "step": 4460 + }, + { + "epoch": 0.26, + "learning_rate": 0.00018550431612454578, + "loss": 3.6165, + "step": 4465 + }, + { + "epoch": 0.26, + "learning_rate": 0.00018545234497020302, + "loss": 3.7134, + "step": 4470 + }, + { + "epoch": 0.26, + "learning_rate": 0.00018540028812405717, + "loss": 3.6751, + "step": 4475 + }, + { + "epoch": 0.26, + "learning_rate": 0.00018534814563831082, + "loss": 3.6107, + "step": 4480 + }, + { + "epoch": 0.26, + "learning_rate": 0.00018529591756525268, + "loss": 3.5181, + "step": 4485 + }, + { + "epoch": 0.26, + "learning_rate": 0.00018524360395725715, + "loss": 3.4012, + "step": 4490 + }, + { + "epoch": 0.26, + "learning_rate": 0.0001851912048667844, + "loss": 3.3784, + "step": 4495 + }, + { + "epoch": 0.26, + "learning_rate": 0.00018513872034638037, + "loss": 3.3335, + "step": 4500 + }, + { + "epoch": 0.26, + "learning_rate": 0.00018508615044867668, + "loss": 3.4279, + "step": 4505 + }, + { + "epoch": 0.26, + "learning_rate": 0.00018503349522639052, + "loss": 3.5395, + "step": 4510 + }, + { + "epoch": 0.26, + "learning_rate": 0.00018498075473232469, + "loss": 3.4997, + "step": 4515 + }, + { + "epoch": 0.26, + "learning_rate": 0.00018492792901936742, + "loss": 3.4724, + "step": 4520 + }, + { + "epoch": 0.26, + "learning_rate": 0.0001848750181404925, + "loss": 3.4462, + "step": 4525 + }, + { + "epoch": 0.26, + "learning_rate": 0.00018482202214875908, + "loss": 3.5919, + "step": 4530 + }, + { + "epoch": 0.26, + "learning_rate": 0.00018476894109731166, + "loss": 3.6629, + "step": 4535 + }, + { + "epoch": 0.26, + "learning_rate": 0.00018471577503938, + "loss": 3.6637, + "step": 4540 + }, + { + "epoch": 0.26, + "learning_rate": 0.00018466252402827915, + "loss": 3.7109, + "step": 4545 + }, + { + "epoch": 0.26, + "learning_rate": 0.00018460918811740937, + "loss": 3.6368, + "step": 4550 + }, + { + "epoch": 0.26, + "learning_rate": 0.00018455576736025602, + "loss": 3.6998, + "step": 4555 + }, + { + "epoch": 0.26, + "learning_rate": 0.00018450226181038955, + "loss": 3.7358, + "step": 4560 + }, + { + "epoch": 0.26, + "learning_rate": 0.00018444867152146545, + "loss": 3.6636, + "step": 4565 + }, + { + "epoch": 0.26, + "learning_rate": 0.00018439499654722421, + "loss": 3.6544, + "step": 4570 + }, + { + "epoch": 0.26, + "learning_rate": 0.00018434123694149117, + "loss": 3.6887, + "step": 4575 + }, + { + "epoch": 0.26, + "learning_rate": 0.0001842873927581766, + "loss": 3.659, + "step": 4580 + }, + { + "epoch": 0.26, + "learning_rate": 0.00018423346405127555, + "loss": 3.6723, + "step": 4585 + }, + { + "epoch": 0.26, + "learning_rate": 0.0001841794508748679, + "loss": 3.6484, + "step": 4590 + }, + { + "epoch": 0.26, + "learning_rate": 0.00018412535328311814, + "loss": 3.6946, + "step": 4595 + }, + { + "epoch": 0.26, + "learning_rate": 0.00018407117133027544, + "loss": 3.5223, + "step": 4600 + }, + { + "epoch": 0.26, + "learning_rate": 0.0001840169050706736, + "loss": 3.6291, + "step": 4605 + }, + { + "epoch": 0.26, + "learning_rate": 0.00018396255455873102, + "loss": 3.4695, + "step": 4610 + }, + { + "epoch": 0.26, + "learning_rate": 0.0001839081198489504, + "loss": 3.4071, + "step": 4615 + }, + { + "epoch": 0.27, + "learning_rate": 0.0001838536009959191, + "loss": 3.5346, + "step": 4620 + }, + { + "epoch": 0.27, + "learning_rate": 0.00018379899805430862, + "loss": 3.5617, + "step": 4625 + }, + { + "epoch": 0.27, + "learning_rate": 0.00018374431107887502, + "loss": 3.5673, + "step": 4630 + }, + { + "epoch": 0.27, + "learning_rate": 0.00018368954012445846, + "loss": 3.5891, + "step": 4635 + }, + { + "epoch": 0.27, + "learning_rate": 0.00018363468524598342, + "loss": 3.5041, + "step": 4640 + }, + { + "epoch": 0.27, + "learning_rate": 0.0001835797464984585, + "loss": 3.4957, + "step": 4645 + }, + { + "epoch": 0.27, + "learning_rate": 0.00018352472393697632, + "loss": 3.4831, + "step": 4650 + }, + { + "epoch": 0.27, + "learning_rate": 0.0001834696176167137, + "loss": 3.4898, + "step": 4655 + }, + { + "epoch": 0.27, + "learning_rate": 0.00018341442759293137, + "loss": 3.6039, + "step": 4660 + }, + { + "epoch": 0.27, + "learning_rate": 0.00018335915392097402, + "loss": 3.5844, + "step": 4665 + }, + { + "epoch": 0.27, + "learning_rate": 0.00018330379665627014, + "loss": 3.7121, + "step": 4670 + }, + { + "epoch": 0.27, + "learning_rate": 0.00018324835585433225, + "loss": 3.641, + "step": 4675 + }, + { + "epoch": 0.27, + "learning_rate": 0.00018319283157075636, + "loss": 3.5295, + "step": 4680 + }, + { + "epoch": 0.27, + "learning_rate": 0.00018313722386122247, + "loss": 3.5396, + "step": 4685 + }, + { + "epoch": 0.27, + "learning_rate": 0.00018308153278149406, + "loss": 3.5559, + "step": 4690 + }, + { + "epoch": 0.27, + "learning_rate": 0.00018302575838741826, + "loss": 3.5536, + "step": 4695 + }, + { + "epoch": 0.27, + "learning_rate": 0.0001829699007349258, + "loss": 3.7527, + "step": 4700 + }, + { + "epoch": 0.27, + "learning_rate": 0.0001829139598800308, + "loss": 3.5316, + "step": 4705 + }, + { + "epoch": 0.27, + "learning_rate": 0.00018285793587883092, + "loss": 3.5608, + "step": 4710 + }, + { + "epoch": 0.27, + "learning_rate": 0.00018280182878750717, + "loss": 3.5152, + "step": 4715 + }, + { + "epoch": 0.27, + "learning_rate": 0.0001827456386623238, + "loss": 3.434, + "step": 4720 + }, + { + "epoch": 0.27, + "learning_rate": 0.00018268936555962845, + "loss": 3.3267, + "step": 4725 + }, + { + "epoch": 0.27, + "learning_rate": 0.0001826330095358519, + "loss": 3.4513, + "step": 4730 + }, + { + "epoch": 0.27, + "learning_rate": 0.00018257657064750808, + "loss": 3.5652, + "step": 4735 + }, + { + "epoch": 0.27, + "learning_rate": 0.00018252004895119404, + "loss": 3.5403, + "step": 4740 + }, + { + "epoch": 0.27, + "learning_rate": 0.00018246344450358986, + "loss": 3.4439, + "step": 4745 + }, + { + "epoch": 0.27, + "learning_rate": 0.00018240675736145865, + "loss": 3.474, + "step": 4750 + }, + { + "epoch": 0.27, + "learning_rate": 0.0001823499875816464, + "loss": 3.5473, + "step": 4755 + }, + { + "epoch": 0.27, + "learning_rate": 0.00018229313522108187, + "loss": 3.6774, + "step": 4760 + }, + { + "epoch": 0.27, + "learning_rate": 0.00018223620033677685, + "loss": 3.6312, + "step": 4765 + }, + { + "epoch": 0.27, + "learning_rate": 0.00018217918298582572, + "loss": 3.6326, + "step": 4770 + }, + { + "epoch": 0.27, + "learning_rate": 0.0001821220832254056, + "loss": 3.6744, + "step": 4775 + }, + { + "epoch": 0.27, + "learning_rate": 0.00018206490111277629, + "loss": 3.7397, + "step": 4780 + }, + { + "epoch": 0.27, + "learning_rate": 0.00018200763670528011, + "loss": 3.6019, + "step": 4785 + }, + { + "epoch": 0.27, + "learning_rate": 0.00018195029006034193, + "loss": 3.7168, + "step": 4790 + }, + { + "epoch": 0.28, + "learning_rate": 0.00018189286123546916, + "loss": 3.6102, + "step": 4795 + }, + { + "epoch": 0.28, + "learning_rate": 0.00018183535028825149, + "loss": 3.6562, + "step": 4800 + }, + { + "epoch": 0.28, + "learning_rate": 0.00018177775727636105, + "loss": 3.5175, + "step": 4805 + }, + { + "epoch": 0.28, + "learning_rate": 0.00018172008225755224, + "loss": 3.4547, + "step": 4810 + }, + { + "epoch": 0.28, + "learning_rate": 0.00018166232528966169, + "loss": 3.5996, + "step": 4815 + }, + { + "epoch": 0.28, + "learning_rate": 0.0001816044864306082, + "loss": 3.5837, + "step": 4820 + }, + { + "epoch": 0.28, + "learning_rate": 0.00018154656573839275, + "loss": 3.5342, + "step": 4825 + }, + { + "epoch": 0.28, + "learning_rate": 0.00018148856327109832, + "loss": 3.4871, + "step": 4830 + }, + { + "epoch": 0.28, + "learning_rate": 0.00018143047908688993, + "loss": 3.459, + "step": 4835 + }, + { + "epoch": 0.28, + "learning_rate": 0.00018137231324401448, + "loss": 3.4399, + "step": 4840 + }, + { + "epoch": 0.28, + "learning_rate": 0.00018131406580080084, + "loss": 3.5712, + "step": 4845 + }, + { + "epoch": 0.28, + "learning_rate": 0.00018125573681565969, + "loss": 3.5813, + "step": 4850 + }, + { + "epoch": 0.28, + "learning_rate": 0.00018119732634708346, + "loss": 3.5923, + "step": 4855 + }, + { + "epoch": 0.28, + "learning_rate": 0.0001811388344536463, + "loss": 3.6825, + "step": 4860 + }, + { + "epoch": 0.28, + "learning_rate": 0.00018108026119400397, + "loss": 3.6596, + "step": 4865 + }, + { + "epoch": 0.28, + "learning_rate": 0.00018102160662689394, + "loss": 3.6993, + "step": 4870 + }, + { + "epoch": 0.28, + "learning_rate": 0.0001809628708111351, + "loss": 3.707, + "step": 4875 + }, + { + "epoch": 0.28, + "learning_rate": 0.00018090405380562786, + "loss": 3.6086, + "step": 4880 + }, + { + "epoch": 0.28, + "learning_rate": 0.00018084515566935402, + "loss": 3.5868, + "step": 4885 + }, + { + "epoch": 0.28, + "learning_rate": 0.00018078617646137682, + "loss": 3.5422, + "step": 4890 + }, + { + "epoch": 0.28, + "learning_rate": 0.00018072711624084068, + "loss": 3.5596, + "step": 4895 + }, + { + "epoch": 0.28, + "learning_rate": 0.00018066797506697136, + "loss": 3.4794, + "step": 4900 + }, + { + "epoch": 0.28, + "learning_rate": 0.0001806087529990758, + "loss": 3.6109, + "step": 4905 + }, + { + "epoch": 0.28, + "learning_rate": 0.00018054945009654194, + "loss": 3.5043, + "step": 4910 + }, + { + "epoch": 0.28, + "learning_rate": 0.00018049006641883888, + "loss": 3.5935, + "step": 4915 + }, + { + "epoch": 0.28, + "learning_rate": 0.00018043060202551674, + "loss": 3.469, + "step": 4920 + }, + { + "epoch": 0.28, + "learning_rate": 0.00018037105697620655, + "loss": 3.5094, + "step": 4925 + }, + { + "epoch": 0.28, + "learning_rate": 0.0001803114313306202, + "loss": 3.4461, + "step": 4930 + }, + { + "epoch": 0.28, + "learning_rate": 0.00018025172514855043, + "loss": 3.4925, + "step": 4935 + }, + { + "epoch": 0.28, + "learning_rate": 0.0001801919384898707, + "loss": 3.4768, + "step": 4940 + }, + { + "epoch": 0.28, + "learning_rate": 0.00018013207141453523, + "loss": 3.5027, + "step": 4945 + }, + { + "epoch": 0.28, + "learning_rate": 0.00018007212398257888, + "loss": 3.6509, + "step": 4950 + }, + { + "epoch": 0.28, + "learning_rate": 0.00018001209625411705, + "loss": 3.5809, + "step": 4955 + }, + { + "epoch": 0.28, + "learning_rate": 0.00017995198828934568, + "loss": 3.5605, + "step": 4960 + }, + { + "epoch": 0.28, + "learning_rate": 0.00017989180014854115, + "loss": 3.3905, + "step": 4965 + }, + { + "epoch": 0.29, + "learning_rate": 0.0001798315318920603, + "loss": 3.4068, + "step": 4970 + }, + { + "epoch": 0.29, + "learning_rate": 0.00017977118358034023, + "loss": 3.4062, + "step": 4975 + }, + { + "epoch": 0.29, + "learning_rate": 0.0001797107552738984, + "loss": 3.316, + "step": 4980 + }, + { + "epoch": 0.29, + "learning_rate": 0.00017965024703333246, + "loss": 3.3665, + "step": 4985 + }, + { + "epoch": 0.29, + "learning_rate": 0.0001795896589193202, + "loss": 3.4396, + "step": 4990 + }, + { + "epoch": 0.29, + "learning_rate": 0.00017952899099261943, + "loss": 3.4862, + "step": 4995 + }, + { + "epoch": 0.29, + "learning_rate": 0.00017946824331406823, + "loss": 3.5381, + "step": 5000 + }, + { + "epoch": 0.29, + "learning_rate": 0.00017940741594458444, + "loss": 3.6569, + "step": 5005 + }, + { + "epoch": 0.29, + "learning_rate": 0.00017934650894516584, + "loss": 3.6969, + "step": 5010 + }, + { + "epoch": 0.29, + "learning_rate": 0.00017928552237689015, + "loss": 3.6113, + "step": 5015 + }, + { + "epoch": 0.29, + "learning_rate": 0.00017922445630091485, + "loss": 3.5132, + "step": 5020 + }, + { + "epoch": 0.29, + "learning_rate": 0.0001791633107784771, + "loss": 3.613, + "step": 5025 + }, + { + "epoch": 0.29, + "learning_rate": 0.0001791020858708938, + "loss": 3.4321, + "step": 5030 + }, + { + "epoch": 0.29, + "learning_rate": 0.00017904078163956142, + "loss": 3.4922, + "step": 5035 + }, + { + "epoch": 0.29, + "learning_rate": 0.00017897939814595596, + "loss": 3.4979, + "step": 5040 + }, + { + "epoch": 0.29, + "learning_rate": 0.00017891793545163297, + "loss": 3.4183, + "step": 5045 + }, + { + "epoch": 0.29, + "learning_rate": 0.00017885639361822728, + "loss": 3.4688, + "step": 5050 + }, + { + "epoch": 0.29, + "learning_rate": 0.00017879477270745328, + "loss": 3.4716, + "step": 5055 + }, + { + "epoch": 0.29, + "learning_rate": 0.0001787330727811045, + "loss": 3.4939, + "step": 5060 + }, + { + "epoch": 0.29, + "learning_rate": 0.00017867129390105384, + "loss": 3.5855, + "step": 5065 + }, + { + "epoch": 0.29, + "learning_rate": 0.0001786094361292532, + "loss": 3.6108, + "step": 5070 + }, + { + "epoch": 0.29, + "learning_rate": 0.00017854749952773372, + "loss": 3.568, + "step": 5075 + }, + { + "epoch": 0.29, + "learning_rate": 0.00017848548415860563, + "loss": 3.6623, + "step": 5080 + }, + { + "epoch": 0.29, + "learning_rate": 0.00017842339008405803, + "loss": 3.6116, + "step": 5085 + }, + { + "epoch": 0.29, + "learning_rate": 0.00017836121736635894, + "loss": 3.5552, + "step": 5090 + }, + { + "epoch": 0.29, + "learning_rate": 0.00017829896606785543, + "loss": 3.5208, + "step": 5095 + }, + { + "epoch": 0.29, + "learning_rate": 0.00017823663625097312, + "loss": 3.5107, + "step": 5100 + }, + { + "epoch": 0.29, + "learning_rate": 0.00017817422797821656, + "loss": 3.4409, + "step": 5105 + }, + { + "epoch": 0.29, + "learning_rate": 0.0001781117413121689, + "loss": 3.4277, + "step": 5110 + }, + { + "epoch": 0.29, + "learning_rate": 0.00017804917631549189, + "loss": 3.6021, + "step": 5115 + }, + { + "epoch": 0.29, + "learning_rate": 0.00017798653305092584, + "loss": 3.5987, + "step": 5120 + }, + { + "epoch": 0.29, + "learning_rate": 0.00017792381158128956, + "loss": 3.6167, + "step": 5125 + }, + { + "epoch": 0.29, + "learning_rate": 0.00017786101196948034, + "loss": 3.5973, + "step": 5130 + }, + { + "epoch": 0.29, + "learning_rate": 0.00017779813427847368, + "loss": 3.6569, + "step": 5135 + }, + { + "epoch": 0.29, + "learning_rate": 0.00017773517857132355, + "loss": 3.5712, + "step": 5140 + }, + { + "epoch": 0.3, + "learning_rate": 0.00017767214491116198, + "loss": 3.623, + "step": 5145 + }, + { + "epoch": 0.3, + "learning_rate": 0.00017760903336119937, + "loss": 3.4881, + "step": 5150 + }, + { + "epoch": 0.3, + "learning_rate": 0.00017754584398472405, + "loss": 3.4635, + "step": 5155 + }, + { + "epoch": 0.3, + "learning_rate": 0.0001774825768451025, + "loss": 3.4988, + "step": 5160 + }, + { + "epoch": 0.3, + "learning_rate": 0.00017741923200577917, + "loss": 3.5065, + "step": 5165 + }, + { + "epoch": 0.3, + "learning_rate": 0.00017735580953027636, + "loss": 3.4935, + "step": 5170 + }, + { + "epoch": 0.3, + "learning_rate": 0.00017729230948219428, + "loss": 3.6191, + "step": 5175 + }, + { + "epoch": 0.3, + "learning_rate": 0.00017722873192521096, + "loss": 3.5619, + "step": 5180 + }, + { + "epoch": 0.3, + "learning_rate": 0.00017716507692308207, + "loss": 3.5122, + "step": 5185 + }, + { + "epoch": 0.3, + "learning_rate": 0.000177101344539641, + "loss": 3.4819, + "step": 5190 + }, + { + "epoch": 0.3, + "learning_rate": 0.00017703753483879874, + "loss": 3.5116, + "step": 5195 + }, + { + "epoch": 0.3, + "learning_rate": 0.0001769736478845438, + "loss": 3.4395, + "step": 5200 + }, + { + "epoch": 0.3, + "learning_rate": 0.00017690968374094217, + "loss": 3.4563, + "step": 5205 + }, + { + "epoch": 0.3, + "learning_rate": 0.00017684564247213722, + "loss": 3.5256, + "step": 5210 + }, + { + "epoch": 0.3, + "learning_rate": 0.00017678152414234968, + "loss": 3.6109, + "step": 5215 + }, + { + "epoch": 0.3, + "learning_rate": 0.00017671732881587756, + "loss": 3.5264, + "step": 5220 + }, + { + "epoch": 0.3, + "learning_rate": 0.0001766530565570961, + "loss": 3.6489, + "step": 5225 + }, + { + "epoch": 0.3, + "learning_rate": 0.00017658870743045757, + "loss": 3.5348, + "step": 5230 + }, + { + "epoch": 0.3, + "learning_rate": 0.00017652428150049152, + "loss": 3.6791, + "step": 5235 + }, + { + "epoch": 0.3, + "learning_rate": 0.0001764597788318044, + "loss": 3.5172, + "step": 5240 + }, + { + "epoch": 0.3, + "learning_rate": 0.00017639519948907961, + "loss": 3.4979, + "step": 5245 + }, + { + "epoch": 0.3, + "learning_rate": 0.00017633054353707745, + "loss": 3.4882, + "step": 5250 + }, + { + "epoch": 0.3, + "learning_rate": 0.0001762658110406351, + "loss": 3.4439, + "step": 5255 + }, + { + "epoch": 0.3, + "learning_rate": 0.00017620100206466635, + "loss": 3.4567, + "step": 5260 + }, + { + "epoch": 0.3, + "learning_rate": 0.00017613611667416192, + "loss": 3.3919, + "step": 5265 + }, + { + "epoch": 0.3, + "learning_rate": 0.00017607115493418896, + "loss": 3.4757, + "step": 5270 + }, + { + "epoch": 0.3, + "learning_rate": 0.00017600611690989123, + "loss": 3.5503, + "step": 5275 + }, + { + "epoch": 0.3, + "learning_rate": 0.00017594100266648906, + "loss": 3.5627, + "step": 5280 + }, + { + "epoch": 0.3, + "learning_rate": 0.0001758758122692791, + "loss": 3.5196, + "step": 5285 + }, + { + "epoch": 0.3, + "learning_rate": 0.00017581054578363445, + "loss": 3.6055, + "step": 5290 + }, + { + "epoch": 0.3, + "learning_rate": 0.00017574520327500451, + "loss": 3.5199, + "step": 5295 + }, + { + "epoch": 0.3, + "learning_rate": 0.0001756797848089149, + "loss": 3.58, + "step": 5300 + }, + { + "epoch": 0.3, + "learning_rate": 0.00017561429045096733, + "loss": 3.5646, + "step": 5305 + }, + { + "epoch": 0.3, + "learning_rate": 0.00017554872026683978, + "loss": 3.5007, + "step": 5310 + }, + { + "epoch": 0.3, + "learning_rate": 0.00017548307432228608, + "loss": 3.6639, + "step": 5315 + }, + { + "epoch": 0.31, + "learning_rate": 0.00017541735268313623, + "loss": 3.5978, + "step": 5320 + }, + { + "epoch": 0.31, + "learning_rate": 0.000175351555415296, + "loss": 3.5732, + "step": 5325 + }, + { + "epoch": 0.31, + "learning_rate": 0.00017528568258474704, + "loss": 3.5469, + "step": 5330 + }, + { + "epoch": 0.31, + "learning_rate": 0.00017521973425754675, + "loss": 3.586, + "step": 5335 + }, + { + "epoch": 0.31, + "learning_rate": 0.00017515371049982824, + "loss": 3.4719, + "step": 5340 + }, + { + "epoch": 0.31, + "learning_rate": 0.00017508761137780037, + "loss": 3.4752, + "step": 5345 + }, + { + "epoch": 0.31, + "learning_rate": 0.00017502143695774741, + "loss": 3.4714, + "step": 5350 + }, + { + "epoch": 0.31, + "learning_rate": 0.00017495518730602924, + "loss": 3.5341, + "step": 5355 + }, + { + "epoch": 0.31, + "learning_rate": 0.00017488886248908118, + "loss": 3.5975, + "step": 5360 + }, + { + "epoch": 0.31, + "learning_rate": 0.0001748224625734139, + "loss": 3.6289, + "step": 5365 + }, + { + "epoch": 0.31, + "learning_rate": 0.00017475598762561333, + "loss": 3.7205, + "step": 5370 + }, + { + "epoch": 0.31, + "learning_rate": 0.00017468943771234075, + "loss": 3.7765, + "step": 5375 + }, + { + "epoch": 0.31, + "learning_rate": 0.00017462281290033256, + "loss": 3.8245, + "step": 5380 + }, + { + "epoch": 0.31, + "learning_rate": 0.00017455611325640024, + "loss": 3.6582, + "step": 5385 + }, + { + "epoch": 0.31, + "learning_rate": 0.00017448933884743037, + "loss": 3.5739, + "step": 5390 + }, + { + "epoch": 0.31, + "learning_rate": 0.0001744224897403845, + "loss": 3.5564, + "step": 5395 + }, + { + "epoch": 0.31, + "learning_rate": 0.00017435556600229902, + "loss": 3.5791, + "step": 5400 + }, + { + "epoch": 0.31, + "learning_rate": 0.0001742885677002852, + "loss": 3.6856, + "step": 5405 + }, + { + "epoch": 0.31, + "learning_rate": 0.00017422149490152914, + "loss": 3.6104, + "step": 5410 + }, + { + "epoch": 0.31, + "learning_rate": 0.00017415434767329154, + "loss": 3.5441, + "step": 5415 + }, + { + "epoch": 0.31, + "learning_rate": 0.0001740871260829078, + "loss": 3.4595, + "step": 5420 + }, + { + "epoch": 0.31, + "learning_rate": 0.0001740198301977879, + "loss": 3.5485, + "step": 5425 + }, + { + "epoch": 0.31, + "learning_rate": 0.0001739524600854163, + "loss": 3.4794, + "step": 5430 + }, + { + "epoch": 0.31, + "learning_rate": 0.0001738850158133519, + "loss": 3.6365, + "step": 5435 + }, + { + "epoch": 0.31, + "learning_rate": 0.00017381749744922796, + "loss": 3.7492, + "step": 5440 + }, + { + "epoch": 0.31, + "learning_rate": 0.00017374990506075207, + "loss": 3.733, + "step": 5445 + }, + { + "epoch": 0.31, + "learning_rate": 0.00017368223871570596, + "loss": 3.7354, + "step": 5450 + }, + { + "epoch": 0.31, + "learning_rate": 0.0001736144984819457, + "loss": 3.6032, + "step": 5455 + }, + { + "epoch": 0.31, + "learning_rate": 0.00017354668442740126, + "loss": 3.6179, + "step": 5460 + }, + { + "epoch": 0.31, + "learning_rate": 0.00017347879662007676, + "loss": 3.6409, + "step": 5465 + }, + { + "epoch": 0.31, + "learning_rate": 0.00017341083512805025, + "loss": 3.6133, + "step": 5470 + }, + { + "epoch": 0.31, + "learning_rate": 0.00017334280001947362, + "loss": 3.5418, + "step": 5475 + }, + { + "epoch": 0.31, + "learning_rate": 0.00017327469136257272, + "loss": 3.5113, + "step": 5480 + }, + { + "epoch": 0.31, + "learning_rate": 0.000173206509225647, + "loss": 3.4768, + "step": 5485 + }, + { + "epoch": 0.31, + "learning_rate": 0.00017313825367706967, + "loss": 3.5098, + "step": 5490 + }, + { + "epoch": 0.32, + "learning_rate": 0.00017306992478528753, + "loss": 3.5288, + "step": 5495 + }, + { + "epoch": 0.32, + "learning_rate": 0.000173001522618821, + "loss": 3.5987, + "step": 5500 + }, + { + "epoch": 0.32, + "learning_rate": 0.00017293304724626385, + "loss": 3.4316, + "step": 5505 + }, + { + "epoch": 0.32, + "learning_rate": 0.0001728644987362834, + "loss": 3.3949, + "step": 5510 + }, + { + "epoch": 0.32, + "learning_rate": 0.00017279587715762022, + "loss": 3.4527, + "step": 5515 + }, + { + "epoch": 0.32, + "learning_rate": 0.0001727271825790882, + "loss": 3.4952, + "step": 5520 + }, + { + "epoch": 0.32, + "learning_rate": 0.0001726584150695744, + "loss": 3.5837, + "step": 5525 + }, + { + "epoch": 0.32, + "learning_rate": 0.00017258957469803906, + "loss": 3.6553, + "step": 5530 + }, + { + "epoch": 0.32, + "learning_rate": 0.0001725206615335154, + "loss": 3.6109, + "step": 5535 + }, + { + "epoch": 0.32, + "learning_rate": 0.00017245167564510974, + "loss": 3.6551, + "step": 5540 + }, + { + "epoch": 0.32, + "learning_rate": 0.00017238261710200128, + "loss": 3.6372, + "step": 5545 + }, + { + "epoch": 0.32, + "learning_rate": 0.0001723134859734421, + "loss": 3.545, + "step": 5550 + }, + { + "epoch": 0.32, + "learning_rate": 0.00017224428232875703, + "loss": 3.5932, + "step": 5555 + }, + { + "epoch": 0.32, + "learning_rate": 0.00017217500623734365, + "loss": 3.5913, + "step": 5560 + }, + { + "epoch": 0.32, + "learning_rate": 0.00017210565776867216, + "loss": 3.5502, + "step": 5565 + }, + { + "epoch": 0.32, + "learning_rate": 0.00017203623699228537, + "loss": 3.5324, + "step": 5570 + }, + { + "epoch": 0.32, + "learning_rate": 0.0001719667439777986, + "loss": 3.6524, + "step": 5575 + }, + { + "epoch": 0.32, + "learning_rate": 0.00017189717879489958, + "loss": 3.4114, + "step": 5580 + }, + { + "epoch": 0.32, + "learning_rate": 0.00017182754151334842, + "loss": 3.5108, + "step": 5585 + }, + { + "epoch": 0.32, + "learning_rate": 0.00017175783220297762, + "loss": 3.6321, + "step": 5590 + }, + { + "epoch": 0.32, + "learning_rate": 0.00017168805093369173, + "loss": 3.6101, + "step": 5595 + }, + { + "epoch": 0.32, + "learning_rate": 0.00017161819777546767, + "loss": 3.5566, + "step": 5600 + }, + { + "epoch": 0.32, + "learning_rate": 0.0001715482727983542, + "loss": 3.6725, + "step": 5605 + }, + { + "epoch": 0.32, + "learning_rate": 0.00017147827607247242, + "loss": 3.6074, + "step": 5610 + }, + { + "epoch": 0.32, + "learning_rate": 0.00017140820766801506, + "loss": 3.6478, + "step": 5615 + }, + { + "epoch": 0.32, + "learning_rate": 0.00017133806765524693, + "loss": 3.4643, + "step": 5620 + }, + { + "epoch": 0.32, + "learning_rate": 0.0001712678561045046, + "loss": 3.4981, + "step": 5625 + }, + { + "epoch": 0.32, + "learning_rate": 0.00017119757308619639, + "loss": 3.6574, + "step": 5630 + }, + { + "epoch": 0.32, + "learning_rate": 0.00017112721867080217, + "loss": 3.5729, + "step": 5635 + }, + { + "epoch": 0.32, + "learning_rate": 0.0001710567929288736, + "loss": 3.7408, + "step": 5640 + }, + { + "epoch": 0.32, + "learning_rate": 0.00017098629593103378, + "loss": 3.7945, + "step": 5645 + }, + { + "epoch": 0.32, + "learning_rate": 0.00017091572774797714, + "loss": 3.6832, + "step": 5650 + }, + { + "epoch": 0.32, + "learning_rate": 0.00017084508845046975, + "loss": 3.5869, + "step": 5655 + }, + { + "epoch": 0.32, + "learning_rate": 0.00017077437810934882, + "loss": 3.5659, + "step": 5660 + }, + { + "epoch": 0.33, + "learning_rate": 0.0001707035967955228, + "loss": 3.6252, + "step": 5665 + }, + { + "epoch": 0.33, + "learning_rate": 0.00017063274457997137, + "loss": 3.5674, + "step": 5670 + }, + { + "epoch": 0.33, + "learning_rate": 0.00017056182153374526, + "loss": 3.6366, + "step": 5675 + }, + { + "epoch": 0.33, + "learning_rate": 0.00017049082772796633, + "loss": 3.4651, + "step": 5680 + }, + { + "epoch": 0.33, + "learning_rate": 0.00017041976323382726, + "loss": 3.6629, + "step": 5685 + }, + { + "epoch": 0.33, + "learning_rate": 0.00017034862812259174, + "loss": 3.6828, + "step": 5690 + }, + { + "epoch": 0.33, + "learning_rate": 0.00017027742246559417, + "loss": 3.6665, + "step": 5695 + }, + { + "epoch": 0.33, + "learning_rate": 0.00017020614633423976, + "loss": 3.731, + "step": 5700 + }, + { + "epoch": 0.33, + "learning_rate": 0.00017013479980000436, + "loss": 3.5837, + "step": 5705 + }, + { + "epoch": 0.33, + "learning_rate": 0.00017006338293443446, + "loss": 3.4745, + "step": 5710 + }, + { + "epoch": 0.33, + "learning_rate": 0.00016999189580914708, + "loss": 3.558, + "step": 5715 + }, + { + "epoch": 0.33, + "learning_rate": 0.00016992033849582962, + "loss": 3.4107, + "step": 5720 + }, + { + "epoch": 0.33, + "learning_rate": 0.00016984871106623988, + "loss": 3.4014, + "step": 5725 + }, + { + "epoch": 0.33, + "learning_rate": 0.00016977701359220613, + "loss": 3.4296, + "step": 5730 + }, + { + "epoch": 0.33, + "learning_rate": 0.00016970524614562664, + "loss": 3.5029, + "step": 5735 + }, + { + "epoch": 0.33, + "learning_rate": 0.00016963340879847002, + "loss": 3.5958, + "step": 5740 + }, + { + "epoch": 0.33, + "learning_rate": 0.0001695615016227749, + "loss": 3.6557, + "step": 5745 + }, + { + "epoch": 0.33, + "learning_rate": 0.00016948952469065, + "loss": 3.7016, + "step": 5750 + }, + { + "epoch": 0.33, + "learning_rate": 0.00016941747807427387, + "loss": 3.6829, + "step": 5755 + }, + { + "epoch": 0.33, + "learning_rate": 0.00016934536184589512, + "loss": 3.565, + "step": 5760 + }, + { + "epoch": 0.33, + "learning_rate": 0.00016927317607783198, + "loss": 3.5091, + "step": 5765 + }, + { + "epoch": 0.33, + "learning_rate": 0.00016920092084247255, + "loss": 3.456, + "step": 5770 + }, + { + "epoch": 0.33, + "learning_rate": 0.0001691285962122745, + "loss": 3.4439, + "step": 5775 + }, + { + "epoch": 0.33, + "learning_rate": 0.00016905620225976517, + "loss": 3.4839, + "step": 5780 + }, + { + "epoch": 0.33, + "learning_rate": 0.00016898373905754137, + "loss": 3.3974, + "step": 5785 + }, + { + "epoch": 0.33, + "learning_rate": 0.00016891120667826933, + "loss": 3.4783, + "step": 5790 + }, + { + "epoch": 0.33, + "learning_rate": 0.00016883860519468472, + "loss": 3.5309, + "step": 5795 + }, + { + "epoch": 0.33, + "learning_rate": 0.0001687659346795925, + "loss": 3.4802, + "step": 5800 + }, + { + "epoch": 0.33, + "learning_rate": 0.00016869319520586675, + "loss": 3.5528, + "step": 5805 + }, + { + "epoch": 0.33, + "learning_rate": 0.00016862038684645078, + "loss": 3.558, + "step": 5810 + }, + { + "epoch": 0.33, + "learning_rate": 0.00016854750967435704, + "loss": 3.5805, + "step": 5815 + }, + { + "epoch": 0.33, + "learning_rate": 0.0001684745637626669, + "loss": 3.5233, + "step": 5820 + }, + { + "epoch": 0.33, + "learning_rate": 0.00016840154918453063, + "loss": 3.576, + "step": 5825 + }, + { + "epoch": 0.33, + "learning_rate": 0.00016832846601316749, + "loss": 3.5552, + "step": 5830 + }, + { + "epoch": 0.33, + "learning_rate": 0.00016825531432186543, + "loss": 3.5863, + "step": 5835 + }, + { + "epoch": 0.34, + "learning_rate": 0.00016818209418398107, + "loss": 3.5365, + "step": 5840 + }, + { + "epoch": 0.34, + "learning_rate": 0.0001681088056729398, + "loss": 3.6114, + "step": 5845 + }, + { + "epoch": 0.34, + "learning_rate": 0.00016803544886223547, + "loss": 3.6611, + "step": 5850 + }, + { + "epoch": 0.34, + "learning_rate": 0.00016796202382543047, + "loss": 3.5575, + "step": 5855 + }, + { + "epoch": 0.34, + "learning_rate": 0.00016788853063615556, + "loss": 3.608, + "step": 5860 + }, + { + "epoch": 0.34, + "learning_rate": 0.0001678149693681099, + "loss": 3.5907, + "step": 5865 + }, + { + "epoch": 0.34, + "learning_rate": 0.0001677413400950609, + "loss": 3.5429, + "step": 5870 + }, + { + "epoch": 0.34, + "learning_rate": 0.00016766764289084414, + "loss": 3.5161, + "step": 5875 + }, + { + "epoch": 0.34, + "learning_rate": 0.00016759387782936335, + "loss": 3.532, + "step": 5880 + }, + { + "epoch": 0.34, + "learning_rate": 0.00016752004498459032, + "loss": 3.5109, + "step": 5885 + }, + { + "epoch": 0.34, + "learning_rate": 0.00016744614443056475, + "loss": 3.626, + "step": 5890 + }, + { + "epoch": 0.34, + "learning_rate": 0.00016737217624139433, + "loss": 3.494, + "step": 5895 + }, + { + "epoch": 0.34, + "learning_rate": 0.0001672981404912545, + "loss": 3.4936, + "step": 5900 + }, + { + "epoch": 0.34, + "learning_rate": 0.00016722403725438845, + "loss": 3.5505, + "step": 5905 + }, + { + "epoch": 0.34, + "learning_rate": 0.00016714986660510715, + "loss": 3.5738, + "step": 5910 + }, + { + "epoch": 0.34, + "learning_rate": 0.000167075628617789, + "loss": 3.527, + "step": 5915 + }, + { + "epoch": 0.34, + "learning_rate": 0.00016700132336688005, + "loss": 3.5489, + "step": 5920 + }, + { + "epoch": 0.34, + "learning_rate": 0.0001669269509268938, + "loss": 3.5462, + "step": 5925 + }, + { + "epoch": 0.34, + "learning_rate": 0.00016685251137241113, + "loss": 3.5018, + "step": 5930 + }, + { + "epoch": 0.34, + "learning_rate": 0.0001667780047780801, + "loss": 3.5255, + "step": 5935 + }, + { + "epoch": 0.34, + "learning_rate": 0.00016670343121861613, + "loss": 3.4632, + "step": 5940 + }, + { + "epoch": 0.34, + "learning_rate": 0.00016662879076880178, + "loss": 3.5284, + "step": 5945 + }, + { + "epoch": 0.34, + "learning_rate": 0.00016655408350348664, + "loss": 3.5107, + "step": 5950 + }, + { + "epoch": 0.34, + "learning_rate": 0.0001664793094975873, + "loss": 3.5045, + "step": 5955 + }, + { + "epoch": 0.34, + "learning_rate": 0.00016640446882608737, + "loss": 3.5215, + "step": 5960 + }, + { + "epoch": 0.34, + "learning_rate": 0.00016632956156403716, + "loss": 3.6234, + "step": 5965 + }, + { + "epoch": 0.34, + "learning_rate": 0.00016625458778655387, + "loss": 3.6275, + "step": 5970 + }, + { + "epoch": 0.34, + "learning_rate": 0.00016617954756882144, + "loss": 3.7022, + "step": 5975 + }, + { + "epoch": 0.34, + "learning_rate": 0.00016610444098609026, + "loss": 3.7182, + "step": 5980 + }, + { + "epoch": 0.34, + "learning_rate": 0.00016602926811367744, + "loss": 3.6733, + "step": 5985 + }, + { + "epoch": 0.34, + "learning_rate": 0.00016595402902696646, + "loss": 3.4904, + "step": 5990 + }, + { + "epoch": 0.34, + "learning_rate": 0.0001658787238014073, + "loss": 3.566, + "step": 5995 + }, + { + "epoch": 0.34, + "learning_rate": 0.00016580335251251623, + "loss": 3.6039, + "step": 6000 + }, + { + "epoch": 0.34, + "learning_rate": 0.00016572791523587562, + "loss": 3.6402, + "step": 6005 + }, + { + "epoch": 0.34, + "learning_rate": 0.00016565241204713428, + "loss": 3.7669, + "step": 6010 + }, + { + "epoch": 0.35, + "learning_rate": 0.0001655768430220069, + "loss": 3.6709, + "step": 6015 + }, + { + "epoch": 0.35, + "learning_rate": 0.0001655012082362743, + "loss": 3.5203, + "step": 6020 + }, + { + "epoch": 0.35, + "learning_rate": 0.00016542550776578322, + "loss": 3.5563, + "step": 6025 + }, + { + "epoch": 0.35, + "learning_rate": 0.00016534974168644625, + "loss": 3.5158, + "step": 6030 + }, + { + "epoch": 0.35, + "learning_rate": 0.0001652739100742417, + "loss": 3.4622, + "step": 6035 + }, + { + "epoch": 0.35, + "learning_rate": 0.00016519801300521385, + "loss": 3.4893, + "step": 6040 + }, + { + "epoch": 0.35, + "learning_rate": 0.0001651220505554723, + "loss": 3.6005, + "step": 6045 + }, + { + "epoch": 0.35, + "learning_rate": 0.00016504602280119243, + "loss": 3.4028, + "step": 6050 + }, + { + "epoch": 0.35, + "learning_rate": 0.0001649699298186151, + "loss": 3.5378, + "step": 6055 + }, + { + "epoch": 0.35, + "learning_rate": 0.0001648937716840464, + "loss": 3.5115, + "step": 6060 + }, + { + "epoch": 0.35, + "learning_rate": 0.00016481754847385793, + "loss": 3.4949, + "step": 6065 + }, + { + "epoch": 0.35, + "learning_rate": 0.00016474126026448652, + "loss": 3.4338, + "step": 6070 + }, + { + "epoch": 0.35, + "learning_rate": 0.00016466490713243416, + "loss": 3.4798, + "step": 6075 + }, + { + "epoch": 0.35, + "learning_rate": 0.00016458848915426792, + "loss": 3.5758, + "step": 6080 + }, + { + "epoch": 0.35, + "learning_rate": 0.00016451200640661993, + "loss": 3.5334, + "step": 6085 + }, + { + "epoch": 0.35, + "learning_rate": 0.00016443545896618723, + "loss": 3.5144, + "step": 6090 + }, + { + "epoch": 0.35, + "learning_rate": 0.0001643588469097318, + "loss": 3.5127, + "step": 6095 + }, + { + "epoch": 0.35, + "learning_rate": 0.00016428217031408038, + "loss": 3.4371, + "step": 6100 + }, + { + "epoch": 0.35, + "learning_rate": 0.0001642054292561244, + "loss": 3.5499, + "step": 6105 + }, + { + "epoch": 0.35, + "learning_rate": 0.00016412862381282004, + "loss": 3.4309, + "step": 6110 + }, + { + "epoch": 0.35, + "learning_rate": 0.00016405175406118786, + "loss": 3.4376, + "step": 6115 + }, + { + "epoch": 0.35, + "learning_rate": 0.00016397482007831312, + "loss": 3.4657, + "step": 6120 + }, + { + "epoch": 0.35, + "learning_rate": 0.00016389782194134534, + "loss": 3.4929, + "step": 6125 + }, + { + "epoch": 0.35, + "learning_rate": 0.00016382075972749843, + "loss": 3.6048, + "step": 6130 + }, + { + "epoch": 0.35, + "learning_rate": 0.00016374363351405054, + "loss": 3.6179, + "step": 6135 + }, + { + "epoch": 0.35, + "learning_rate": 0.00016366644337834405, + "loss": 3.6143, + "step": 6140 + }, + { + "epoch": 0.35, + "learning_rate": 0.00016358918939778536, + "loss": 3.5434, + "step": 6145 + }, + { + "epoch": 0.35, + "learning_rate": 0.00016351187164984494, + "loss": 3.543, + "step": 6150 + }, + { + "epoch": 0.35, + "learning_rate": 0.00016343449021205726, + "loss": 3.4592, + "step": 6155 + }, + { + "epoch": 0.35, + "learning_rate": 0.00016335704516202051, + "loss": 3.5511, + "step": 6160 + }, + { + "epoch": 0.35, + "learning_rate": 0.00016327953657739678, + "loss": 3.5785, + "step": 6165 + }, + { + "epoch": 0.35, + "learning_rate": 0.0001632019645359119, + "loss": 3.6696, + "step": 6170 + }, + { + "epoch": 0.35, + "learning_rate": 0.00016312432911535528, + "loss": 3.509, + "step": 6175 + }, + { + "epoch": 0.35, + "learning_rate": 0.00016304663039357986, + "loss": 3.5554, + "step": 6180 + }, + { + "epoch": 0.35, + "learning_rate": 0.0001629688684485021, + "loss": 3.5759, + "step": 6185 + }, + { + "epoch": 0.36, + "learning_rate": 0.00016289104335810185, + "loss": 3.7073, + "step": 6190 + }, + { + "epoch": 0.36, + "learning_rate": 0.00016281315520042233, + "loss": 3.6145, + "step": 6195 + }, + { + "epoch": 0.36, + "learning_rate": 0.0001627352040535699, + "loss": 3.6244, + "step": 6200 + }, + { + "epoch": 0.36, + "learning_rate": 0.00016265718999571415, + "loss": 3.5074, + "step": 6205 + }, + { + "epoch": 0.36, + "learning_rate": 0.0001625791131050878, + "loss": 3.4655, + "step": 6210 + }, + { + "epoch": 0.36, + "learning_rate": 0.0001625009734599865, + "loss": 3.5015, + "step": 6215 + }, + { + "epoch": 0.36, + "learning_rate": 0.00016242277113876887, + "loss": 3.5042, + "step": 6220 + }, + { + "epoch": 0.36, + "learning_rate": 0.00016234450621985635, + "loss": 3.5255, + "step": 6225 + }, + { + "epoch": 0.36, + "learning_rate": 0.00016226617878173317, + "loss": 3.4764, + "step": 6230 + }, + { + "epoch": 0.36, + "learning_rate": 0.00016218778890294636, + "loss": 3.5077, + "step": 6235 + }, + { + "epoch": 0.36, + "learning_rate": 0.00016210933666210533, + "loss": 3.4549, + "step": 6240 + }, + { + "epoch": 0.36, + "learning_rate": 0.0001620308221378822, + "loss": 3.429, + "step": 6245 + }, + { + "epoch": 0.36, + "learning_rate": 0.00016195224540901156, + "loss": 3.4301, + "step": 6250 + }, + { + "epoch": 0.36, + "learning_rate": 0.00016187360655429034, + "loss": 3.4431, + "step": 6255 + }, + { + "epoch": 0.36, + "learning_rate": 0.0001617949056525777, + "loss": 3.5251, + "step": 6260 + }, + { + "epoch": 0.36, + "learning_rate": 0.0001617161427827951, + "loss": 3.4229, + "step": 6265 + }, + { + "epoch": 0.36, + "learning_rate": 0.0001616373180239261, + "loss": 3.3444, + "step": 6270 + }, + { + "epoch": 0.36, + "learning_rate": 0.0001615584314550164, + "loss": 3.4663, + "step": 6275 + }, + { + "epoch": 0.36, + "learning_rate": 0.00016147948315517357, + "loss": 3.6094, + "step": 6280 + }, + { + "epoch": 0.36, + "learning_rate": 0.00016140047320356723, + "loss": 3.469, + "step": 6285 + }, + { + "epoch": 0.36, + "learning_rate": 0.00016132140167942862, + "loss": 3.437, + "step": 6290 + }, + { + "epoch": 0.36, + "learning_rate": 0.0001612422686620509, + "loss": 3.3836, + "step": 6295 + }, + { + "epoch": 0.36, + "learning_rate": 0.0001611630742307889, + "loss": 3.3962, + "step": 6300 + }, + { + "epoch": 0.36, + "learning_rate": 0.00016108381846505885, + "loss": 3.4229, + "step": 6305 + }, + { + "epoch": 0.36, + "learning_rate": 0.0001610045014443387, + "loss": 3.3483, + "step": 6310 + }, + { + "epoch": 0.36, + "learning_rate": 0.00016092512324816772, + "loss": 3.4173, + "step": 6315 + }, + { + "epoch": 0.36, + "learning_rate": 0.00016084568395614648, + "loss": 3.3849, + "step": 6320 + }, + { + "epoch": 0.36, + "learning_rate": 0.00016076618364793696, + "loss": 3.4033, + "step": 6325 + }, + { + "epoch": 0.36, + "learning_rate": 0.0001606866224032622, + "loss": 3.3968, + "step": 6330 + }, + { + "epoch": 0.36, + "learning_rate": 0.0001606070003019064, + "loss": 3.37, + "step": 6335 + }, + { + "epoch": 0.36, + "learning_rate": 0.00016052731742371485, + "loss": 3.4304, + "step": 6340 + }, + { + "epoch": 0.36, + "learning_rate": 0.00016044757384859365, + "loss": 3.5981, + "step": 6345 + }, + { + "epoch": 0.36, + "learning_rate": 0.0001603677696565098, + "loss": 3.4604, + "step": 6350 + }, + { + "epoch": 0.36, + "learning_rate": 0.00016028790492749118, + "loss": 3.522, + "step": 6355 + }, + { + "epoch": 0.36, + "learning_rate": 0.00016020797974162636, + "loss": 3.5371, + "step": 6360 + }, + { + "epoch": 0.37, + "learning_rate": 0.0001601279941790644, + "loss": 3.4421, + "step": 6365 + }, + { + "epoch": 0.37, + "learning_rate": 0.00016004794832001507, + "loss": 3.53, + "step": 6370 + }, + { + "epoch": 0.37, + "learning_rate": 0.0001599678422447485, + "loss": 3.4144, + "step": 6375 + }, + { + "epoch": 0.37, + "learning_rate": 0.00015988767603359526, + "loss": 3.4577, + "step": 6380 + }, + { + "epoch": 0.37, + "learning_rate": 0.00015980744976694622, + "loss": 3.5133, + "step": 6385 + }, + { + "epoch": 0.37, + "learning_rate": 0.00015972716352525242, + "loss": 3.437, + "step": 6390 + }, + { + "epoch": 0.37, + "learning_rate": 0.0001596468173890251, + "loss": 3.5091, + "step": 6395 + }, + { + "epoch": 0.37, + "learning_rate": 0.0001595664114388356, + "loss": 3.5598, + "step": 6400 + }, + { + "epoch": 0.37, + "learning_rate": 0.00015948594575531508, + "loss": 3.4805, + "step": 6405 + }, + { + "epoch": 0.37, + "learning_rate": 0.00015940542041915478, + "loss": 3.5829, + "step": 6410 + }, + { + "epoch": 0.37, + "learning_rate": 0.00015932483551110572, + "loss": 3.4433, + "step": 6415 + }, + { + "epoch": 0.37, + "learning_rate": 0.00015924419111197852, + "loss": 3.5017, + "step": 6420 + }, + { + "epoch": 0.37, + "learning_rate": 0.00015916348730264367, + "loss": 3.4387, + "step": 6425 + }, + { + "epoch": 0.37, + "learning_rate": 0.00015908272416403105, + "loss": 3.506, + "step": 6430 + }, + { + "epoch": 0.37, + "learning_rate": 0.00015900190177713016, + "loss": 3.4489, + "step": 6435 + }, + { + "epoch": 0.37, + "learning_rate": 0.00015892102022298986, + "loss": 3.4474, + "step": 6440 + }, + { + "epoch": 0.37, + "learning_rate": 0.0001588400795827184, + "loss": 3.4771, + "step": 6445 + }, + { + "epoch": 0.37, + "learning_rate": 0.00015875907993748314, + "loss": 3.3982, + "step": 6450 + }, + { + "epoch": 0.37, + "learning_rate": 0.0001586780213685108, + "loss": 3.4813, + "step": 6455 + }, + { + "epoch": 0.37, + "learning_rate": 0.00015859690395708702, + "loss": 3.4482, + "step": 6460 + }, + { + "epoch": 0.37, + "learning_rate": 0.00015851572778455657, + "loss": 3.4593, + "step": 6465 + }, + { + "epoch": 0.37, + "learning_rate": 0.00015843449293232307, + "loss": 3.5203, + "step": 6470 + }, + { + "epoch": 0.37, + "learning_rate": 0.00015835319948184903, + "loss": 3.438, + "step": 6475 + }, + { + "epoch": 0.37, + "learning_rate": 0.0001582718475146557, + "loss": 3.442, + "step": 6480 + }, + { + "epoch": 0.37, + "learning_rate": 0.000158190437112323, + "loss": 3.5365, + "step": 6485 + }, + { + "epoch": 0.37, + "learning_rate": 0.00015810896835648952, + "loss": 3.5282, + "step": 6490 + }, + { + "epoch": 0.37, + "learning_rate": 0.00015802744132885227, + "loss": 3.432, + "step": 6495 + }, + { + "epoch": 0.37, + "learning_rate": 0.0001579458561111667, + "loss": 3.4429, + "step": 6500 + }, + { + "epoch": 0.37, + "learning_rate": 0.0001578642127852467, + "loss": 3.5468, + "step": 6505 + }, + { + "epoch": 0.37, + "learning_rate": 0.00015778251143296437, + "loss": 3.5394, + "step": 6510 + }, + { + "epoch": 0.37, + "learning_rate": 0.00015770075213625, + "loss": 3.4705, + "step": 6515 + }, + { + "epoch": 0.37, + "learning_rate": 0.000157618934977092, + "loss": 3.4472, + "step": 6520 + }, + { + "epoch": 0.37, + "learning_rate": 0.00015753706003753678, + "loss": 3.5572, + "step": 6525 + }, + { + "epoch": 0.37, + "learning_rate": 0.00015745512739968878, + "loss": 3.4419, + "step": 6530 + }, + { + "epoch": 0.37, + "learning_rate": 0.00015737313714571017, + "loss": 3.4559, + "step": 6535 + }, + { + "epoch": 0.38, + "learning_rate": 0.00015729108935782094, + "loss": 3.6062, + "step": 6540 + }, + { + "epoch": 0.38, + "learning_rate": 0.00015720898411829889, + "loss": 3.5413, + "step": 6545 + }, + { + "epoch": 0.38, + "learning_rate": 0.00015712682150947923, + "loss": 3.6413, + "step": 6550 + }, + { + "epoch": 0.38, + "learning_rate": 0.0001570446016137549, + "loss": 3.4494, + "step": 6555 + }, + { + "epoch": 0.38, + "learning_rate": 0.00015696232451357616, + "loss": 3.5807, + "step": 6560 + }, + { + "epoch": 0.38, + "learning_rate": 0.0001568799902914506, + "loss": 3.4453, + "step": 6565 + }, + { + "epoch": 0.38, + "learning_rate": 0.00015679759902994332, + "loss": 3.4969, + "step": 6570 + }, + { + "epoch": 0.38, + "learning_rate": 0.0001567151508116763, + "loss": 3.4427, + "step": 6575 + }, + { + "epoch": 0.38, + "learning_rate": 0.00015663264571932892, + "loss": 3.421, + "step": 6580 + }, + { + "epoch": 0.38, + "learning_rate": 0.0001565500838356374, + "loss": 3.4833, + "step": 6585 + }, + { + "epoch": 0.38, + "learning_rate": 0.00015646746524339497, + "loss": 3.4319, + "step": 6590 + }, + { + "epoch": 0.38, + "learning_rate": 0.00015638479002545182, + "loss": 3.4571, + "step": 6595 + }, + { + "epoch": 0.38, + "learning_rate": 0.00015630205826471478, + "loss": 3.4472, + "step": 6600 + }, + { + "epoch": 0.38, + "learning_rate": 0.00015621927004414747, + "loss": 3.5269, + "step": 6605 + }, + { + "epoch": 0.38, + "learning_rate": 0.0001561364254467701, + "loss": 3.5947, + "step": 6610 + }, + { + "epoch": 0.38, + "learning_rate": 0.00015605352455565937, + "loss": 3.5058, + "step": 6615 + }, + { + "epoch": 0.38, + "learning_rate": 0.0001559705674539486, + "loss": 3.4868, + "step": 6620 + }, + { + "epoch": 0.38, + "learning_rate": 0.0001558875542248272, + "loss": 3.5223, + "step": 6625 + }, + { + "epoch": 0.38, + "learning_rate": 0.0001558044849515411, + "loss": 3.5227, + "step": 6630 + }, + { + "epoch": 0.38, + "learning_rate": 0.00015572135971739242, + "loss": 3.5201, + "step": 6635 + }, + { + "epoch": 0.38, + "learning_rate": 0.0001556381786057392, + "loss": 3.6066, + "step": 6640 + }, + { + "epoch": 0.38, + "learning_rate": 0.00015555494169999578, + "loss": 3.6072, + "step": 6645 + }, + { + "epoch": 0.38, + "learning_rate": 0.00015547164908363224, + "loss": 3.5514, + "step": 6650 + }, + { + "epoch": 0.38, + "learning_rate": 0.00015538830084017456, + "loss": 3.5122, + "step": 6655 + }, + { + "epoch": 0.38, + "learning_rate": 0.00015530489705320463, + "loss": 3.555, + "step": 6660 + }, + { + "epoch": 0.38, + "learning_rate": 0.0001552214378063599, + "loss": 3.5747, + "step": 6665 + }, + { + "epoch": 0.38, + "learning_rate": 0.0001551379231833335, + "loss": 3.5918, + "step": 6670 + }, + { + "epoch": 0.38, + "learning_rate": 0.00015505435326787414, + "loss": 3.5799, + "step": 6675 + }, + { + "epoch": 0.38, + "learning_rate": 0.00015497072814378584, + "loss": 3.6473, + "step": 6680 + }, + { + "epoch": 0.38, + "learning_rate": 0.0001548870478949281, + "loss": 3.5756, + "step": 6685 + }, + { + "epoch": 0.38, + "learning_rate": 0.00015480331260521565, + "loss": 3.5637, + "step": 6690 + }, + { + "epoch": 0.38, + "learning_rate": 0.00015471952235861843, + "loss": 3.5847, + "step": 6695 + }, + { + "epoch": 0.38, + "learning_rate": 0.0001546356772391615, + "loss": 3.5189, + "step": 6700 + }, + { + "epoch": 0.38, + "learning_rate": 0.0001545517773309249, + "loss": 3.453, + "step": 6705 + }, + { + "epoch": 0.38, + "learning_rate": 0.00015446782271804366, + "loss": 3.4587, + "step": 6710 + }, + { + "epoch": 0.39, + "learning_rate": 0.00015438381348470767, + "loss": 3.4759, + "step": 6715 + }, + { + "epoch": 0.39, + "learning_rate": 0.00015429974971516156, + "loss": 3.4924, + "step": 6720 + }, + { + "epoch": 0.39, + "learning_rate": 0.0001542156314937047, + "loss": 3.4918, + "step": 6725 + }, + { + "epoch": 0.39, + "learning_rate": 0.000154131458904691, + "loss": 3.4925, + "step": 6730 + }, + { + "epoch": 0.39, + "learning_rate": 0.00015404723203252894, + "loss": 3.5319, + "step": 6735 + }, + { + "epoch": 0.39, + "learning_rate": 0.0001539629509616814, + "loss": 3.528, + "step": 6740 + }, + { + "epoch": 0.39, + "learning_rate": 0.00015387861577666559, + "loss": 3.5666, + "step": 6745 + }, + { + "epoch": 0.39, + "learning_rate": 0.00015379422656205307, + "loss": 3.4085, + "step": 6750 + }, + { + "epoch": 0.39, + "learning_rate": 0.00015370978340246955, + "loss": 3.5032, + "step": 6755 + }, + { + "epoch": 0.39, + "learning_rate": 0.00015362528638259478, + "loss": 3.4526, + "step": 6760 + }, + { + "epoch": 0.39, + "learning_rate": 0.0001535407355871626, + "loss": 3.3519, + "step": 6765 + }, + { + "epoch": 0.39, + "learning_rate": 0.00015345613110096068, + "loss": 3.4045, + "step": 6770 + }, + { + "epoch": 0.39, + "learning_rate": 0.00015337147300883066, + "loss": 3.4666, + "step": 6775 + }, + { + "epoch": 0.39, + "learning_rate": 0.0001532867613956678, + "loss": 3.4439, + "step": 6780 + }, + { + "epoch": 0.39, + "learning_rate": 0.0001532019963464211, + "loss": 3.4539, + "step": 6785 + }, + { + "epoch": 0.39, + "learning_rate": 0.00015311717794609325, + "loss": 3.4985, + "step": 6790 + }, + { + "epoch": 0.39, + "learning_rate": 0.0001530323062797402, + "loss": 3.5674, + "step": 6795 + }, + { + "epoch": 0.39, + "learning_rate": 0.00015294738143247148, + "loss": 3.5435, + "step": 6800 + }, + { + "epoch": 0.39, + "learning_rate": 0.00015286240348944997, + "loss": 3.5603, + "step": 6805 + }, + { + "epoch": 0.39, + "learning_rate": 0.00015277737253589164, + "loss": 3.4159, + "step": 6810 + }, + { + "epoch": 0.39, + "learning_rate": 0.00015269228865706584, + "loss": 3.4943, + "step": 6815 + }, + { + "epoch": 0.39, + "learning_rate": 0.0001526071519382948, + "loss": 3.4371, + "step": 6820 + }, + { + "epoch": 0.39, + "learning_rate": 0.00015252196246495382, + "loss": 3.5355, + "step": 6825 + }, + { + "epoch": 0.39, + "learning_rate": 0.00015243672032247112, + "loss": 3.464, + "step": 6830 + }, + { + "epoch": 0.39, + "learning_rate": 0.00015235142559632766, + "loss": 3.5677, + "step": 6835 + }, + { + "epoch": 0.39, + "learning_rate": 0.00015226607837205727, + "loss": 3.4668, + "step": 6840 + }, + { + "epoch": 0.39, + "learning_rate": 0.00015218067873524625, + "loss": 3.4798, + "step": 6845 + }, + { + "epoch": 0.39, + "learning_rate": 0.00015209522677153364, + "loss": 3.4834, + "step": 6850 + }, + { + "epoch": 0.39, + "learning_rate": 0.00015200972256661075, + "loss": 3.5855, + "step": 6855 + }, + { + "epoch": 0.39, + "learning_rate": 0.00015192416620622145, + "loss": 3.4574, + "step": 6860 + }, + { + "epoch": 0.39, + "learning_rate": 0.00015183855777616188, + "loss": 3.528, + "step": 6865 + }, + { + "epoch": 0.39, + "learning_rate": 0.0001517528973622803, + "loss": 3.5396, + "step": 6870 + }, + { + "epoch": 0.39, + "learning_rate": 0.00015166718505047722, + "loss": 3.5505, + "step": 6875 + }, + { + "epoch": 0.39, + "learning_rate": 0.0001515814209267051, + "loss": 3.5786, + "step": 6880 + }, + { + "epoch": 0.4, + "learning_rate": 0.00015149560507696837, + "loss": 3.6595, + "step": 6885 + }, + { + "epoch": 0.4, + "learning_rate": 0.00015140973758732347, + "loss": 3.5752, + "step": 6890 + }, + { + "epoch": 0.4, + "learning_rate": 0.0001513238185438784, + "loss": 3.4613, + "step": 6895 + }, + { + "epoch": 0.4, + "learning_rate": 0.00015123784803279302, + "loss": 3.475, + "step": 6900 + }, + { + "epoch": 0.4, + "learning_rate": 0.00015115182614027872, + "loss": 3.4469, + "step": 6905 + }, + { + "epoch": 0.4, + "learning_rate": 0.00015106575295259847, + "loss": 3.489, + "step": 6910 + }, + { + "epoch": 0.4, + "learning_rate": 0.00015097962855606663, + "loss": 3.4058, + "step": 6915 + }, + { + "epoch": 0.4, + "learning_rate": 0.00015089345303704902, + "loss": 3.5599, + "step": 6920 + }, + { + "epoch": 0.4, + "learning_rate": 0.00015080722648196253, + "loss": 3.4849, + "step": 6925 + }, + { + "epoch": 0.4, + "learning_rate": 0.0001507209489772754, + "loss": 3.4747, + "step": 6930 + }, + { + "epoch": 0.4, + "learning_rate": 0.0001506346206095069, + "loss": 3.5436, + "step": 6935 + }, + { + "epoch": 0.4, + "learning_rate": 0.0001505482414652273, + "loss": 3.5455, + "step": 6940 + }, + { + "epoch": 0.4, + "learning_rate": 0.00015046181163105786, + "loss": 3.5934, + "step": 6945 + }, + { + "epoch": 0.4, + "learning_rate": 0.00015037533119367053, + "loss": 3.5427, + "step": 6950 + }, + { + "epoch": 0.4, + "learning_rate": 0.0001502888002397881, + "loss": 3.6125, + "step": 6955 + }, + { + "epoch": 0.4, + "learning_rate": 0.00015020221885618407, + "loss": 3.5157, + "step": 6960 + }, + { + "epoch": 0.4, + "learning_rate": 0.00015011558712968234, + "loss": 3.4037, + "step": 6965 + }, + { + "epoch": 0.4, + "learning_rate": 0.0001500289051471575, + "loss": 3.3257, + "step": 6970 + }, + { + "epoch": 0.4, + "learning_rate": 0.0001499421729955344, + "loss": 3.3597, + "step": 6975 + }, + { + "epoch": 0.4, + "learning_rate": 0.0001498553907617882, + "loss": 3.3435, + "step": 6980 + }, + { + "epoch": 0.4, + "learning_rate": 0.00014976855853294436, + "loss": 3.4584, + "step": 6985 + }, + { + "epoch": 0.4, + "learning_rate": 0.00014968167639607845, + "loss": 3.423, + "step": 6990 + }, + { + "epoch": 0.4, + "learning_rate": 0.00014959474443831597, + "loss": 3.5043, + "step": 6995 + }, + { + "epoch": 0.4, + "learning_rate": 0.00014950776274683266, + "loss": 3.3893, + "step": 7000 + }, + { + "epoch": 0.4, + "learning_rate": 0.00014942073140885377, + "loss": 3.3906, + "step": 7005 + }, + { + "epoch": 0.4, + "learning_rate": 0.0001493336505116546, + "loss": 3.5513, + "step": 7010 + }, + { + "epoch": 0.4, + "learning_rate": 0.00014924652014256014, + "loss": 3.441, + "step": 7015 + }, + { + "epoch": 0.4, + "learning_rate": 0.0001491593403889448, + "loss": 3.3788, + "step": 7020 + }, + { + "epoch": 0.4, + "learning_rate": 0.00014907211133823273, + "loss": 3.3586, + "step": 7025 + }, + { + "epoch": 0.4, + "learning_rate": 0.0001489848330778973, + "loss": 3.3128, + "step": 7030 + }, + { + "epoch": 0.4, + "learning_rate": 0.0001488975056954615, + "loss": 3.3312, + "step": 7035 + }, + { + "epoch": 0.4, + "learning_rate": 0.00014881012927849728, + "loss": 3.4789, + "step": 7040 + }, + { + "epoch": 0.4, + "learning_rate": 0.000148722703914626, + "loss": 3.4802, + "step": 7045 + }, + { + "epoch": 0.4, + "learning_rate": 0.00014863522969151796, + "loss": 3.459, + "step": 7050 + }, + { + "epoch": 0.4, + "learning_rate": 0.00014854770669689253, + "loss": 3.426, + "step": 7055 + }, + { + "epoch": 0.41, + "learning_rate": 0.00014846013501851796, + "loss": 3.3598, + "step": 7060 + }, + { + "epoch": 0.41, + "learning_rate": 0.00014837251474421133, + "loss": 3.4684, + "step": 7065 + }, + { + "epoch": 0.41, + "learning_rate": 0.00014828484596183844, + "loss": 3.2775, + "step": 7070 + }, + { + "epoch": 0.41, + "learning_rate": 0.0001481971287593138, + "loss": 3.269, + "step": 7075 + }, + { + "epoch": 0.41, + "learning_rate": 0.0001481093632246003, + "loss": 3.3682, + "step": 7080 + }, + { + "epoch": 0.41, + "learning_rate": 0.00014802154944570952, + "loss": 3.359, + "step": 7085 + }, + { + "epoch": 0.41, + "learning_rate": 0.00014793368751070125, + "loss": 3.4297, + "step": 7090 + }, + { + "epoch": 0.41, + "learning_rate": 0.00014784577750768363, + "loss": 3.3985, + "step": 7095 + }, + { + "epoch": 0.41, + "learning_rate": 0.0001477578195248131, + "loss": 3.4944, + "step": 7100 + }, + { + "epoch": 0.41, + "learning_rate": 0.000147669813650294, + "loss": 3.4472, + "step": 7105 + }, + { + "epoch": 0.41, + "learning_rate": 0.0001475817599723789, + "loss": 3.5856, + "step": 7110 + }, + { + "epoch": 0.41, + "learning_rate": 0.00014749365857936824, + "loss": 3.6229, + "step": 7115 + }, + { + "epoch": 0.41, + "learning_rate": 0.00014740550955961022, + "loss": 3.5199, + "step": 7120 + }, + { + "epoch": 0.41, + "learning_rate": 0.0001473173130015009, + "loss": 3.5306, + "step": 7125 + }, + { + "epoch": 0.41, + "learning_rate": 0.00014722906899348402, + "loss": 3.5089, + "step": 7130 + }, + { + "epoch": 0.41, + "learning_rate": 0.00014714077762405085, + "loss": 3.3489, + "step": 7135 + }, + { + "epoch": 0.41, + "learning_rate": 0.00014705243898174017, + "loss": 3.5303, + "step": 7140 + }, + { + "epoch": 0.41, + "learning_rate": 0.00014696405315513814, + "loss": 3.4564, + "step": 7145 + }, + { + "epoch": 0.41, + "learning_rate": 0.00014687562023287833, + "loss": 3.577, + "step": 7150 + }, + { + "epoch": 0.41, + "learning_rate": 0.00014678714030364143, + "loss": 3.5409, + "step": 7155 + }, + { + "epoch": 0.41, + "learning_rate": 0.00014669861345615532, + "loss": 3.5197, + "step": 7160 + }, + { + "epoch": 0.41, + "learning_rate": 0.00014661003977919492, + "loss": 3.7331, + "step": 7165 + }, + { + "epoch": 0.41, + "learning_rate": 0.0001465214193615821, + "loss": 3.5235, + "step": 7170 + }, + { + "epoch": 0.41, + "learning_rate": 0.00014643275229218563, + "loss": 3.5341, + "step": 7175 + }, + { + "epoch": 0.41, + "learning_rate": 0.00014634403865992107, + "loss": 3.4939, + "step": 7180 + }, + { + "epoch": 0.41, + "learning_rate": 0.0001462552785537506, + "loss": 3.5044, + "step": 7185 + }, + { + "epoch": 0.41, + "learning_rate": 0.00014616647206268306, + "loss": 3.5047, + "step": 7190 + }, + { + "epoch": 0.41, + "learning_rate": 0.0001460776192757738, + "loss": 3.5562, + "step": 7195 + }, + { + "epoch": 0.41, + "learning_rate": 0.00014598872028212463, + "loss": 3.4772, + "step": 7200 + }, + { + "epoch": 0.41, + "learning_rate": 0.00014589977517088365, + "loss": 3.4511, + "step": 7205 + }, + { + "epoch": 0.41, + "learning_rate": 0.0001458107840312452, + "loss": 3.4308, + "step": 7210 + }, + { + "epoch": 0.41, + "learning_rate": 0.00014572174695244976, + "loss": 3.4673, + "step": 7215 + }, + { + "epoch": 0.41, + "learning_rate": 0.000145632664023784, + "loss": 3.3752, + "step": 7220 + }, + { + "epoch": 0.41, + "learning_rate": 0.00014554353533458042, + "loss": 3.3931, + "step": 7225 + }, + { + "epoch": 0.41, + "learning_rate": 0.00014545436097421744, + "loss": 3.3601, + "step": 7230 + }, + { + "epoch": 0.42, + "learning_rate": 0.0001453651410321194, + "loss": 3.3539, + "step": 7235 + }, + { + "epoch": 0.42, + "learning_rate": 0.00014527587559775616, + "loss": 3.3549, + "step": 7240 + }, + { + "epoch": 0.42, + "learning_rate": 0.0001451865647606434, + "loss": 3.4563, + "step": 7245 + }, + { + "epoch": 0.42, + "learning_rate": 0.00014509720861034212, + "loss": 3.421, + "step": 7250 + }, + { + "epoch": 0.42, + "learning_rate": 0.00014500780723645897, + "loss": 3.4295, + "step": 7255 + }, + { + "epoch": 0.42, + "learning_rate": 0.00014491836072864578, + "loss": 3.4677, + "step": 7260 + }, + { + "epoch": 0.42, + "learning_rate": 0.0001448288691765997, + "loss": 3.4615, + "step": 7265 + }, + { + "epoch": 0.42, + "learning_rate": 0.0001447393326700631, + "loss": 3.4632, + "step": 7270 + }, + { + "epoch": 0.42, + "learning_rate": 0.0001446497512988234, + "loss": 3.4683, + "step": 7275 + }, + { + "epoch": 0.42, + "learning_rate": 0.00014456012515271294, + "loss": 3.4657, + "step": 7280 + }, + { + "epoch": 0.42, + "learning_rate": 0.0001444704543216091, + "loss": 3.4245, + "step": 7285 + }, + { + "epoch": 0.42, + "learning_rate": 0.0001443807388954339, + "loss": 3.4695, + "step": 7290 + }, + { + "epoch": 0.42, + "learning_rate": 0.00014429097896415425, + "loss": 3.52, + "step": 7295 + }, + { + "epoch": 0.42, + "learning_rate": 0.00014420117461778155, + "loss": 3.4714, + "step": 7300 + }, + { + "epoch": 0.42, + "learning_rate": 0.00014411132594637185, + "loss": 3.5128, + "step": 7305 + }, + { + "epoch": 0.42, + "learning_rate": 0.0001440214330400256, + "loss": 3.5545, + "step": 7310 + }, + { + "epoch": 0.42, + "learning_rate": 0.00014393149598888752, + "loss": 3.5879, + "step": 7315 + }, + { + "epoch": 0.42, + "learning_rate": 0.0001438415148831468, + "loss": 3.535, + "step": 7320 + }, + { + "epoch": 0.42, + "learning_rate": 0.00014375148981303663, + "loss": 3.4474, + "step": 7325 + }, + { + "epoch": 0.42, + "learning_rate": 0.00014366142086883436, + "loss": 3.4278, + "step": 7330 + }, + { + "epoch": 0.42, + "learning_rate": 0.00014357130814086135, + "loss": 3.4087, + "step": 7335 + }, + { + "epoch": 0.42, + "learning_rate": 0.00014348115171948283, + "loss": 3.3881, + "step": 7340 + }, + { + "epoch": 0.42, + "learning_rate": 0.00014339095169510786, + "loss": 3.4754, + "step": 7345 + }, + { + "epoch": 0.42, + "learning_rate": 0.00014330070815818922, + "loss": 3.4787, + "step": 7350 + }, + { + "epoch": 0.42, + "learning_rate": 0.00014321042119922337, + "loss": 3.4813, + "step": 7355 + }, + { + "epoch": 0.42, + "learning_rate": 0.00014312009090875025, + "loss": 3.4928, + "step": 7360 + }, + { + "epoch": 0.42, + "learning_rate": 0.00014302971737735324, + "loss": 3.5872, + "step": 7365 + }, + { + "epoch": 0.42, + "learning_rate": 0.0001429393006956592, + "loss": 3.6154, + "step": 7370 + }, + { + "epoch": 0.42, + "learning_rate": 0.0001428488409543381, + "loss": 3.4776, + "step": 7375 + }, + { + "epoch": 0.42, + "learning_rate": 0.0001427583382441032, + "loss": 3.4298, + "step": 7380 + }, + { + "epoch": 0.42, + "learning_rate": 0.00014266779265571087, + "loss": 3.4607, + "step": 7385 + }, + { + "epoch": 0.42, + "learning_rate": 0.00014257720427996037, + "loss": 3.4419, + "step": 7390 + }, + { + "epoch": 0.42, + "learning_rate": 0.00014248657320769392, + "loss": 3.4743, + "step": 7395 + }, + { + "epoch": 0.42, + "learning_rate": 0.00014239589952979662, + "loss": 3.4788, + "step": 7400 + }, + { + "epoch": 0.42, + "learning_rate": 0.00014230518333719616, + "loss": 3.4183, + "step": 7405 + }, + { + "epoch": 0.43, + "learning_rate": 0.00014221442472086304, + "loss": 3.3945, + "step": 7410 + }, + { + "epoch": 0.43, + "learning_rate": 0.0001421236237718101, + "loss": 3.5529, + "step": 7415 + }, + { + "epoch": 0.43, + "learning_rate": 0.00014203278058109282, + "loss": 3.4581, + "step": 7420 + }, + { + "epoch": 0.43, + "learning_rate": 0.0001419418952398089, + "loss": 3.5005, + "step": 7425 + }, + { + "epoch": 0.43, + "learning_rate": 0.00014185096783909837, + "loss": 3.4987, + "step": 7430 + }, + { + "epoch": 0.43, + "learning_rate": 0.00014175999847014346, + "loss": 3.4392, + "step": 7435 + }, + { + "epoch": 0.43, + "learning_rate": 0.00014166898722416845, + "loss": 3.4617, + "step": 7440 + }, + { + "epoch": 0.43, + "learning_rate": 0.00014157793419243962, + "loss": 3.4424, + "step": 7445 + }, + { + "epoch": 0.43, + "learning_rate": 0.00014148683946626516, + "loss": 3.3479, + "step": 7450 + }, + { + "epoch": 0.43, + "learning_rate": 0.00014139570313699502, + "loss": 3.4125, + "step": 7455 + }, + { + "epoch": 0.43, + "learning_rate": 0.00014130452529602096, + "loss": 3.3344, + "step": 7460 + }, + { + "epoch": 0.43, + "learning_rate": 0.00014121330603477633, + "loss": 3.4292, + "step": 7465 + }, + { + "epoch": 0.43, + "learning_rate": 0.00014112204544473598, + "loss": 3.4597, + "step": 7470 + }, + { + "epoch": 0.43, + "learning_rate": 0.00014103074361741623, + "loss": 3.4574, + "step": 7475 + }, + { + "epoch": 0.43, + "learning_rate": 0.00014093940064437477, + "loss": 3.4205, + "step": 7480 + }, + { + "epoch": 0.43, + "learning_rate": 0.0001408480166172106, + "loss": 3.4575, + "step": 7485 + }, + { + "epoch": 0.43, + "learning_rate": 0.00014075659162756372, + "loss": 3.4016, + "step": 7490 + }, + { + "epoch": 0.43, + "learning_rate": 0.00014066512576711536, + "loss": 3.4171, + "step": 7495 + }, + { + "epoch": 0.43, + "learning_rate": 0.0001405736191275877, + "loss": 3.4309, + "step": 7500 + }, + { + "epoch": 0.43, + "learning_rate": 0.00014048207180074383, + "loss": 3.4368, + "step": 7505 + }, + { + "epoch": 0.43, + "learning_rate": 0.00014039048387838756, + "loss": 3.5356, + "step": 7510 + }, + { + "epoch": 0.43, + "learning_rate": 0.00014029885545236348, + "loss": 3.4471, + "step": 7515 + }, + { + "epoch": 0.43, + "learning_rate": 0.00014020718661455678, + "loss": 3.3656, + "step": 7520 + }, + { + "epoch": 0.43, + "learning_rate": 0.0001401154774568932, + "loss": 3.4545, + "step": 7525 + }, + { + "epoch": 0.43, + "learning_rate": 0.00014002372807133887, + "loss": 3.4414, + "step": 7530 + }, + { + "epoch": 0.43, + "learning_rate": 0.00013993193854990027, + "loss": 3.4883, + "step": 7535 + }, + { + "epoch": 0.43, + "learning_rate": 0.00013984010898462416, + "loss": 3.5103, + "step": 7540 + }, + { + "epoch": 0.43, + "learning_rate": 0.00013974823946759742, + "loss": 3.497, + "step": 7545 + }, + { + "epoch": 0.43, + "learning_rate": 0.000139656330090947, + "loss": 3.3844, + "step": 7550 + }, + { + "epoch": 0.43, + "learning_rate": 0.00013956438094683986, + "loss": 3.4489, + "step": 7555 + }, + { + "epoch": 0.43, + "learning_rate": 0.00013947239212748277, + "loss": 3.4537, + "step": 7560 + }, + { + "epoch": 0.43, + "learning_rate": 0.00013938036372512235, + "loss": 3.5462, + "step": 7565 + }, + { + "epoch": 0.43, + "learning_rate": 0.0001392882958320449, + "loss": 3.4655, + "step": 7570 + }, + { + "epoch": 0.43, + "learning_rate": 0.00013919618854057626, + "loss": 3.4888, + "step": 7575 + }, + { + "epoch": 0.43, + "learning_rate": 0.00013910404194308188, + "loss": 3.4834, + "step": 7580 + }, + { + "epoch": 0.44, + "learning_rate": 0.00013901185613196654, + "loss": 3.5, + "step": 7585 + }, + { + "epoch": 0.44, + "learning_rate": 0.00013891963119967439, + "loss": 3.5461, + "step": 7590 + }, + { + "epoch": 0.44, + "learning_rate": 0.00013882736723868884, + "loss": 3.4856, + "step": 7595 + }, + { + "epoch": 0.44, + "learning_rate": 0.00013873506434153228, + "loss": 3.5023, + "step": 7600 + }, + { + "epoch": 0.44, + "learning_rate": 0.0001386427226007664, + "loss": 3.5596, + "step": 7605 + }, + { + "epoch": 0.44, + "learning_rate": 0.00013855034210899161, + "loss": 3.6012, + "step": 7610 + }, + { + "epoch": 0.44, + "learning_rate": 0.00013845792295884735, + "loss": 3.5641, + "step": 7615 + }, + { + "epoch": 0.44, + "learning_rate": 0.0001383654652430117, + "loss": 3.5753, + "step": 7620 + }, + { + "epoch": 0.44, + "learning_rate": 0.00013827296905420143, + "loss": 3.4504, + "step": 7625 + }, + { + "epoch": 0.44, + "learning_rate": 0.00013818043448517202, + "loss": 3.4865, + "step": 7630 + }, + { + "epoch": 0.44, + "learning_rate": 0.00013808786162871728, + "loss": 3.4065, + "step": 7635 + }, + { + "epoch": 0.44, + "learning_rate": 0.00013799525057766948, + "loss": 3.4177, + "step": 7640 + }, + { + "epoch": 0.44, + "learning_rate": 0.00013790260142489922, + "loss": 3.3931, + "step": 7645 + }, + { + "epoch": 0.44, + "learning_rate": 0.00013780991426331522, + "loss": 3.417, + "step": 7650 + }, + { + "epoch": 0.44, + "learning_rate": 0.00013771718918586444, + "loss": 3.3492, + "step": 7655 + }, + { + "epoch": 0.44, + "learning_rate": 0.00013762442628553179, + "loss": 3.3848, + "step": 7660 + }, + { + "epoch": 0.44, + "learning_rate": 0.00013753162565534004, + "loss": 3.4446, + "step": 7665 + }, + { + "epoch": 0.44, + "learning_rate": 0.00013743878738834998, + "loss": 3.3916, + "step": 7670 + }, + { + "epoch": 0.44, + "learning_rate": 0.00013734591157765994, + "loss": 3.3724, + "step": 7675 + }, + { + "epoch": 0.44, + "learning_rate": 0.000137252998316406, + "loss": 3.3976, + "step": 7680 + }, + { + "epoch": 0.44, + "learning_rate": 0.00013716004769776189, + "loss": 3.4344, + "step": 7685 + }, + { + "epoch": 0.44, + "learning_rate": 0.00013706705981493853, + "loss": 3.3135, + "step": 7690 + }, + { + "epoch": 0.44, + "learning_rate": 0.00013697403476118454, + "loss": 3.433, + "step": 7695 + }, + { + "epoch": 0.44, + "learning_rate": 0.00013688097262978555, + "loss": 3.5037, + "step": 7700 + }, + { + "epoch": 0.44, + "learning_rate": 0.0001367878735140645, + "loss": 3.5432, + "step": 7705 + }, + { + "epoch": 0.44, + "learning_rate": 0.00013669473750738142, + "loss": 3.4425, + "step": 7710 + }, + { + "epoch": 0.44, + "learning_rate": 0.00013660156470313327, + "loss": 3.4169, + "step": 7715 + }, + { + "epoch": 0.44, + "learning_rate": 0.00013650835519475395, + "loss": 3.4031, + "step": 7720 + }, + { + "epoch": 0.44, + "learning_rate": 0.0001364151090757142, + "loss": 3.396, + "step": 7725 + }, + { + "epoch": 0.44, + "learning_rate": 0.0001363218264395214, + "loss": 3.3657, + "step": 7730 + }, + { + "epoch": 0.44, + "learning_rate": 0.00013622850737971963, + "loss": 3.3624, + "step": 7735 + }, + { + "epoch": 0.44, + "learning_rate": 0.00013613515198988938, + "loss": 3.3581, + "step": 7740 + }, + { + "epoch": 0.44, + "learning_rate": 0.0001360417603636477, + "loss": 3.4531, + "step": 7745 + }, + { + "epoch": 0.44, + "learning_rate": 0.0001359483325946479, + "loss": 3.408, + "step": 7750 + }, + { + "epoch": 0.44, + "learning_rate": 0.00013585486877657957, + "loss": 3.6096, + "step": 7755 + }, + { + "epoch": 0.45, + "learning_rate": 0.00013576136900316844, + "loss": 3.4814, + "step": 7760 + }, + { + "epoch": 0.45, + "learning_rate": 0.00013566783336817627, + "loss": 3.5047, + "step": 7765 + }, + { + "epoch": 0.45, + "learning_rate": 0.00013557426196540083, + "loss": 3.5586, + "step": 7770 + }, + { + "epoch": 0.45, + "learning_rate": 0.00013548065488867573, + "loss": 3.6723, + "step": 7775 + }, + { + "epoch": 0.45, + "learning_rate": 0.00013538701223187033, + "loss": 3.5402, + "step": 7780 + }, + { + "epoch": 0.45, + "learning_rate": 0.0001352933340888897, + "loss": 3.5497, + "step": 7785 + }, + { + "epoch": 0.45, + "learning_rate": 0.0001351996205536745, + "loss": 3.3999, + "step": 7790 + }, + { + "epoch": 0.45, + "learning_rate": 0.0001351058717202009, + "loss": 3.5724, + "step": 7795 + }, + { + "epoch": 0.45, + "learning_rate": 0.00013501208768248042, + "loss": 3.452, + "step": 7800 + }, + { + "epoch": 0.45, + "learning_rate": 0.0001349182685345599, + "loss": 3.4706, + "step": 7805 + }, + { + "epoch": 0.45, + "learning_rate": 0.00013482441437052134, + "loss": 3.4409, + "step": 7810 + }, + { + "epoch": 0.45, + "learning_rate": 0.00013473052528448201, + "loss": 3.4, + "step": 7815 + }, + { + "epoch": 0.45, + "learning_rate": 0.00013463660137059407, + "loss": 3.3028, + "step": 7820 + }, + { + "epoch": 0.45, + "learning_rate": 0.0001345426427230446, + "loss": 3.4394, + "step": 7825 + }, + { + "epoch": 0.45, + "learning_rate": 0.0001344486494360555, + "loss": 3.4557, + "step": 7830 + }, + { + "epoch": 0.45, + "learning_rate": 0.00013435462160388351, + "loss": 3.451, + "step": 7835 + }, + { + "epoch": 0.45, + "learning_rate": 0.00013426055932081997, + "loss": 3.4238, + "step": 7840 + }, + { + "epoch": 0.45, + "learning_rate": 0.00013416646268119074, + "loss": 3.4676, + "step": 7845 + }, + { + "epoch": 0.45, + "learning_rate": 0.00013407233177935608, + "loss": 3.4631, + "step": 7850 + }, + { + "epoch": 0.45, + "learning_rate": 0.00013397816670971072, + "loss": 3.4243, + "step": 7855 + }, + { + "epoch": 0.45, + "learning_rate": 0.00013388396756668354, + "loss": 3.4205, + "step": 7860 + }, + { + "epoch": 0.45, + "learning_rate": 0.00013378973444473776, + "loss": 3.3703, + "step": 7865 + }, + { + "epoch": 0.45, + "learning_rate": 0.0001336954674383705, + "loss": 3.3447, + "step": 7870 + }, + { + "epoch": 0.45, + "learning_rate": 0.00013360116664211293, + "loss": 3.3976, + "step": 7875 + }, + { + "epoch": 0.45, + "learning_rate": 0.00013350683215053013, + "loss": 3.4977, + "step": 7880 + }, + { + "epoch": 0.45, + "learning_rate": 0.00013341246405822088, + "loss": 3.4343, + "step": 7885 + }, + { + "epoch": 0.45, + "learning_rate": 0.00013331806245981775, + "loss": 3.4122, + "step": 7890 + }, + { + "epoch": 0.45, + "learning_rate": 0.0001332236274499869, + "loss": 3.4713, + "step": 7895 + }, + { + "epoch": 0.45, + "learning_rate": 0.00013312915912342793, + "loss": 3.5454, + "step": 7900 + }, + { + "epoch": 0.45, + "learning_rate": 0.0001330346575748739, + "loss": 3.4461, + "step": 7905 + }, + { + "epoch": 0.45, + "learning_rate": 0.00013294012289909114, + "loss": 3.5587, + "step": 7910 + }, + { + "epoch": 0.45, + "learning_rate": 0.00013284555519087933, + "loss": 3.4918, + "step": 7915 + }, + { + "epoch": 0.45, + "learning_rate": 0.0001327509545450711, + "loss": 3.49, + "step": 7920 + }, + { + "epoch": 0.45, + "learning_rate": 0.0001326563210565322, + "loss": 3.4059, + "step": 7925 + }, + { + "epoch": 0.45, + "learning_rate": 0.00013256165482016137, + "loss": 3.515, + "step": 7930 + }, + { + "epoch": 0.46, + "learning_rate": 0.00013246695593089, + "loss": 3.4244, + "step": 7935 + }, + { + "epoch": 0.46, + "learning_rate": 0.00013237222448368247, + "loss": 3.544, + "step": 7940 + }, + { + "epoch": 0.46, + "learning_rate": 0.00013227746057353562, + "loss": 3.5153, + "step": 7945 + }, + { + "epoch": 0.46, + "learning_rate": 0.0001321826642954789, + "loss": 3.6766, + "step": 7950 + }, + { + "epoch": 0.46, + "learning_rate": 0.00013208783574457432, + "loss": 3.5054, + "step": 7955 + }, + { + "epoch": 0.46, + "learning_rate": 0.00013199297501591603, + "loss": 3.5346, + "step": 7960 + }, + { + "epoch": 0.46, + "learning_rate": 0.00013189808220463072, + "loss": 3.5431, + "step": 7965 + }, + { + "epoch": 0.46, + "learning_rate": 0.00013180315740587701, + "loss": 3.5907, + "step": 7970 + }, + { + "epoch": 0.46, + "learning_rate": 0.00013170820071484572, + "loss": 3.6089, + "step": 7975 + }, + { + "epoch": 0.46, + "learning_rate": 0.0001316132122267597, + "loss": 3.635, + "step": 7980 + }, + { + "epoch": 0.46, + "learning_rate": 0.00013151819203687356, + "loss": 3.5179, + "step": 7985 + }, + { + "epoch": 0.46, + "learning_rate": 0.00013142314024047375, + "loss": 3.4676, + "step": 7990 + }, + { + "epoch": 0.46, + "learning_rate": 0.00013132805693287844, + "loss": 3.4573, + "step": 7995 + }, + { + "epoch": 0.46, + "learning_rate": 0.0001312329422094374, + "loss": 3.5012, + "step": 8000 + }, + { + "epoch": 0.46, + "learning_rate": 0.0001311377961655319, + "loss": 3.5817, + "step": 8005 + }, + { + "epoch": 0.46, + "learning_rate": 0.00013104261889657453, + "loss": 3.4574, + "step": 8010 + }, + { + "epoch": 0.46, + "learning_rate": 0.00013094741049800936, + "loss": 3.4982, + "step": 8015 + }, + { + "epoch": 0.46, + "learning_rate": 0.00013085217106531153, + "loss": 3.4739, + "step": 8020 + }, + { + "epoch": 0.46, + "learning_rate": 0.00013075690069398738, + "loss": 3.4639, + "step": 8025 + }, + { + "epoch": 0.46, + "learning_rate": 0.00013066159947957426, + "loss": 3.4945, + "step": 8030 + }, + { + "epoch": 0.46, + "learning_rate": 0.0001305662675176404, + "loss": 3.4611, + "step": 8035 + }, + { + "epoch": 0.46, + "learning_rate": 0.00013047090490378495, + "loss": 3.4568, + "step": 8040 + }, + { + "epoch": 0.46, + "learning_rate": 0.00013037551173363774, + "loss": 3.4884, + "step": 8045 + }, + { + "epoch": 0.46, + "learning_rate": 0.00013028008810285924, + "loss": 3.5121, + "step": 8050 + }, + { + "epoch": 0.46, + "learning_rate": 0.00013018463410714048, + "loss": 3.4505, + "step": 8055 + }, + { + "epoch": 0.46, + "learning_rate": 0.00013008914984220294, + "loss": 3.4589, + "step": 8060 + }, + { + "epoch": 0.46, + "learning_rate": 0.00012999363540379852, + "loss": 3.4695, + "step": 8065 + }, + { + "epoch": 0.46, + "learning_rate": 0.00012989809088770923, + "loss": 3.5258, + "step": 8070 + }, + { + "epoch": 0.46, + "learning_rate": 0.00012980251638974733, + "loss": 3.5527, + "step": 8075 + }, + { + "epoch": 0.46, + "learning_rate": 0.0001297069120057552, + "loss": 3.4829, + "step": 8080 + }, + { + "epoch": 0.46, + "learning_rate": 0.0001296112778316051, + "loss": 3.4205, + "step": 8085 + }, + { + "epoch": 0.46, + "learning_rate": 0.00012951561396319918, + "loss": 3.4917, + "step": 8090 + }, + { + "epoch": 0.46, + "learning_rate": 0.00012941992049646936, + "loss": 3.435, + "step": 8095 + }, + { + "epoch": 0.46, + "learning_rate": 0.00012932419752737735, + "loss": 3.4664, + "step": 8100 + }, + { + "epoch": 0.47, + "learning_rate": 0.00012922844515191425, + "loss": 3.4601, + "step": 8105 + }, + { + "epoch": 0.47, + "learning_rate": 0.00012913266346610086, + "loss": 3.3784, + "step": 8110 + }, + { + "epoch": 0.47, + "learning_rate": 0.0001290368525659872, + "loss": 3.4292, + "step": 8115 + }, + { + "epoch": 0.47, + "learning_rate": 0.00012894101254765268, + "loss": 3.4623, + "step": 8120 + }, + { + "epoch": 0.47, + "learning_rate": 0.00012884514350720586, + "loss": 3.4684, + "step": 8125 + }, + { + "epoch": 0.47, + "learning_rate": 0.00012874924554078448, + "loss": 3.4219, + "step": 8130 + }, + { + "epoch": 0.47, + "learning_rate": 0.00012865331874455517, + "loss": 3.4366, + "step": 8135 + }, + { + "epoch": 0.47, + "learning_rate": 0.0001285573632147136, + "loss": 3.564, + "step": 8140 + }, + { + "epoch": 0.47, + "learning_rate": 0.00012846137904748414, + "loss": 3.5688, + "step": 8145 + }, + { + "epoch": 0.47, + "learning_rate": 0.00012836536633911995, + "loss": 3.524, + "step": 8150 + }, + { + "epoch": 0.47, + "learning_rate": 0.0001282693251859028, + "loss": 3.4872, + "step": 8155 + }, + { + "epoch": 0.47, + "learning_rate": 0.00012817325568414297, + "loss": 3.4709, + "step": 8160 + }, + { + "epoch": 0.47, + "learning_rate": 0.00012807715793017918, + "loss": 3.4763, + "step": 8165 + }, + { + "epoch": 0.47, + "learning_rate": 0.00012798103202037842, + "loss": 3.5505, + "step": 8170 + }, + { + "epoch": 0.47, + "learning_rate": 0.00012788487805113602, + "loss": 3.4407, + "step": 8175 + }, + { + "epoch": 0.47, + "learning_rate": 0.0001277886961188754, + "loss": 3.426, + "step": 8180 + }, + { + "epoch": 0.47, + "learning_rate": 0.00012769248632004795, + "loss": 3.4404, + "step": 8185 + }, + { + "epoch": 0.47, + "learning_rate": 0.0001275962487511332, + "loss": 3.4364, + "step": 8190 + }, + { + "epoch": 0.47, + "learning_rate": 0.00012749998350863827, + "loss": 3.4853, + "step": 8195 + }, + { + "epoch": 0.47, + "learning_rate": 0.0001274036906890982, + "loss": 3.4901, + "step": 8200 + }, + { + "epoch": 0.47, + "learning_rate": 0.00012730737038907567, + "loss": 3.4876, + "step": 8205 + }, + { + "epoch": 0.47, + "learning_rate": 0.00012721102270516087, + "loss": 3.4468, + "step": 8210 + }, + { + "epoch": 0.47, + "learning_rate": 0.00012711464773397152, + "loss": 3.5149, + "step": 8215 + }, + { + "epoch": 0.47, + "learning_rate": 0.0001270182455721526, + "loss": 3.4695, + "step": 8220 + }, + { + "epoch": 0.47, + "learning_rate": 0.00012692181631637642, + "loss": 3.553, + "step": 8225 + }, + { + "epoch": 0.47, + "learning_rate": 0.00012682536006334248, + "loss": 3.4484, + "step": 8230 + }, + { + "epoch": 0.47, + "learning_rate": 0.00012672887690977732, + "loss": 3.4058, + "step": 8235 + }, + { + "epoch": 0.47, + "learning_rate": 0.00012663236695243448, + "loss": 3.3824, + "step": 8240 + }, + { + "epoch": 0.47, + "learning_rate": 0.0001265358302880943, + "loss": 3.4013, + "step": 8245 + }, + { + "epoch": 0.47, + "learning_rate": 0.00012643926701356404, + "loss": 3.3883, + "step": 8250 + }, + { + "epoch": 0.47, + "learning_rate": 0.00012634267722567752, + "loss": 3.4381, + "step": 8255 + }, + { + "epoch": 0.47, + "learning_rate": 0.00012624606102129516, + "loss": 3.4436, + "step": 8260 + }, + { + "epoch": 0.47, + "learning_rate": 0.00012614941849730405, + "loss": 3.367, + "step": 8265 + }, + { + "epoch": 0.47, + "learning_rate": 0.00012605274975061736, + "loss": 3.4176, + "step": 8270 + }, + { + "epoch": 0.47, + "learning_rate": 0.00012595605487817482, + "loss": 3.4536, + "step": 8275 + }, + { + "epoch": 0.48, + "learning_rate": 0.00012585933397694224, + "loss": 3.4219, + "step": 8280 + }, + { + "epoch": 0.48, + "learning_rate": 0.00012576258714391155, + "loss": 3.3925, + "step": 8285 + }, + { + "epoch": 0.48, + "learning_rate": 0.00012566581447610072, + "loss": 3.3326, + "step": 8290 + }, + { + "epoch": 0.48, + "learning_rate": 0.0001255690160705536, + "loss": 3.3995, + "step": 8295 + }, + { + "epoch": 0.48, + "learning_rate": 0.0001254721920243398, + "loss": 3.3727, + "step": 8300 + }, + { + "epoch": 0.48, + "learning_rate": 0.00012537534243455472, + "loss": 3.3997, + "step": 8305 + }, + { + "epoch": 0.48, + "learning_rate": 0.00012527846739831934, + "loss": 3.3836, + "step": 8310 + }, + { + "epoch": 0.48, + "learning_rate": 0.00012518156701278019, + "loss": 3.3737, + "step": 8315 + }, + { + "epoch": 0.48, + "learning_rate": 0.0001250846413751092, + "loss": 3.4188, + "step": 8320 + }, + { + "epoch": 0.48, + "learning_rate": 0.00012498769058250355, + "loss": 3.4227, + "step": 8325 + }, + { + "epoch": 0.48, + "learning_rate": 0.00012489071473218574, + "loss": 3.4729, + "step": 8330 + }, + { + "epoch": 0.48, + "learning_rate": 0.0001247937139214034, + "loss": 3.4225, + "step": 8335 + }, + { + "epoch": 0.48, + "learning_rate": 0.00012469668824742914, + "loss": 3.5231, + "step": 8340 + }, + { + "epoch": 0.48, + "learning_rate": 0.00012459963780756054, + "loss": 3.4537, + "step": 8345 + }, + { + "epoch": 0.48, + "learning_rate": 0.00012450256269911996, + "loss": 3.4557, + "step": 8350 + }, + { + "epoch": 0.48, + "learning_rate": 0.0001244054630194546, + "loss": 3.4949, + "step": 8355 + }, + { + "epoch": 0.48, + "learning_rate": 0.00012430833886593613, + "loss": 3.3971, + "step": 8360 + }, + { + "epoch": 0.48, + "learning_rate": 0.00012421119033596102, + "loss": 3.4856, + "step": 8365 + }, + { + "epoch": 0.48, + "learning_rate": 0.0001241140175269499, + "loss": 3.5281, + "step": 8370 + }, + { + "epoch": 0.48, + "learning_rate": 0.00012401682053634792, + "loss": 3.5432, + "step": 8375 + }, + { + "epoch": 0.48, + "learning_rate": 0.00012391959946162447, + "loss": 3.5991, + "step": 8380 + }, + { + "epoch": 0.48, + "learning_rate": 0.00012382235440027307, + "loss": 3.5185, + "step": 8385 + }, + { + "epoch": 0.48, + "learning_rate": 0.0001237250854498112, + "loss": 3.4634, + "step": 8390 + }, + { + "epoch": 0.48, + "learning_rate": 0.00012362779270778048, + "loss": 3.445, + "step": 8395 + }, + { + "epoch": 0.48, + "learning_rate": 0.00012353047627174625, + "loss": 3.4523, + "step": 8400 + }, + { + "epoch": 0.48, + "learning_rate": 0.00012343313623929764, + "loss": 3.3927, + "step": 8405 + }, + { + "epoch": 0.48, + "learning_rate": 0.00012333577270804745, + "loss": 3.4183, + "step": 8410 + }, + { + "epoch": 0.48, + "learning_rate": 0.0001232383857756321, + "loss": 3.4643, + "step": 8415 + }, + { + "epoch": 0.48, + "learning_rate": 0.00012314097553971137, + "loss": 3.475, + "step": 8420 + }, + { + "epoch": 0.48, + "learning_rate": 0.00012304354209796846, + "loss": 3.458, + "step": 8425 + }, + { + "epoch": 0.48, + "learning_rate": 0.00012294608554810988, + "loss": 3.4628, + "step": 8430 + }, + { + "epoch": 0.48, + "learning_rate": 0.00012284860598786525, + "loss": 3.4764, + "step": 8435 + }, + { + "epoch": 0.48, + "learning_rate": 0.0001227511035149873, + "loss": 3.5294, + "step": 8440 + }, + { + "epoch": 0.48, + "learning_rate": 0.00012265357822725172, + "loss": 3.6069, + "step": 8445 + }, + { + "epoch": 0.48, + "learning_rate": 0.00012255603022245712, + "loss": 3.4769, + "step": 8450 + }, + { + "epoch": 0.49, + "learning_rate": 0.0001224584595984248, + "loss": 3.5978, + "step": 8455 + }, + { + "epoch": 0.49, + "learning_rate": 0.00012236086645299888, + "loss": 3.4736, + "step": 8460 + }, + { + "epoch": 0.49, + "learning_rate": 0.00012226325088404588, + "loss": 3.5129, + "step": 8465 + }, + { + "epoch": 0.49, + "learning_rate": 0.00012216561298945502, + "loss": 3.5887, + "step": 8470 + }, + { + "epoch": 0.49, + "learning_rate": 0.00012206795286713774, + "loss": 3.5517, + "step": 8475 + }, + { + "epoch": 0.49, + "learning_rate": 0.00012197027061502781, + "loss": 3.4093, + "step": 8480 + }, + { + "epoch": 0.49, + "learning_rate": 0.00012187256633108129, + "loss": 3.4541, + "step": 8485 + }, + { + "epoch": 0.49, + "learning_rate": 0.00012177484011327618, + "loss": 3.5046, + "step": 8490 + }, + { + "epoch": 0.49, + "learning_rate": 0.00012167709205961256, + "loss": 3.4509, + "step": 8495 + }, + { + "epoch": 0.49, + "learning_rate": 0.00012157932226811246, + "loss": 3.4786, + "step": 8500 + }, + { + "epoch": 0.49, + "learning_rate": 0.00012148153083681954, + "loss": 3.443, + "step": 8505 + }, + { + "epoch": 0.49, + "learning_rate": 0.00012138371786379938, + "loss": 3.393, + "step": 8510 + }, + { + "epoch": 0.49, + "learning_rate": 0.00012128588344713899, + "loss": 3.4577, + "step": 8515 + }, + { + "epoch": 0.49, + "learning_rate": 0.0001211880276849469, + "loss": 3.4403, + "step": 8520 + }, + { + "epoch": 0.49, + "learning_rate": 0.00012109015067535321, + "loss": 3.4695, + "step": 8525 + }, + { + "epoch": 0.49, + "learning_rate": 0.00012099225251650907, + "loss": 3.4281, + "step": 8530 + }, + { + "epoch": 0.49, + "learning_rate": 0.00012089433330658705, + "loss": 3.5161, + "step": 8535 + }, + { + "epoch": 0.49, + "learning_rate": 0.00012079639314378075, + "loss": 3.5009, + "step": 8540 + }, + { + "epoch": 0.49, + "learning_rate": 0.00012069843212630474, + "loss": 3.4722, + "step": 8545 + }, + { + "epoch": 0.49, + "learning_rate": 0.00012060045035239465, + "loss": 3.4772, + "step": 8550 + }, + { + "epoch": 0.49, + "learning_rate": 0.00012050244792030667, + "loss": 3.4992, + "step": 8555 + }, + { + "epoch": 0.49, + "learning_rate": 0.00012040442492831798, + "loss": 3.4334, + "step": 8560 + }, + { + "epoch": 0.49, + "learning_rate": 0.00012030638147472623, + "loss": 3.3973, + "step": 8565 + }, + { + "epoch": 0.49, + "learning_rate": 0.00012020831765784957, + "loss": 3.4616, + "step": 8570 + }, + { + "epoch": 0.49, + "learning_rate": 0.00012011023357602668, + "loss": 3.5077, + "step": 8575 + }, + { + "epoch": 0.49, + "learning_rate": 0.00012001212932761645, + "loss": 3.3947, + "step": 8580 + }, + { + "epoch": 0.49, + "learning_rate": 0.00011991400501099805, + "loss": 3.3951, + "step": 8585 + }, + { + "epoch": 0.49, + "learning_rate": 0.00011981586072457078, + "loss": 3.454, + "step": 8590 + }, + { + "epoch": 0.49, + "learning_rate": 0.00011971769656675391, + "loss": 3.5133, + "step": 8595 + }, + { + "epoch": 0.49, + "learning_rate": 0.00011961951263598677, + "loss": 3.4537, + "step": 8600 + }, + { + "epoch": 0.49, + "learning_rate": 0.00011952130903072832, + "loss": 3.3742, + "step": 8605 + }, + { + "epoch": 0.49, + "learning_rate": 0.00011942308584945741, + "loss": 3.5245, + "step": 8610 + }, + { + "epoch": 0.49, + "learning_rate": 0.00011932484319067245, + "loss": 3.5371, + "step": 8615 + }, + { + "epoch": 0.49, + "learning_rate": 0.00011922658115289141, + "loss": 3.4723, + "step": 8620 + }, + { + "epoch": 0.49, + "learning_rate": 0.00011912829983465168, + "loss": 3.4782, + "step": 8625 + }, + { + "epoch": 0.5, + "learning_rate": 0.00011902999933450997, + "loss": 3.5778, + "step": 8630 + }, + { + "epoch": 0.5, + "learning_rate": 0.0001189316797510423, + "loss": 3.567, + "step": 8635 + }, + { + "epoch": 0.5, + "learning_rate": 0.00011883334118284369, + "loss": 3.4798, + "step": 8640 + }, + { + "epoch": 0.5, + "learning_rate": 0.00011873498372852828, + "loss": 3.5305, + "step": 8645 + }, + { + "epoch": 0.5, + "learning_rate": 0.0001186366074867292, + "loss": 3.4151, + "step": 8650 + }, + { + "epoch": 0.5, + "learning_rate": 0.00011853821255609836, + "loss": 3.4176, + "step": 8655 + }, + { + "epoch": 0.5, + "learning_rate": 0.00011843979903530638, + "loss": 3.4367, + "step": 8660 + }, + { + "epoch": 0.5, + "learning_rate": 0.00011834136702304257, + "loss": 3.5757, + "step": 8665 + }, + { + "epoch": 0.5, + "learning_rate": 0.00011824291661801479, + "loss": 3.3508, + "step": 8670 + }, + { + "epoch": 0.5, + "learning_rate": 0.00011814444791894934, + "loss": 3.4016, + "step": 8675 + }, + { + "epoch": 0.5, + "learning_rate": 0.0001180459610245908, + "loss": 3.4411, + "step": 8680 + }, + { + "epoch": 0.5, + "learning_rate": 0.00011794745603370212, + "loss": 3.4093, + "step": 8685 + }, + { + "epoch": 0.5, + "learning_rate": 0.00011784893304506424, + "loss": 3.3866, + "step": 8690 + }, + { + "epoch": 0.5, + "learning_rate": 0.0001177503921574763, + "loss": 3.41, + "step": 8695 + }, + { + "epoch": 0.5, + "learning_rate": 0.00011765183346975528, + "loss": 3.448, + "step": 8700 + }, + { + "epoch": 0.5, + "learning_rate": 0.0001175532570807361, + "loss": 3.4959, + "step": 8705 + }, + { + "epoch": 0.5, + "learning_rate": 0.00011745466308927136, + "loss": 3.5446, + "step": 8710 + }, + { + "epoch": 0.5, + "learning_rate": 0.00011735605159423131, + "loss": 3.4133, + "step": 8715 + }, + { + "epoch": 0.5, + "learning_rate": 0.00011725742269450382, + "loss": 3.3382, + "step": 8720 + }, + { + "epoch": 0.5, + "learning_rate": 0.00011715877648899413, + "loss": 3.4214, + "step": 8725 + }, + { + "epoch": 0.5, + "learning_rate": 0.0001170601130766249, + "loss": 3.3563, + "step": 8730 + }, + { + "epoch": 0.5, + "learning_rate": 0.00011696143255633607, + "loss": 3.4294, + "step": 8735 + }, + { + "epoch": 0.5, + "learning_rate": 0.0001168627350270846, + "loss": 3.4487, + "step": 8740 + }, + { + "epoch": 0.5, + "learning_rate": 0.00011676402058784463, + "loss": 3.3951, + "step": 8745 + }, + { + "epoch": 0.5, + "learning_rate": 0.00011666528933760725, + "loss": 3.3864, + "step": 8750 + }, + { + "epoch": 0.5, + "learning_rate": 0.00011656654137538032, + "loss": 3.4799, + "step": 8755 + }, + { + "epoch": 0.5, + "learning_rate": 0.0001164677768001886, + "loss": 3.3755, + "step": 8760 + }, + { + "epoch": 0.5, + "learning_rate": 0.00011636899571107333, + "loss": 3.4174, + "step": 8765 + }, + { + "epoch": 0.5, + "learning_rate": 0.00011627019820709246, + "loss": 3.4656, + "step": 8770 + }, + { + "epoch": 0.5, + "learning_rate": 0.00011617138438732036, + "loss": 3.5169, + "step": 8775 + }, + { + "epoch": 0.5, + "learning_rate": 0.00011607255435084772, + "loss": 3.4928, + "step": 8780 + }, + { + "epoch": 0.5, + "learning_rate": 0.00011597370819678157, + "loss": 3.4291, + "step": 8785 + }, + { + "epoch": 0.5, + "learning_rate": 0.00011587484602424499, + "loss": 3.346, + "step": 8790 + }, + { + "epoch": 0.5, + "learning_rate": 0.00011577596793237722, + "loss": 3.5374, + "step": 8795 + }, + { + "epoch": 0.5, + "learning_rate": 0.00011567707402033345, + "loss": 3.5087, + "step": 8800 + }, + { + "epoch": 0.51, + "learning_rate": 0.00011557816438728467, + "loss": 3.5382, + "step": 8805 + }, + { + "epoch": 0.51, + "learning_rate": 0.00011547923913241774, + "loss": 3.5291, + "step": 8810 + }, + { + "epoch": 0.51, + "learning_rate": 0.00011538029835493507, + "loss": 3.5019, + "step": 8815 + }, + { + "epoch": 0.51, + "learning_rate": 0.00011528134215405473, + "loss": 3.4719, + "step": 8820 + }, + { + "epoch": 0.51, + "learning_rate": 0.00011518237062901023, + "loss": 3.4307, + "step": 8825 + }, + { + "epoch": 0.51, + "learning_rate": 0.00011508338387905038, + "loss": 3.4941, + "step": 8830 + }, + { + "epoch": 0.51, + "learning_rate": 0.0001149843820034394, + "loss": 3.4934, + "step": 8835 + }, + { + "epoch": 0.51, + "learning_rate": 0.00011488536510145651, + "loss": 3.4862, + "step": 8840 + }, + { + "epoch": 0.51, + "learning_rate": 0.00011478633327239614, + "loss": 3.5151, + "step": 8845 + }, + { + "epoch": 0.51, + "learning_rate": 0.0001146872866155676, + "loss": 3.4967, + "step": 8850 + }, + { + "epoch": 0.51, + "learning_rate": 0.00011458822523029509, + "loss": 3.4283, + "step": 8855 + }, + { + "epoch": 0.51, + "learning_rate": 0.00011448914921591765, + "loss": 3.5573, + "step": 8860 + }, + { + "epoch": 0.51, + "learning_rate": 0.00011439005867178884, + "loss": 3.4325, + "step": 8865 + }, + { + "epoch": 0.51, + "learning_rate": 0.00011429095369727696, + "loss": 3.3635, + "step": 8870 + }, + { + "epoch": 0.51, + "learning_rate": 0.00011419183439176464, + "loss": 3.3917, + "step": 8875 + }, + { + "epoch": 0.51, + "learning_rate": 0.00011409270085464898, + "loss": 3.4305, + "step": 8880 + }, + { + "epoch": 0.51, + "learning_rate": 0.0001139935531853413, + "loss": 3.4148, + "step": 8885 + }, + { + "epoch": 0.51, + "learning_rate": 0.0001138943914832671, + "loss": 3.4412, + "step": 8890 + }, + { + "epoch": 0.51, + "learning_rate": 0.00011379521584786599, + "loss": 3.4597, + "step": 8895 + }, + { + "epoch": 0.51, + "learning_rate": 0.0001136960263785915, + "loss": 3.4042, + "step": 8900 + }, + { + "epoch": 0.51, + "learning_rate": 0.00011359682317491098, + "loss": 3.4872, + "step": 8905 + }, + { + "epoch": 0.51, + "learning_rate": 0.00011349760633630575, + "loss": 3.4709, + "step": 8910 + }, + { + "epoch": 0.51, + "learning_rate": 0.00011339837596227061, + "loss": 3.3989, + "step": 8915 + }, + { + "epoch": 0.51, + "learning_rate": 0.00011329913215231401, + "loss": 3.388, + "step": 8920 + }, + { + "epoch": 0.51, + "learning_rate": 0.00011319987500595785, + "loss": 3.5418, + "step": 8925 + }, + { + "epoch": 0.51, + "learning_rate": 0.00011310060462273744, + "loss": 3.4708, + "step": 8930 + }, + { + "epoch": 0.51, + "learning_rate": 0.00011300132110220134, + "loss": 3.4527, + "step": 8935 + }, + { + "epoch": 0.51, + "learning_rate": 0.0001129020245439113, + "loss": 3.4219, + "step": 8940 + }, + { + "epoch": 0.51, + "learning_rate": 0.00011280271504744208, + "loss": 3.5115, + "step": 8945 + }, + { + "epoch": 0.51, + "learning_rate": 0.00011270339271238153, + "loss": 3.467, + "step": 8950 + }, + { + "epoch": 0.51, + "learning_rate": 0.00011260405763833029, + "loss": 3.4677, + "step": 8955 + }, + { + "epoch": 0.51, + "learning_rate": 0.00011250470992490176, + "loss": 3.4673, + "step": 8960 + }, + { + "epoch": 0.51, + "learning_rate": 0.0001124053496717221, + "loss": 3.4545, + "step": 8965 + }, + { + "epoch": 0.51, + "learning_rate": 0.00011230597697842998, + "loss": 3.3728, + "step": 8970 + }, + { + "epoch": 0.51, + "learning_rate": 0.0001122065919446765, + "loss": 3.4272, + "step": 8975 + }, + { + "epoch": 0.52, + "learning_rate": 0.00011210719467012529, + "loss": 3.433, + "step": 8980 + }, + { + "epoch": 0.52, + "learning_rate": 0.0001120077852544521, + "loss": 3.3992, + "step": 8985 + }, + { + "epoch": 0.52, + "learning_rate": 0.00011190836379734495, + "loss": 3.3951, + "step": 8990 + }, + { + "epoch": 0.52, + "learning_rate": 0.00011180893039850388, + "loss": 3.4045, + "step": 8995 + }, + { + "epoch": 0.52, + "learning_rate": 0.00011170948515764088, + "loss": 3.4196, + "step": 9000 + }, + { + "epoch": 0.52, + "learning_rate": 0.00011161002817447996, + "loss": 3.3977, + "step": 9005 + }, + { + "epoch": 0.52, + "learning_rate": 0.00011151055954875673, + "loss": 3.5185, + "step": 9010 + }, + { + "epoch": 0.52, + "learning_rate": 0.00011141107938021858, + "loss": 3.4191, + "step": 9015 + }, + { + "epoch": 0.52, + "learning_rate": 0.00011131158776862445, + "loss": 3.4764, + "step": 9020 + }, + { + "epoch": 0.52, + "learning_rate": 0.0001112120848137447, + "loss": 3.497, + "step": 9025 + }, + { + "epoch": 0.52, + "learning_rate": 0.0001111125706153612, + "loss": 3.4183, + "step": 9030 + }, + { + "epoch": 0.52, + "learning_rate": 0.00011101304527326695, + "loss": 3.4361, + "step": 9035 + }, + { + "epoch": 0.52, + "learning_rate": 0.00011091350888726619, + "loss": 3.3706, + "step": 9040 + }, + { + "epoch": 0.52, + "learning_rate": 0.0001108139615571743, + "loss": 3.4838, + "step": 9045 + }, + { + "epoch": 0.52, + "learning_rate": 0.00011071440338281745, + "loss": 3.4956, + "step": 9050 + }, + { + "epoch": 0.52, + "learning_rate": 0.00011061483446403289, + "loss": 3.4855, + "step": 9055 + }, + { + "epoch": 0.52, + "learning_rate": 0.00011051525490066852, + "loss": 3.4375, + "step": 9060 + }, + { + "epoch": 0.52, + "learning_rate": 0.00011041566479258294, + "loss": 3.4705, + "step": 9065 + }, + { + "epoch": 0.52, + "learning_rate": 0.0001103160642396454, + "loss": 3.4337, + "step": 9070 + }, + { + "epoch": 0.52, + "learning_rate": 0.00011021645334173547, + "loss": 3.4517, + "step": 9075 + }, + { + "epoch": 0.52, + "learning_rate": 0.00011011683219874323, + "loss": 3.4052, + "step": 9080 + }, + { + "epoch": 0.52, + "learning_rate": 0.00011001720091056897, + "loss": 3.4145, + "step": 9085 + }, + { + "epoch": 0.52, + "learning_rate": 0.00010991755957712318, + "loss": 3.4563, + "step": 9090 + }, + { + "epoch": 0.52, + "learning_rate": 0.00010981790829832641, + "loss": 3.4243, + "step": 9095 + }, + { + "epoch": 0.52, + "learning_rate": 0.00010971824717410917, + "loss": 3.4745, + "step": 9100 + }, + { + "epoch": 0.52, + "learning_rate": 0.00010961857630441187, + "loss": 3.3461, + "step": 9105 + }, + { + "epoch": 0.52, + "learning_rate": 0.00010951889578918471, + "loss": 3.3334, + "step": 9110 + }, + { + "epoch": 0.52, + "learning_rate": 0.00010941920572838747, + "loss": 3.3496, + "step": 9115 + }, + { + "epoch": 0.52, + "learning_rate": 0.00010931950622198965, + "loss": 3.3052, + "step": 9120 + }, + { + "epoch": 0.52, + "learning_rate": 0.00010921979736997006, + "loss": 3.3457, + "step": 9125 + }, + { + "epoch": 0.52, + "learning_rate": 0.000109120079272317, + "loss": 3.3322, + "step": 9130 + }, + { + "epoch": 0.52, + "learning_rate": 0.00010902035202902798, + "loss": 3.3435, + "step": 9135 + }, + { + "epoch": 0.52, + "learning_rate": 0.00010892061574010972, + "loss": 3.4383, + "step": 9140 + }, + { + "epoch": 0.52, + "learning_rate": 0.00010882087050557803, + "loss": 3.4166, + "step": 9145 + }, + { + "epoch": 0.52, + "learning_rate": 0.00010872111642545759, + "loss": 3.5438, + "step": 9150 + }, + { + "epoch": 0.53, + "learning_rate": 0.00010862135359978205, + "loss": 3.5156, + "step": 9155 + }, + { + "epoch": 0.53, + "learning_rate": 0.00010852158212859378, + "loss": 3.5473, + "step": 9160 + }, + { + "epoch": 0.53, + "learning_rate": 0.00010842180211194384, + "loss": 3.5342, + "step": 9165 + }, + { + "epoch": 0.53, + "learning_rate": 0.00010832201364989186, + "loss": 3.4987, + "step": 9170 + }, + { + "epoch": 0.53, + "learning_rate": 0.00010822221684250593, + "loss": 3.5329, + "step": 9175 + }, + { + "epoch": 0.53, + "learning_rate": 0.00010812241178986254, + "loss": 3.5798, + "step": 9180 + }, + { + "epoch": 0.53, + "learning_rate": 0.00010802259859204635, + "loss": 3.5865, + "step": 9185 + }, + { + "epoch": 0.53, + "learning_rate": 0.00010792277734915033, + "loss": 3.541, + "step": 9190 + }, + { + "epoch": 0.53, + "learning_rate": 0.0001078229481612754, + "loss": 3.5058, + "step": 9195 + }, + { + "epoch": 0.53, + "learning_rate": 0.00010772311112853053, + "loss": 3.5591, + "step": 9200 + }, + { + "epoch": 0.53, + "learning_rate": 0.00010762326635103251, + "loss": 3.5342, + "step": 9205 + }, + { + "epoch": 0.53, + "learning_rate": 0.00010752341392890587, + "loss": 3.4278, + "step": 9210 + }, + { + "epoch": 0.53, + "learning_rate": 0.00010742355396228287, + "loss": 3.5376, + "step": 9215 + }, + { + "epoch": 0.53, + "learning_rate": 0.00010732368655130333, + "loss": 3.3675, + "step": 9220 + }, + { + "epoch": 0.53, + "learning_rate": 0.00010722381179611449, + "loss": 3.3711, + "step": 9225 + }, + { + "epoch": 0.53, + "learning_rate": 0.000107123929796871, + "loss": 3.3635, + "step": 9230 + }, + { + "epoch": 0.53, + "learning_rate": 0.0001070240406537347, + "loss": 3.4032, + "step": 9235 + }, + { + "epoch": 0.53, + "learning_rate": 0.00010692414446687471, + "loss": 3.7505, + "step": 9240 + }, + { + "epoch": 0.53, + "learning_rate": 0.0001068242413364671, + "loss": 3.6249, + "step": 9245 + }, + { + "epoch": 0.53, + "learning_rate": 0.00010672433136269499, + "loss": 3.3861, + "step": 9250 + }, + { + "epoch": 0.53, + "learning_rate": 0.00010662441464574833, + "loss": 3.571, + "step": 9255 + }, + { + "epoch": 0.53, + "learning_rate": 0.00010652449128582376, + "loss": 3.4986, + "step": 9260 + }, + { + "epoch": 0.53, + "learning_rate": 0.00010642456138312473, + "loss": 3.4416, + "step": 9265 + }, + { + "epoch": 0.53, + "learning_rate": 0.00010632462503786114, + "loss": 3.4873, + "step": 9270 + }, + { + "epoch": 0.53, + "learning_rate": 0.00010622468235024936, + "loss": 3.5095, + "step": 9275 + }, + { + "epoch": 0.53, + "learning_rate": 0.00010612473342051219, + "loss": 3.4704, + "step": 9280 + }, + { + "epoch": 0.53, + "learning_rate": 0.00010602477834887858, + "loss": 3.5554, + "step": 9285 + }, + { + "epoch": 0.53, + "learning_rate": 0.00010592481723558374, + "loss": 3.4456, + "step": 9290 + }, + { + "epoch": 0.53, + "learning_rate": 0.00010582485018086891, + "loss": 3.4669, + "step": 9295 + }, + { + "epoch": 0.53, + "learning_rate": 0.00010572487728498127, + "loss": 3.4011, + "step": 9300 + }, + { + "epoch": 0.53, + "learning_rate": 0.00010562489864817382, + "loss": 3.4686, + "step": 9305 + }, + { + "epoch": 0.53, + "learning_rate": 0.00010552491437070537, + "loss": 3.5753, + "step": 9310 + }, + { + "epoch": 0.53, + "learning_rate": 0.00010542492455284043, + "loss": 3.4919, + "step": 9315 + }, + { + "epoch": 0.53, + "learning_rate": 0.00010532492929484898, + "loss": 3.5018, + "step": 9320 + }, + { + "epoch": 0.54, + "learning_rate": 0.00010522492869700648, + "loss": 3.4408, + "step": 9325 + }, + { + "epoch": 0.54, + "learning_rate": 0.00010512492285959382, + "loss": 3.4225, + "step": 9330 + }, + { + "epoch": 0.54, + "learning_rate": 0.00010502491188289695, + "loss": 3.4192, + "step": 9335 + }, + { + "epoch": 0.54, + "learning_rate": 0.00010492489586720724, + "loss": 3.4308, + "step": 9340 + }, + { + "epoch": 0.54, + "learning_rate": 0.00010482487491282089, + "loss": 3.4666, + "step": 9345 + }, + { + "epoch": 0.54, + "learning_rate": 0.00010472484912003913, + "loss": 3.557, + "step": 9350 + }, + { + "epoch": 0.54, + "learning_rate": 0.00010462481858916812, + "loss": 3.4161, + "step": 9355 + }, + { + "epoch": 0.54, + "learning_rate": 0.0001045247834205186, + "loss": 3.4066, + "step": 9360 + }, + { + "epoch": 0.54, + "learning_rate": 0.00010442474371440618, + "loss": 3.502, + "step": 9365 + }, + { + "epoch": 0.54, + "learning_rate": 0.00010432469957115083, + "loss": 3.5101, + "step": 9370 + }, + { + "epoch": 0.54, + "learning_rate": 0.00010422465109107702, + "loss": 3.4485, + "step": 9375 + }, + { + "epoch": 0.54, + "learning_rate": 0.00010412459837451367, + "loss": 3.447, + "step": 9380 + }, + { + "epoch": 0.54, + "learning_rate": 0.00010402454152179377, + "loss": 3.4666, + "step": 9385 + }, + { + "epoch": 0.54, + "learning_rate": 0.00010392448063325463, + "loss": 3.4728, + "step": 9390 + }, + { + "epoch": 0.54, + "learning_rate": 0.00010382441580923752, + "loss": 3.4636, + "step": 9395 + }, + { + "epoch": 0.54, + "learning_rate": 0.00010372434715008763, + "loss": 3.4724, + "step": 9400 + }, + { + "epoch": 0.54, + "learning_rate": 0.00010362427475615413, + "loss": 3.5011, + "step": 9405 + }, + { + "epoch": 0.54, + "learning_rate": 0.00010352419872778971, + "loss": 3.3862, + "step": 9410 + }, + { + "epoch": 0.54, + "learning_rate": 0.00010342411916535093, + "loss": 3.3817, + "step": 9415 + }, + { + "epoch": 0.54, + "learning_rate": 0.00010332403616919779, + "loss": 3.4104, + "step": 9420 + }, + { + "epoch": 0.54, + "learning_rate": 0.00010322394983969368, + "loss": 3.3997, + "step": 9425 + }, + { + "epoch": 0.54, + "learning_rate": 0.0001031238602772055, + "loss": 3.4299, + "step": 9430 + }, + { + "epoch": 0.54, + "learning_rate": 0.00010302376758210319, + "loss": 3.4613, + "step": 9435 + }, + { + "epoch": 0.54, + "learning_rate": 0.00010292367185475997, + "loss": 3.4249, + "step": 9440 + }, + { + "epoch": 0.54, + "learning_rate": 0.00010282357319555207, + "loss": 3.4632, + "step": 9445 + }, + { + "epoch": 0.54, + "learning_rate": 0.00010272347170485863, + "loss": 3.4284, + "step": 9450 + }, + { + "epoch": 0.54, + "learning_rate": 0.00010262336748306165, + "loss": 3.4313, + "step": 9455 + }, + { + "epoch": 0.54, + "learning_rate": 0.0001025232606305459, + "loss": 3.4485, + "step": 9460 + }, + { + "epoch": 0.54, + "learning_rate": 0.00010242315124769872, + "loss": 3.414, + "step": 9465 + }, + { + "epoch": 0.54, + "learning_rate": 0.00010232303943491004, + "loss": 3.4192, + "step": 9470 + }, + { + "epoch": 0.54, + "learning_rate": 0.00010222292529257217, + "loss": 3.3931, + "step": 9475 + }, + { + "epoch": 0.54, + "learning_rate": 0.00010212280892107988, + "loss": 3.4524, + "step": 9480 + }, + { + "epoch": 0.54, + "learning_rate": 0.00010202269042083001, + "loss": 3.4416, + "step": 9485 + }, + { + "epoch": 0.54, + "learning_rate": 0.00010192256989222169, + "loss": 3.398, + "step": 9490 + }, + { + "epoch": 0.54, + "learning_rate": 0.00010182244743565594, + "loss": 3.4301, + "step": 9495 + }, + { + "epoch": 0.55, + "learning_rate": 0.0001017223231515358, + "loss": 3.4463, + "step": 9500 + }, + { + "epoch": 0.55, + "learning_rate": 0.00010162219714026617, + "loss": 3.4039, + "step": 9505 + }, + { + "epoch": 0.55, + "learning_rate": 0.0001015220695022536, + "loss": 3.3085, + "step": 9510 + }, + { + "epoch": 0.55, + "learning_rate": 0.00010142194033790633, + "loss": 3.3191, + "step": 9515 + }, + { + "epoch": 0.55, + "learning_rate": 0.0001013218097476341, + "loss": 3.3856, + "step": 9520 + }, + { + "epoch": 0.55, + "learning_rate": 0.00010122167783184806, + "loss": 3.313, + "step": 9525 + }, + { + "epoch": 0.55, + "learning_rate": 0.00010112154469096078, + "loss": 3.4163, + "step": 9530 + }, + { + "epoch": 0.55, + "learning_rate": 0.00010102141042538597, + "loss": 3.3564, + "step": 9535 + }, + { + "epoch": 0.55, + "learning_rate": 0.0001009212751355385, + "loss": 3.3791, + "step": 9540 + }, + { + "epoch": 0.55, + "learning_rate": 0.00010082113892183423, + "loss": 3.3274, + "step": 9545 + }, + { + "epoch": 0.55, + "learning_rate": 0.00010072100188469002, + "loss": 3.3159, + "step": 9550 + }, + { + "epoch": 0.55, + "learning_rate": 0.00010062086412452352, + "loss": 3.4423, + "step": 9555 + }, + { + "epoch": 0.55, + "learning_rate": 0.00010052072574175306, + "loss": 3.4804, + "step": 9560 + }, + { + "epoch": 0.55, + "learning_rate": 0.00010042058683679769, + "loss": 3.5206, + "step": 9565 + }, + { + "epoch": 0.55, + "learning_rate": 0.00010032044751007685, + "loss": 3.5033, + "step": 9570 + }, + { + "epoch": 0.55, + "learning_rate": 0.00010022030786201058, + "loss": 3.499, + "step": 9575 + }, + { + "epoch": 0.55, + "learning_rate": 0.00010012016799301907, + "loss": 3.5253, + "step": 9580 + }, + { + "epoch": 0.55, + "learning_rate": 0.00010002002800352281, + "loss": 3.4599, + "step": 9585 + }, + { + "epoch": 0.55, + "learning_rate": 9.991988799394245e-05, + "loss": 3.5653, + "step": 9590 + }, + { + "epoch": 0.55, + "learning_rate": 9.981974806469858e-05, + "loss": 3.536, + "step": 9595 + }, + { + "epoch": 0.55, + "learning_rate": 9.971960831621173e-05, + "loss": 3.4623, + "step": 9600 + }, + { + "epoch": 0.55, + "learning_rate": 9.961946884890232e-05, + "loss": 3.4403, + "step": 9605 + }, + { + "epoch": 0.55, + "learning_rate": 9.951932976319041e-05, + "loss": 3.3946, + "step": 9610 + }, + { + "epoch": 0.55, + "learning_rate": 9.941919115949565e-05, + "loss": 3.4717, + "step": 9615 + }, + { + "epoch": 0.55, + "learning_rate": 9.931905313823733e-05, + "loss": 3.425, + "step": 9620 + }, + { + "epoch": 0.55, + "learning_rate": 9.921891579983404e-05, + "loss": 3.4353, + "step": 9625 + }, + { + "epoch": 0.55, + "learning_rate": 9.911877924470373e-05, + "loss": 3.4593, + "step": 9630 + }, + { + "epoch": 0.55, + "learning_rate": 9.901864357326358e-05, + "loss": 3.453, + "step": 9635 + }, + { + "epoch": 0.55, + "learning_rate": 9.891850888592987e-05, + "loss": 3.4532, + "step": 9640 + }, + { + "epoch": 0.55, + "learning_rate": 9.881837528311787e-05, + "loss": 3.5019, + "step": 9645 + }, + { + "epoch": 0.55, + "learning_rate": 9.871824286524175e-05, + "loss": 3.459, + "step": 9650 + }, + { + "epoch": 0.55, + "learning_rate": 9.861811173271459e-05, + "loss": 3.487, + "step": 9655 + }, + { + "epoch": 0.55, + "learning_rate": 9.851798198594809e-05, + "loss": 3.4984, + "step": 9660 + }, + { + "epoch": 0.55, + "learning_rate": 9.841785372535254e-05, + "loss": 3.4206, + "step": 9665 + }, + { + "epoch": 0.55, + "learning_rate": 9.831772705133685e-05, + "loss": 3.4782, + "step": 9670 + }, + { + "epoch": 0.56, + "learning_rate": 9.821760206430825e-05, + "loss": 3.5127, + "step": 9675 + }, + { + "epoch": 0.56, + "learning_rate": 9.811747886467226e-05, + "loss": 3.4766, + "step": 9680 + }, + { + "epoch": 0.56, + "learning_rate": 9.801735755283273e-05, + "loss": 3.511, + "step": 9685 + }, + { + "epoch": 0.56, + "learning_rate": 9.791723822919149e-05, + "loss": 3.5174, + "step": 9690 + }, + { + "epoch": 0.56, + "learning_rate": 9.781712099414842e-05, + "loss": 3.3848, + "step": 9695 + }, + { + "epoch": 0.56, + "learning_rate": 9.771700594810128e-05, + "loss": 3.3986, + "step": 9700 + }, + { + "epoch": 0.56, + "learning_rate": 9.761689319144573e-05, + "loss": 3.4746, + "step": 9705 + }, + { + "epoch": 0.56, + "learning_rate": 9.751678282457501e-05, + "loss": 3.3683, + "step": 9710 + }, + { + "epoch": 0.56, + "learning_rate": 9.741667494788003e-05, + "loss": 3.4235, + "step": 9715 + }, + { + "epoch": 0.56, + "learning_rate": 9.731656966174924e-05, + "loss": 3.5468, + "step": 9720 + }, + { + "epoch": 0.56, + "learning_rate": 9.721646706656839e-05, + "loss": 3.5306, + "step": 9725 + }, + { + "epoch": 0.56, + "learning_rate": 9.71163672627206e-05, + "loss": 3.4396, + "step": 9730 + }, + { + "epoch": 0.56, + "learning_rate": 9.70162703505862e-05, + "loss": 3.4224, + "step": 9735 + }, + { + "epoch": 0.56, + "learning_rate": 9.69161764305426e-05, + "loss": 3.3931, + "step": 9740 + }, + { + "epoch": 0.56, + "learning_rate": 9.681608560296413e-05, + "loss": 3.4806, + "step": 9745 + }, + { + "epoch": 0.56, + "learning_rate": 9.671599796822223e-05, + "loss": 3.4404, + "step": 9750 + }, + { + "epoch": 0.56, + "learning_rate": 9.661591362668491e-05, + "loss": 3.3803, + "step": 9755 + }, + { + "epoch": 0.56, + "learning_rate": 9.651583267871697e-05, + "loss": 3.4107, + "step": 9760 + }, + { + "epoch": 0.56, + "learning_rate": 9.641575522467984e-05, + "loss": 3.3617, + "step": 9765 + }, + { + "epoch": 0.56, + "learning_rate": 9.631568136493142e-05, + "loss": 3.3925, + "step": 9770 + }, + { + "epoch": 0.56, + "learning_rate": 9.621561119982598e-05, + "loss": 3.3395, + "step": 9775 + }, + { + "epoch": 0.56, + "learning_rate": 9.61155448297141e-05, + "loss": 3.33, + "step": 9780 + }, + { + "epoch": 0.56, + "learning_rate": 9.60154823549426e-05, + "loss": 3.409, + "step": 9785 + }, + { + "epoch": 0.56, + "learning_rate": 9.591542387585434e-05, + "loss": 3.3876, + "step": 9790 + }, + { + "epoch": 0.56, + "learning_rate": 9.581536949278814e-05, + "loss": 3.4272, + "step": 9795 + }, + { + "epoch": 0.56, + "learning_rate": 9.571531930607884e-05, + "loss": 3.4503, + "step": 9800 + }, + { + "epoch": 0.56, + "learning_rate": 9.561527341605691e-05, + "loss": 3.4269, + "step": 9805 + }, + { + "epoch": 0.56, + "learning_rate": 9.551523192304863e-05, + "loss": 3.3646, + "step": 9810 + }, + { + "epoch": 0.56, + "learning_rate": 9.541519492737586e-05, + "loss": 3.3592, + "step": 9815 + }, + { + "epoch": 0.56, + "learning_rate": 9.531516252935588e-05, + "loss": 3.4481, + "step": 9820 + }, + { + "epoch": 0.56, + "learning_rate": 9.521513482930144e-05, + "loss": 3.4373, + "step": 9825 + }, + { + "epoch": 0.56, + "learning_rate": 9.511511192752049e-05, + "loss": 3.4068, + "step": 9830 + }, + { + "epoch": 0.56, + "learning_rate": 9.501509392431627e-05, + "loss": 3.3923, + "step": 9835 + }, + { + "epoch": 0.56, + "learning_rate": 9.491508091998707e-05, + "loss": 3.4492, + "step": 9840 + }, + { + "epoch": 0.56, + "learning_rate": 9.481507301482604e-05, + "loss": 3.4388, + "step": 9845 + }, + { + "epoch": 0.57, + "learning_rate": 9.471507030912151e-05, + "loss": 3.3932, + "step": 9850 + }, + { + "epoch": 0.57, + "learning_rate": 9.46150729031563e-05, + "loss": 3.4571, + "step": 9855 + }, + { + "epoch": 0.57, + "learning_rate": 9.451508089720803e-05, + "loss": 3.4526, + "step": 9860 + }, + { + "epoch": 0.57, + "learning_rate": 9.441509439154895e-05, + "loss": 3.436, + "step": 9865 + }, + { + "epoch": 0.57, + "learning_rate": 9.431511348644575e-05, + "loss": 3.4363, + "step": 9870 + }, + { + "epoch": 0.57, + "learning_rate": 9.421513828215946e-05, + "loss": 3.4935, + "step": 9875 + }, + { + "epoch": 0.57, + "learning_rate": 9.41151688789455e-05, + "loss": 3.4785, + "step": 9880 + }, + { + "epoch": 0.57, + "learning_rate": 9.401520537705339e-05, + "loss": 3.4572, + "step": 9885 + }, + { + "epoch": 0.57, + "learning_rate": 9.391524787672676e-05, + "loss": 3.4368, + "step": 9890 + }, + { + "epoch": 0.57, + "learning_rate": 9.381529647820314e-05, + "loss": 3.4845, + "step": 9895 + }, + { + "epoch": 0.57, + "learning_rate": 9.371535128171416e-05, + "loss": 3.3886, + "step": 9900 + }, + { + "epoch": 0.57, + "learning_rate": 9.361541238748496e-05, + "loss": 3.3162, + "step": 9905 + }, + { + "epoch": 0.57, + "learning_rate": 9.351547989573453e-05, + "loss": 3.3372, + "step": 9910 + }, + { + "epoch": 0.57, + "learning_rate": 9.341555390667542e-05, + "loss": 3.4391, + "step": 9915 + }, + { + "epoch": 0.57, + "learning_rate": 9.331563452051362e-05, + "loss": 3.4464, + "step": 9920 + }, + { + "epoch": 0.57, + "learning_rate": 9.321572183744849e-05, + "loss": 3.3738, + "step": 9925 + }, + { + "epoch": 0.57, + "learning_rate": 9.311581595767273e-05, + "loss": 3.403, + "step": 9930 + }, + { + "epoch": 0.57, + "learning_rate": 9.301591698137217e-05, + "loss": 3.4138, + "step": 9935 + }, + { + "epoch": 0.57, + "learning_rate": 9.29160250087257e-05, + "loss": 3.4459, + "step": 9940 + }, + { + "epoch": 0.57, + "learning_rate": 9.281614013990526e-05, + "loss": 3.417, + "step": 9945 + }, + { + "epoch": 0.57, + "learning_rate": 9.271626247507561e-05, + "loss": 3.401, + "step": 9950 + }, + { + "epoch": 0.57, + "learning_rate": 9.261639211439427e-05, + "loss": 3.4619, + "step": 9955 + }, + { + "epoch": 0.57, + "learning_rate": 9.251652915801144e-05, + "loss": 3.4527, + "step": 9960 + }, + { + "epoch": 0.57, + "learning_rate": 9.241667370607e-05, + "loss": 3.3666, + "step": 9965 + }, + { + "epoch": 0.57, + "learning_rate": 9.231682585870514e-05, + "loss": 3.3911, + "step": 9970 + }, + { + "epoch": 0.57, + "learning_rate": 9.221698571604453e-05, + "loss": 3.425, + "step": 9975 + }, + { + "epoch": 0.57, + "learning_rate": 9.211715337820811e-05, + "loss": 3.4106, + "step": 9980 + }, + { + "epoch": 0.57, + "learning_rate": 9.201732894530797e-05, + "loss": 3.4019, + "step": 9985 + }, + { + "epoch": 0.57, + "learning_rate": 9.191751251744823e-05, + "loss": 3.4174, + "step": 9990 + }, + { + "epoch": 0.57, + "learning_rate": 9.181770419472509e-05, + "loss": 3.3719, + "step": 9995 + }, + { + "epoch": 0.57, + "learning_rate": 9.171790407722656e-05, + "loss": 3.3884, + "step": 10000 + }, + { + "epoch": 0.57, + "learning_rate": 9.161811226503233e-05, + "loss": 3.3333, + "step": 10005 + }, + { + "epoch": 0.57, + "learning_rate": 9.151832885821396e-05, + "loss": 3.4037, + "step": 10010 + }, + { + "epoch": 0.57, + "learning_rate": 9.141855395683444e-05, + "loss": 3.4492, + "step": 10015 + }, + { + "epoch": 0.57, + "learning_rate": 9.131878766094822e-05, + "loss": 3.3164, + "step": 10020 + }, + { + "epoch": 0.58, + "learning_rate": 9.121903007060121e-05, + "loss": 3.3646, + "step": 10025 + }, + { + "epoch": 0.58, + "learning_rate": 9.111928128583054e-05, + "loss": 3.4143, + "step": 10030 + }, + { + "epoch": 0.58, + "learning_rate": 9.101954140666451e-05, + "loss": 3.3719, + "step": 10035 + }, + { + "epoch": 0.58, + "learning_rate": 9.091981053312247e-05, + "loss": 3.3316, + "step": 10040 + }, + { + "epoch": 0.58, + "learning_rate": 9.082008876521481e-05, + "loss": 3.358, + "step": 10045 + }, + { + "epoch": 0.58, + "learning_rate": 9.072037620294275e-05, + "loss": 3.5402, + "step": 10050 + }, + { + "epoch": 0.58, + "learning_rate": 9.06206729462982e-05, + "loss": 3.4011, + "step": 10055 + }, + { + "epoch": 0.58, + "learning_rate": 9.052097909526388e-05, + "loss": 3.4199, + "step": 10060 + }, + { + "epoch": 0.58, + "learning_rate": 9.042129474981297e-05, + "loss": 3.3874, + "step": 10065 + }, + { + "epoch": 0.58, + "learning_rate": 9.032162000990914e-05, + "loss": 3.4635, + "step": 10070 + }, + { + "epoch": 0.58, + "learning_rate": 9.02219549755065e-05, + "loss": 3.3611, + "step": 10075 + }, + { + "epoch": 0.58, + "learning_rate": 9.012229974654932e-05, + "loss": 3.3858, + "step": 10080 + }, + { + "epoch": 0.58, + "learning_rate": 9.002265442297212e-05, + "loss": 3.501, + "step": 10085 + }, + { + "epoch": 0.58, + "learning_rate": 8.99230191046994e-05, + "loss": 3.4578, + "step": 10090 + }, + { + "epoch": 0.58, + "learning_rate": 8.982339389164575e-05, + "loss": 3.4302, + "step": 10095 + }, + { + "epoch": 0.58, + "learning_rate": 8.972377888371555e-05, + "loss": 3.3904, + "step": 10100 + }, + { + "epoch": 0.58, + "learning_rate": 8.962417418080285e-05, + "loss": 3.4465, + "step": 10105 + }, + { + "epoch": 0.58, + "learning_rate": 8.952457988279161e-05, + "loss": 3.4748, + "step": 10110 + }, + { + "epoch": 0.58, + "learning_rate": 8.942499608955516e-05, + "loss": 3.5204, + "step": 10115 + }, + { + "epoch": 0.58, + "learning_rate": 8.93254229009563e-05, + "loss": 3.4629, + "step": 10120 + }, + { + "epoch": 0.58, + "learning_rate": 8.922586041684732e-05, + "loss": 3.4275, + "step": 10125 + }, + { + "epoch": 0.58, + "learning_rate": 8.912630873706967e-05, + "loss": 3.4544, + "step": 10130 + }, + { + "epoch": 0.58, + "learning_rate": 8.902676796145403e-05, + "loss": 3.4336, + "step": 10135 + }, + { + "epoch": 0.58, + "learning_rate": 8.892723818982001e-05, + "loss": 3.4666, + "step": 10140 + }, + { + "epoch": 0.58, + "learning_rate": 8.882771952197642e-05, + "loss": 3.3364, + "step": 10145 + }, + { + "epoch": 0.58, + "learning_rate": 8.872821205772074e-05, + "loss": 3.4488, + "step": 10150 + }, + { + "epoch": 0.58, + "learning_rate": 8.862871589683924e-05, + "loss": 3.4661, + "step": 10155 + }, + { + "epoch": 0.58, + "learning_rate": 8.8529231139107e-05, + "loss": 3.5639, + "step": 10160 + }, + { + "epoch": 0.58, + "learning_rate": 8.842975788428748e-05, + "loss": 3.5045, + "step": 10165 + }, + { + "epoch": 0.58, + "learning_rate": 8.833029623213267e-05, + "loss": 3.4536, + "step": 10170 + }, + { + "epoch": 0.58, + "learning_rate": 8.823084628238298e-05, + "loss": 3.3744, + "step": 10175 + }, + { + "epoch": 0.58, + "learning_rate": 8.813140813476704e-05, + "loss": 3.5622, + "step": 10180 + }, + { + "epoch": 0.58, + "learning_rate": 8.803198188900161e-05, + "loss": 3.4703, + "step": 10185 + }, + { + "epoch": 0.58, + "learning_rate": 8.79325676447916e-05, + "loss": 3.4271, + "step": 10190 + }, + { + "epoch": 0.58, + "learning_rate": 8.783316550182982e-05, + "loss": 3.3598, + "step": 10195 + }, + { + "epoch": 0.59, + "learning_rate": 8.773377555979699e-05, + "loss": 3.4217, + "step": 10200 + }, + { + "epoch": 0.59, + "learning_rate": 8.763439791836145e-05, + "loss": 3.3361, + "step": 10205 + }, + { + "epoch": 0.59, + "learning_rate": 8.753503267717948e-05, + "loss": 3.317, + "step": 10210 + }, + { + "epoch": 0.59, + "learning_rate": 8.743567993589466e-05, + "loss": 3.352, + "step": 10215 + }, + { + "epoch": 0.59, + "learning_rate": 8.733633979413817e-05, + "loss": 3.2942, + "step": 10220 + }, + { + "epoch": 0.59, + "learning_rate": 8.723701235152854e-05, + "loss": 3.4149, + "step": 10225 + }, + { + "epoch": 0.59, + "learning_rate": 8.713769770767155e-05, + "loss": 3.406, + "step": 10230 + }, + { + "epoch": 0.59, + "learning_rate": 8.703839596216012e-05, + "loss": 3.3384, + "step": 10235 + }, + { + "epoch": 0.59, + "learning_rate": 8.69391072145743e-05, + "loss": 3.4647, + "step": 10240 + }, + { + "epoch": 0.59, + "learning_rate": 8.683983156448104e-05, + "loss": 3.3693, + "step": 10245 + }, + { + "epoch": 0.59, + "learning_rate": 8.67405691114342e-05, + "loss": 3.3358, + "step": 10250 + }, + { + "epoch": 0.59, + "learning_rate": 8.664131995497439e-05, + "loss": 3.3255, + "step": 10255 + }, + { + "epoch": 0.59, + "learning_rate": 8.654208419462893e-05, + "loss": 3.4213, + "step": 10260 + }, + { + "epoch": 0.59, + "learning_rate": 8.644286192991158e-05, + "loss": 3.297, + "step": 10265 + }, + { + "epoch": 0.59, + "learning_rate": 8.634365326032265e-05, + "loss": 3.3733, + "step": 10270 + }, + { + "epoch": 0.59, + "learning_rate": 8.62444582853489e-05, + "loss": 3.4788, + "step": 10275 + }, + { + "epoch": 0.59, + "learning_rate": 8.614527710446322e-05, + "loss": 3.3682, + "step": 10280 + }, + { + "epoch": 0.59, + "learning_rate": 8.604610981712471e-05, + "loss": 3.372, + "step": 10285 + }, + { + "epoch": 0.59, + "learning_rate": 8.594695652277858e-05, + "loss": 3.4457, + "step": 10290 + }, + { + "epoch": 0.59, + "learning_rate": 8.584781732085598e-05, + "loss": 3.4072, + "step": 10295 + }, + { + "epoch": 0.59, + "learning_rate": 8.574869231077383e-05, + "loss": 3.3953, + "step": 10300 + }, + { + "epoch": 0.59, + "learning_rate": 8.564958159193506e-05, + "loss": 3.5424, + "step": 10305 + }, + { + "epoch": 0.59, + "learning_rate": 8.555048526372805e-05, + "loss": 3.4545, + "step": 10310 + }, + { + "epoch": 0.59, + "learning_rate": 8.545140342552676e-05, + "loss": 3.511, + "step": 10315 + }, + { + "epoch": 0.59, + "learning_rate": 8.53523361766908e-05, + "loss": 3.5115, + "step": 10320 + }, + { + "epoch": 0.59, + "learning_rate": 8.525328361656494e-05, + "loss": 3.5528, + "step": 10325 + }, + { + "epoch": 0.59, + "learning_rate": 8.515424584447935e-05, + "loss": 3.5314, + "step": 10330 + }, + { + "epoch": 0.59, + "learning_rate": 8.505522295974929e-05, + "loss": 3.4791, + "step": 10335 + }, + { + "epoch": 0.59, + "learning_rate": 8.495621506167519e-05, + "loss": 3.5547, + "step": 10340 + }, + { + "epoch": 0.59, + "learning_rate": 8.485722224954237e-05, + "loss": 3.5492, + "step": 10345 + }, + { + "epoch": 0.59, + "learning_rate": 8.475824462262096e-05, + "loss": 3.5793, + "step": 10350 + }, + { + "epoch": 0.59, + "learning_rate": 8.465928228016608e-05, + "loss": 3.5645, + "step": 10355 + }, + { + "epoch": 0.59, + "learning_rate": 8.456033532141735e-05, + "loss": 3.6315, + "step": 10360 + }, + { + "epoch": 0.59, + "learning_rate": 8.44614038455989e-05, + "loss": 3.6001, + "step": 10365 + }, + { + "epoch": 0.59, + "learning_rate": 8.436248795191961e-05, + "loss": 3.5612, + "step": 10370 + }, + { + "epoch": 0.6, + "learning_rate": 8.426358773957243e-05, + "loss": 3.4017, + "step": 10375 + }, + { + "epoch": 0.6, + "learning_rate": 8.416470330773471e-05, + "loss": 3.534, + "step": 10380 + }, + { + "epoch": 0.6, + "learning_rate": 8.406583475556807e-05, + "loss": 3.5057, + "step": 10385 + }, + { + "epoch": 0.6, + "learning_rate": 8.396698218221807e-05, + "loss": 3.4671, + "step": 10390 + }, + { + "epoch": 0.6, + "learning_rate": 8.386814568681429e-05, + "loss": 3.4825, + "step": 10395 + }, + { + "epoch": 0.6, + "learning_rate": 8.376932536847014e-05, + "loss": 3.5642, + "step": 10400 + }, + { + "epoch": 0.6, + "learning_rate": 8.367052132628294e-05, + "loss": 3.507, + "step": 10405 + }, + { + "epoch": 0.6, + "learning_rate": 8.35717336593336e-05, + "loss": 3.4765, + "step": 10410 + }, + { + "epoch": 0.6, + "learning_rate": 8.347296246668653e-05, + "loss": 3.5383, + "step": 10415 + }, + { + "epoch": 0.6, + "learning_rate": 8.33742078473898e-05, + "loss": 3.4187, + "step": 10420 + }, + { + "epoch": 0.6, + "learning_rate": 8.327546990047471e-05, + "loss": 3.5604, + "step": 10425 + }, + { + "epoch": 0.6, + "learning_rate": 8.317674872495589e-05, + "loss": 3.4808, + "step": 10430 + }, + { + "epoch": 0.6, + "learning_rate": 8.30780444198312e-05, + "loss": 3.5421, + "step": 10435 + }, + { + "epoch": 0.6, + "learning_rate": 8.29793570840815e-05, + "loss": 3.4582, + "step": 10440 + }, + { + "epoch": 0.6, + "learning_rate": 8.288068681667065e-05, + "loss": 3.497, + "step": 10445 + }, + { + "epoch": 0.6, + "learning_rate": 8.278203371654549e-05, + "loss": 3.4858, + "step": 10450 + }, + { + "epoch": 0.6, + "learning_rate": 8.268339788263551e-05, + "loss": 3.4417, + "step": 10455 + }, + { + "epoch": 0.6, + "learning_rate": 8.2584779413853e-05, + "loss": 3.467, + "step": 10460 + }, + { + "epoch": 0.6, + "learning_rate": 8.248617840909268e-05, + "loss": 3.4081, + "step": 10465 + }, + { + "epoch": 0.6, + "learning_rate": 8.238759496723199e-05, + "loss": 3.475, + "step": 10470 + }, + { + "epoch": 0.6, + "learning_rate": 8.228902918713053e-05, + "loss": 3.4069, + "step": 10475 + }, + { + "epoch": 0.6, + "learning_rate": 8.21904811676303e-05, + "loss": 3.4962, + "step": 10480 + }, + { + "epoch": 0.6, + "learning_rate": 8.209195100755551e-05, + "loss": 3.4025, + "step": 10485 + }, + { + "epoch": 0.6, + "learning_rate": 8.199343880571241e-05, + "loss": 3.3879, + "step": 10490 + }, + { + "epoch": 0.6, + "learning_rate": 8.189494466088923e-05, + "loss": 3.5702, + "step": 10495 + }, + { + "epoch": 0.6, + "learning_rate": 8.179646867185617e-05, + "loss": 3.4021, + "step": 10500 + }, + { + "epoch": 0.6, + "learning_rate": 8.169801093736515e-05, + "loss": 3.4315, + "step": 10505 + }, + { + "epoch": 0.6, + "learning_rate": 8.159957155614974e-05, + "loss": 3.4108, + "step": 10510 + }, + { + "epoch": 0.6, + "learning_rate": 8.15011506269253e-05, + "loss": 3.4287, + "step": 10515 + }, + { + "epoch": 0.6, + "learning_rate": 8.140274824838849e-05, + "loss": 3.386, + "step": 10520 + }, + { + "epoch": 0.6, + "learning_rate": 8.130436451921743e-05, + "loss": 3.4984, + "step": 10525 + }, + { + "epoch": 0.6, + "learning_rate": 8.120599953807153e-05, + "loss": 3.5098, + "step": 10530 + }, + { + "epoch": 0.6, + "learning_rate": 8.110765340359145e-05, + "loss": 3.445, + "step": 10535 + }, + { + "epoch": 0.6, + "learning_rate": 8.10093262143989e-05, + "loss": 3.4813, + "step": 10540 + }, + { + "epoch": 0.61, + "learning_rate": 8.09110180690966e-05, + "loss": 3.4614, + "step": 10545 + }, + { + "epoch": 0.61, + "learning_rate": 8.08127290662682e-05, + "loss": 3.4539, + "step": 10550 + }, + { + "epoch": 0.61, + "learning_rate": 8.071445930447815e-05, + "loss": 3.5426, + "step": 10555 + }, + { + "epoch": 0.61, + "learning_rate": 8.061620888227145e-05, + "loss": 3.3623, + "step": 10560 + }, + { + "epoch": 0.61, + "learning_rate": 8.051797789817403e-05, + "loss": 3.3788, + "step": 10565 + }, + { + "epoch": 0.61, + "learning_rate": 8.041976645069207e-05, + "loss": 3.4262, + "step": 10570 + }, + { + "epoch": 0.61, + "learning_rate": 8.032157463831216e-05, + "loss": 3.4663, + "step": 10575 + }, + { + "epoch": 0.61, + "learning_rate": 8.022340255950138e-05, + "loss": 3.3835, + "step": 10580 + }, + { + "epoch": 0.61, + "learning_rate": 8.012525031270685e-05, + "loss": 3.4929, + "step": 10585 + }, + { + "epoch": 0.61, + "learning_rate": 8.002711799635588e-05, + "loss": 3.383, + "step": 10590 + }, + { + "epoch": 0.61, + "learning_rate": 7.992900570885572e-05, + "loss": 3.391, + "step": 10595 + }, + { + "epoch": 0.61, + "learning_rate": 7.983091354859369e-05, + "loss": 3.4463, + "step": 10600 + }, + { + "epoch": 0.61, + "learning_rate": 7.97328416139368e-05, + "loss": 3.5025, + "step": 10605 + }, + { + "epoch": 0.61, + "learning_rate": 7.963479000323171e-05, + "loss": 3.4358, + "step": 10610 + }, + { + "epoch": 0.61, + "learning_rate": 7.953675881480493e-05, + "loss": 3.3594, + "step": 10615 + }, + { + "epoch": 0.61, + "learning_rate": 7.94387481469623e-05, + "loss": 3.5577, + "step": 10620 + }, + { + "epoch": 0.61, + "learning_rate": 7.934075809798908e-05, + "loss": 3.4044, + "step": 10625 + }, + { + "epoch": 0.61, + "learning_rate": 7.924278876615004e-05, + "loss": 3.4446, + "step": 10630 + }, + { + "epoch": 0.61, + "learning_rate": 7.914484024968893e-05, + "loss": 3.4229, + "step": 10635 + }, + { + "epoch": 0.61, + "learning_rate": 7.90469126468288e-05, + "loss": 3.4395, + "step": 10640 + }, + { + "epoch": 0.61, + "learning_rate": 7.894900605577161e-05, + "loss": 3.4185, + "step": 10645 + }, + { + "epoch": 0.61, + "learning_rate": 7.885112057469839e-05, + "loss": 3.4847, + "step": 10650 + }, + { + "epoch": 0.61, + "learning_rate": 7.87532563017689e-05, + "loss": 3.5373, + "step": 10655 + }, + { + "epoch": 0.61, + "learning_rate": 7.865541333512157e-05, + "loss": 3.4387, + "step": 10660 + }, + { + "epoch": 0.61, + "learning_rate": 7.855759177287368e-05, + "loss": 3.5397, + "step": 10665 + }, + { + "epoch": 0.61, + "learning_rate": 7.84597917131208e-05, + "loss": 3.4661, + "step": 10670 + }, + { + "epoch": 0.61, + "learning_rate": 7.836201325393706e-05, + "loss": 3.5664, + "step": 10675 + }, + { + "epoch": 0.61, + "learning_rate": 7.826425649337501e-05, + "loss": 3.3854, + "step": 10680 + }, + { + "epoch": 0.61, + "learning_rate": 7.816652152946528e-05, + "loss": 3.4263, + "step": 10685 + }, + { + "epoch": 0.61, + "learning_rate": 7.806880846021669e-05, + "loss": 3.4713, + "step": 10690 + }, + { + "epoch": 0.61, + "learning_rate": 7.797111738361618e-05, + "loss": 3.4629, + "step": 10695 + }, + { + "epoch": 0.61, + "learning_rate": 7.787344839762855e-05, + "loss": 3.503, + "step": 10700 + }, + { + "epoch": 0.61, + "learning_rate": 7.777580160019649e-05, + "loss": 3.4162, + "step": 10705 + }, + { + "epoch": 0.61, + "learning_rate": 7.767817708924038e-05, + "loss": 3.4464, + "step": 10710 + }, + { + "epoch": 0.61, + "learning_rate": 7.758057496265839e-05, + "loss": 3.5447, + "step": 10715 + }, + { + "epoch": 0.62, + "learning_rate": 7.748299531832609e-05, + "loss": 3.5309, + "step": 10720 + }, + { + "epoch": 0.62, + "learning_rate": 7.738543825409652e-05, + "loss": 3.4894, + "step": 10725 + }, + { + "epoch": 0.62, + "learning_rate": 7.728790386780025e-05, + "loss": 3.4639, + "step": 10730 + }, + { + "epoch": 0.62, + "learning_rate": 7.71903922572449e-05, + "loss": 3.391, + "step": 10735 + }, + { + "epoch": 0.62, + "learning_rate": 7.70929035202153e-05, + "loss": 3.4486, + "step": 10740 + }, + { + "epoch": 0.62, + "learning_rate": 7.699543775447345e-05, + "loss": 3.4808, + "step": 10745 + }, + { + "epoch": 0.62, + "learning_rate": 7.689799505775822e-05, + "loss": 3.4177, + "step": 10750 + }, + { + "epoch": 0.62, + "learning_rate": 7.68005755277853e-05, + "loss": 3.4379, + "step": 10755 + }, + { + "epoch": 0.62, + "learning_rate": 7.67031792622473e-05, + "loss": 3.4123, + "step": 10760 + }, + { + "epoch": 0.62, + "learning_rate": 7.660580635881338e-05, + "loss": 3.5498, + "step": 10765 + }, + { + "epoch": 0.62, + "learning_rate": 7.65084569151293e-05, + "loss": 3.4582, + "step": 10770 + }, + { + "epoch": 0.62, + "learning_rate": 7.641113102881726e-05, + "loss": 3.4057, + "step": 10775 + }, + { + "epoch": 0.62, + "learning_rate": 7.631382879747597e-05, + "loss": 3.4094, + "step": 10780 + }, + { + "epoch": 0.62, + "learning_rate": 7.621655031868026e-05, + "loss": 3.3613, + "step": 10785 + }, + { + "epoch": 0.62, + "learning_rate": 7.61192956899812e-05, + "loss": 3.417, + "step": 10790 + }, + { + "epoch": 0.62, + "learning_rate": 7.6022065008906e-05, + "loss": 3.4533, + "step": 10795 + }, + { + "epoch": 0.62, + "learning_rate": 7.592485837295777e-05, + "loss": 3.47, + "step": 10800 + }, + { + "epoch": 0.62, + "learning_rate": 7.582767587961552e-05, + "loss": 3.3907, + "step": 10805 + }, + { + "epoch": 0.62, + "learning_rate": 7.573051762633414e-05, + "loss": 3.3771, + "step": 10810 + }, + { + "epoch": 0.62, + "learning_rate": 7.563338371054412e-05, + "loss": 3.4655, + "step": 10815 + }, + { + "epoch": 0.62, + "learning_rate": 7.553627422965148e-05, + "loss": 3.3781, + "step": 10820 + }, + { + "epoch": 0.62, + "learning_rate": 7.543918928103795e-05, + "loss": 3.4229, + "step": 10825 + }, + { + "epoch": 0.62, + "learning_rate": 7.534212896206051e-05, + "loss": 3.4061, + "step": 10830 + }, + { + "epoch": 0.62, + "learning_rate": 7.524509337005141e-05, + "loss": 3.3877, + "step": 10835 + }, + { + "epoch": 0.62, + "learning_rate": 7.514808260231818e-05, + "loss": 3.3792, + "step": 10840 + }, + { + "epoch": 0.62, + "learning_rate": 7.505109675614346e-05, + "loss": 3.473, + "step": 10845 + }, + { + "epoch": 0.62, + "learning_rate": 7.495413592878484e-05, + "loss": 3.391, + "step": 10850 + }, + { + "epoch": 0.62, + "learning_rate": 7.485720021747486e-05, + "loss": 3.4303, + "step": 10855 + }, + { + "epoch": 0.62, + "learning_rate": 7.476028971942093e-05, + "loss": 3.4607, + "step": 10860 + }, + { + "epoch": 0.62, + "learning_rate": 7.466340453180505e-05, + "loss": 3.4583, + "step": 10865 + }, + { + "epoch": 0.62, + "learning_rate": 7.456654475178389e-05, + "loss": 3.5336, + "step": 10870 + }, + { + "epoch": 0.62, + "learning_rate": 7.446971047648873e-05, + "loss": 3.3367, + "step": 10875 + }, + { + "epoch": 0.62, + "learning_rate": 7.437290180302512e-05, + "loss": 3.3926, + "step": 10880 + }, + { + "epoch": 0.62, + "learning_rate": 7.427611882847301e-05, + "loss": 3.4691, + "step": 10885 + }, + { + "epoch": 0.62, + "learning_rate": 7.41793616498867e-05, + "loss": 3.3938, + "step": 10890 + }, + { + "epoch": 0.63, + "learning_rate": 7.40826303642944e-05, + "loss": 3.4652, + "step": 10895 + }, + { + "epoch": 0.63, + "learning_rate": 7.398592506869849e-05, + "loss": 3.4003, + "step": 10900 + }, + { + "epoch": 0.63, + "learning_rate": 7.388924586007523e-05, + "loss": 3.38, + "step": 10905 + }, + { + "epoch": 0.63, + "learning_rate": 7.379259283537479e-05, + "loss": 3.412, + "step": 10910 + }, + { + "epoch": 0.63, + "learning_rate": 7.369596609152105e-05, + "loss": 3.403, + "step": 10915 + }, + { + "epoch": 0.63, + "learning_rate": 7.359936572541142e-05, + "loss": 3.4365, + "step": 10920 + }, + { + "epoch": 0.63, + "learning_rate": 7.350279183391712e-05, + "loss": 3.4292, + "step": 10925 + }, + { + "epoch": 0.63, + "learning_rate": 7.340624451388257e-05, + "loss": 3.3731, + "step": 10930 + }, + { + "epoch": 0.63, + "learning_rate": 7.330972386212558e-05, + "loss": 3.3804, + "step": 10935 + }, + { + "epoch": 0.63, + "learning_rate": 7.321322997543743e-05, + "loss": 3.3717, + "step": 10940 + }, + { + "epoch": 0.63, + "learning_rate": 7.311676295058232e-05, + "loss": 3.3671, + "step": 10945 + }, + { + "epoch": 0.63, + "learning_rate": 7.302032288429756e-05, + "loss": 3.4532, + "step": 10950 + }, + { + "epoch": 0.63, + "learning_rate": 7.292390987329356e-05, + "loss": 3.431, + "step": 10955 + }, + { + "epoch": 0.63, + "learning_rate": 7.282752401425343e-05, + "loss": 3.4105, + "step": 10960 + }, + { + "epoch": 0.63, + "learning_rate": 7.273116540383319e-05, + "loss": 3.4186, + "step": 10965 + }, + { + "epoch": 0.63, + "learning_rate": 7.263483413866135e-05, + "loss": 3.4403, + "step": 10970 + }, + { + "epoch": 0.63, + "learning_rate": 7.253853031533928e-05, + "loss": 3.4506, + "step": 10975 + }, + { + "epoch": 0.63, + "learning_rate": 7.244225403044056e-05, + "loss": 3.4044, + "step": 10980 + }, + { + "epoch": 0.63, + "learning_rate": 7.234600538051124e-05, + "loss": 3.3981, + "step": 10985 + }, + { + "epoch": 0.63, + "learning_rate": 7.22497844620698e-05, + "loss": 3.4186, + "step": 10990 + }, + { + "epoch": 0.63, + "learning_rate": 7.215359137160673e-05, + "loss": 3.4113, + "step": 10995 + }, + { + "epoch": 0.63, + "learning_rate": 7.205742620558464e-05, + "loss": 3.4839, + "step": 11000 + }, + { + "epoch": 0.63, + "learning_rate": 7.196128906043822e-05, + "loss": 3.4613, + "step": 11005 + }, + { + "epoch": 0.63, + "learning_rate": 7.1865180032574e-05, + "loss": 3.5436, + "step": 11010 + }, + { + "epoch": 0.63, + "learning_rate": 7.176909921837033e-05, + "loss": 3.3653, + "step": 11015 + }, + { + "epoch": 0.63, + "learning_rate": 7.167304671417729e-05, + "loss": 3.4557, + "step": 11020 + }, + { + "epoch": 0.63, + "learning_rate": 7.157702261631653e-05, + "loss": 3.4844, + "step": 11025 + }, + { + "epoch": 0.63, + "learning_rate": 7.148102702108122e-05, + "loss": 3.3948, + "step": 11030 + }, + { + "epoch": 0.63, + "learning_rate": 7.138506002473591e-05, + "loss": 3.4026, + "step": 11035 + }, + { + "epoch": 0.63, + "learning_rate": 7.128912172351664e-05, + "loss": 3.5242, + "step": 11040 + }, + { + "epoch": 0.63, + "learning_rate": 7.119321221363047e-05, + "loss": 3.4407, + "step": 11045 + }, + { + "epoch": 0.63, + "learning_rate": 7.109733159125566e-05, + "loss": 3.3768, + "step": 11050 + }, + { + "epoch": 0.63, + "learning_rate": 7.100147995254156e-05, + "loss": 3.4979, + "step": 11055 + }, + { + "epoch": 0.63, + "learning_rate": 7.09056573936084e-05, + "loss": 3.4611, + "step": 11060 + }, + { + "epoch": 0.63, + "learning_rate": 7.080986401054721e-05, + "loss": 3.393, + "step": 11065 + }, + { + "epoch": 0.64, + "learning_rate": 7.071409989941989e-05, + "loss": 3.4694, + "step": 11070 + }, + { + "epoch": 0.64, + "learning_rate": 7.061836515625886e-05, + "loss": 3.4676, + "step": 11075 + }, + { + "epoch": 0.64, + "learning_rate": 7.052265987706708e-05, + "loss": 3.4752, + "step": 11080 + }, + { + "epoch": 0.64, + "learning_rate": 7.042698415781813e-05, + "loss": 3.5297, + "step": 11085 + }, + { + "epoch": 0.64, + "learning_rate": 7.033133809445577e-05, + "loss": 3.4373, + "step": 11090 + }, + { + "epoch": 0.64, + "learning_rate": 7.02357217828941e-05, + "loss": 3.3685, + "step": 11095 + }, + { + "epoch": 0.64, + "learning_rate": 7.014013531901733e-05, + "loss": 3.4258, + "step": 11100 + }, + { + "epoch": 0.64, + "learning_rate": 7.004457879867986e-05, + "loss": 3.4328, + "step": 11105 + }, + { + "epoch": 0.64, + "learning_rate": 6.994905231770593e-05, + "loss": 3.5577, + "step": 11110 + }, + { + "epoch": 0.64, + "learning_rate": 6.985355597188971e-05, + "loss": 3.4142, + "step": 11115 + }, + { + "epoch": 0.64, + "learning_rate": 6.975808985699518e-05, + "loss": 3.4338, + "step": 11120 + }, + { + "epoch": 0.64, + "learning_rate": 6.966265406875597e-05, + "loss": 3.4854, + "step": 11125 + }, + { + "epoch": 0.64, + "learning_rate": 6.956724870287524e-05, + "loss": 3.4536, + "step": 11130 + }, + { + "epoch": 0.64, + "learning_rate": 6.94718738550258e-05, + "loss": 3.2955, + "step": 11135 + }, + { + "epoch": 0.64, + "learning_rate": 6.93765296208497e-05, + "loss": 3.4547, + "step": 11140 + }, + { + "epoch": 0.64, + "learning_rate": 6.928121609595835e-05, + "loss": 3.4441, + "step": 11145 + }, + { + "epoch": 0.64, + "learning_rate": 6.918593337593238e-05, + "loss": 3.537, + "step": 11150 + }, + { + "epoch": 0.64, + "learning_rate": 6.909068155632153e-05, + "loss": 3.5565, + "step": 11155 + }, + { + "epoch": 0.64, + "learning_rate": 6.899546073264454e-05, + "loss": 3.429, + "step": 11160 + }, + { + "epoch": 0.64, + "learning_rate": 6.890027100038901e-05, + "loss": 3.4186, + "step": 11165 + }, + { + "epoch": 0.64, + "learning_rate": 6.880511245501149e-05, + "loss": 3.4329, + "step": 11170 + }, + { + "epoch": 0.64, + "learning_rate": 6.870998519193717e-05, + "loss": 3.4657, + "step": 11175 + }, + { + "epoch": 0.64, + "learning_rate": 6.861488930655979e-05, + "loss": 3.442, + "step": 11180 + }, + { + "epoch": 0.64, + "learning_rate": 6.851982489424187e-05, + "loss": 3.486, + "step": 11185 + }, + { + "epoch": 0.64, + "learning_rate": 6.842479205031411e-05, + "loss": 3.4021, + "step": 11190 + }, + { + "epoch": 0.64, + "learning_rate": 6.832979087007565e-05, + "loss": 3.4643, + "step": 11195 + }, + { + "epoch": 0.64, + "learning_rate": 6.823482144879398e-05, + "loss": 3.4441, + "step": 11200 + }, + { + "epoch": 0.64, + "learning_rate": 6.813988388170456e-05, + "loss": 3.4551, + "step": 11205 + }, + { + "epoch": 0.64, + "learning_rate": 6.804497826401105e-05, + "loss": 3.4461, + "step": 11210 + }, + { + "epoch": 0.64, + "learning_rate": 6.795010469088495e-05, + "loss": 3.4256, + "step": 11215 + }, + { + "epoch": 0.64, + "learning_rate": 6.785526325746576e-05, + "loss": 3.4731, + "step": 11220 + }, + { + "epoch": 0.64, + "learning_rate": 6.776045405886066e-05, + "loss": 3.3837, + "step": 11225 + }, + { + "epoch": 0.64, + "learning_rate": 6.766567719014449e-05, + "loss": 3.4065, + "step": 11230 + }, + { + "epoch": 0.64, + "learning_rate": 6.75709327463598e-05, + "loss": 3.4959, + "step": 11235 + }, + { + "epoch": 0.64, + "learning_rate": 6.747622082251643e-05, + "loss": 3.4477, + "step": 11240 + }, + { + "epoch": 0.65, + "learning_rate": 6.738154151359172e-05, + "loss": 3.4453, + "step": 11245 + }, + { + "epoch": 0.65, + "learning_rate": 6.728689491453039e-05, + "loss": 3.4632, + "step": 11250 + }, + { + "epoch": 0.65, + "learning_rate": 6.719228112024417e-05, + "loss": 3.4852, + "step": 11255 + }, + { + "epoch": 0.65, + "learning_rate": 6.709770022561198e-05, + "loss": 3.4135, + "step": 11260 + }, + { + "epoch": 0.65, + "learning_rate": 6.700315232547981e-05, + "loss": 3.4759, + "step": 11265 + }, + { + "epoch": 0.65, + "learning_rate": 6.690863751466048e-05, + "loss": 3.4847, + "step": 11270 + }, + { + "epoch": 0.65, + "learning_rate": 6.681415588793367e-05, + "loss": 3.4493, + "step": 11275 + }, + { + "epoch": 0.65, + "learning_rate": 6.67197075400457e-05, + "loss": 3.4629, + "step": 11280 + }, + { + "epoch": 0.65, + "learning_rate": 6.662529256570969e-05, + "loss": 3.4686, + "step": 11285 + }, + { + "epoch": 0.65, + "learning_rate": 6.653091105960512e-05, + "loss": 3.445, + "step": 11290 + }, + { + "epoch": 0.65, + "learning_rate": 6.643656311637796e-05, + "loss": 3.4797, + "step": 11295 + }, + { + "epoch": 0.65, + "learning_rate": 6.634224883064059e-05, + "loss": 3.4727, + "step": 11300 + }, + { + "epoch": 0.65, + "learning_rate": 6.624796829697158e-05, + "loss": 3.3793, + "step": 11305 + }, + { + "epoch": 0.65, + "learning_rate": 6.615372160991561e-05, + "loss": 3.4989, + "step": 11310 + }, + { + "epoch": 0.65, + "learning_rate": 6.605950886398353e-05, + "loss": 3.4154, + "step": 11315 + }, + { + "epoch": 0.65, + "learning_rate": 6.596533015365207e-05, + "loss": 3.4803, + "step": 11320 + }, + { + "epoch": 0.65, + "learning_rate": 6.587118557336382e-05, + "loss": 3.5228, + "step": 11325 + }, + { + "epoch": 0.65, + "learning_rate": 6.577707521752725e-05, + "loss": 3.4514, + "step": 11330 + }, + { + "epoch": 0.65, + "learning_rate": 6.56829991805164e-05, + "loss": 3.51, + "step": 11335 + }, + { + "epoch": 0.65, + "learning_rate": 6.558895755667091e-05, + "loss": 3.5296, + "step": 11340 + }, + { + "epoch": 0.65, + "learning_rate": 6.549495044029592e-05, + "loss": 3.4889, + "step": 11345 + }, + { + "epoch": 0.65, + "learning_rate": 6.540097792566202e-05, + "loss": 3.4241, + "step": 11350 + }, + { + "epoch": 0.65, + "learning_rate": 6.530704010700504e-05, + "loss": 3.4034, + "step": 11355 + }, + { + "epoch": 0.65, + "learning_rate": 6.521313707852601e-05, + "loss": 3.3991, + "step": 11360 + }, + { + "epoch": 0.65, + "learning_rate": 6.511926893439115e-05, + "loss": 3.5144, + "step": 11365 + }, + { + "epoch": 0.65, + "learning_rate": 6.502543576873163e-05, + "loss": 3.4728, + "step": 11370 + }, + { + "epoch": 0.65, + "learning_rate": 6.493163767564352e-05, + "loss": 3.3865, + "step": 11375 + }, + { + "epoch": 0.65, + "learning_rate": 6.483787474918779e-05, + "loss": 3.4376, + "step": 11380 + }, + { + "epoch": 0.65, + "learning_rate": 6.474414708339013e-05, + "loss": 3.4098, + "step": 11385 + }, + { + "epoch": 0.65, + "learning_rate": 6.465045477224079e-05, + "loss": 3.4165, + "step": 11390 + }, + { + "epoch": 0.65, + "learning_rate": 6.455679790969473e-05, + "loss": 3.4513, + "step": 11395 + }, + { + "epoch": 0.65, + "learning_rate": 6.446317658967119e-05, + "loss": 3.4158, + "step": 11400 + }, + { + "epoch": 0.65, + "learning_rate": 6.436959090605383e-05, + "loss": 3.3591, + "step": 11405 + }, + { + "epoch": 0.65, + "learning_rate": 6.42760409526906e-05, + "loss": 3.4458, + "step": 11410 + }, + { + "epoch": 0.65, + "learning_rate": 6.418252682339361e-05, + "loss": 3.4662, + "step": 11415 + }, + { + "epoch": 0.66, + "learning_rate": 6.408904861193906e-05, + "loss": 3.4544, + "step": 11420 + }, + { + "epoch": 0.66, + "learning_rate": 6.399560641206706e-05, + "loss": 3.3752, + "step": 11425 + }, + { + "epoch": 0.66, + "learning_rate": 6.39022003174817e-05, + "loss": 3.4972, + "step": 11430 + }, + { + "epoch": 0.66, + "learning_rate": 6.380883042185084e-05, + "loss": 3.4263, + "step": 11435 + }, + { + "epoch": 0.66, + "learning_rate": 6.371549681880593e-05, + "loss": 3.4885, + "step": 11440 + }, + { + "epoch": 0.66, + "learning_rate": 6.362219960194223e-05, + "loss": 3.4345, + "step": 11445 + }, + { + "epoch": 0.66, + "learning_rate": 6.352893886481829e-05, + "loss": 3.3657, + "step": 11450 + }, + { + "epoch": 0.66, + "learning_rate": 6.343571470095625e-05, + "loss": 3.3585, + "step": 11455 + }, + { + "epoch": 0.66, + "learning_rate": 6.334252720384153e-05, + "loss": 3.4423, + "step": 11460 + }, + { + "epoch": 0.66, + "learning_rate": 6.32493764669227e-05, + "loss": 3.3574, + "step": 11465 + }, + { + "epoch": 0.66, + "learning_rate": 6.315626258361158e-05, + "loss": 3.3472, + "step": 11470 + }, + { + "epoch": 0.66, + "learning_rate": 6.306318564728294e-05, + "loss": 3.4906, + "step": 11475 + }, + { + "epoch": 0.66, + "learning_rate": 6.297014575127455e-05, + "loss": 3.4221, + "step": 11480 + }, + { + "epoch": 0.66, + "learning_rate": 6.287714298888709e-05, + "loss": 3.4076, + "step": 11485 + }, + { + "epoch": 0.66, + "learning_rate": 6.27841774533838e-05, + "loss": 3.3956, + "step": 11490 + }, + { + "epoch": 0.66, + "learning_rate": 6.26912492379909e-05, + "loss": 3.3515, + "step": 11495 + }, + { + "epoch": 0.66, + "learning_rate": 6.259835843589688e-05, + "loss": 3.3825, + "step": 11500 + }, + { + "epoch": 0.66, + "learning_rate": 6.250550514025287e-05, + "loss": 3.3199, + "step": 11505 + }, + { + "epoch": 0.66, + "learning_rate": 6.24126894441724e-05, + "loss": 3.3946, + "step": 11510 + }, + { + "epoch": 0.66, + "learning_rate": 6.231991144073126e-05, + "loss": 3.4146, + "step": 11515 + }, + { + "epoch": 0.66, + "learning_rate": 6.222717122296739e-05, + "loss": 3.3709, + "step": 11520 + }, + { + "epoch": 0.66, + "learning_rate": 6.213446888388093e-05, + "loss": 3.2866, + "step": 11525 + }, + { + "epoch": 0.66, + "learning_rate": 6.204180451643399e-05, + "loss": 3.3664, + "step": 11530 + }, + { + "epoch": 0.66, + "learning_rate": 6.194917821355062e-05, + "loss": 3.4697, + "step": 11535 + }, + { + "epoch": 0.66, + "learning_rate": 6.18565900681166e-05, + "loss": 3.4193, + "step": 11540 + }, + { + "epoch": 0.66, + "learning_rate": 6.176404017297965e-05, + "loss": 3.4256, + "step": 11545 + }, + { + "epoch": 0.66, + "learning_rate": 6.167152862094893e-05, + "loss": 3.4797, + "step": 11550 + }, + { + "epoch": 0.66, + "learning_rate": 6.157905550479525e-05, + "loss": 3.4303, + "step": 11555 + }, + { + "epoch": 0.66, + "learning_rate": 6.148662091725087e-05, + "loss": 3.3707, + "step": 11560 + }, + { + "epoch": 0.66, + "learning_rate": 6.139422495100939e-05, + "loss": 3.4022, + "step": 11565 + }, + { + "epoch": 0.66, + "learning_rate": 6.13018676987257e-05, + "loss": 3.4739, + "step": 11570 + }, + { + "epoch": 0.66, + "learning_rate": 6.120954925301587e-05, + "loss": 3.4261, + "step": 11575 + }, + { + "epoch": 0.66, + "learning_rate": 6.111726970645703e-05, + "loss": 3.4282, + "step": 11580 + }, + { + "epoch": 0.66, + "learning_rate": 6.102502915158733e-05, + "loss": 3.2973, + "step": 11585 + }, + { + "epoch": 0.66, + "learning_rate": 6.093282768090574e-05, + "loss": 3.4744, + "step": 11590 + }, + { + "epoch": 0.67, + "learning_rate": 6.084066538687222e-05, + "loss": 3.4135, + "step": 11595 + }, + { + "epoch": 0.67, + "learning_rate": 6.074854236190723e-05, + "loss": 3.4237, + "step": 11600 + }, + { + "epoch": 0.67, + "learning_rate": 6.065645869839196e-05, + "loss": 3.4466, + "step": 11605 + }, + { + "epoch": 0.67, + "learning_rate": 6.0564414488668165e-05, + "loss": 3.475, + "step": 11610 + }, + { + "epoch": 0.67, + "learning_rate": 6.0472409825037926e-05, + "loss": 3.3962, + "step": 11615 + }, + { + "epoch": 0.67, + "learning_rate": 6.038044479976375e-05, + "loss": 3.2949, + "step": 11620 + }, + { + "epoch": 0.67, + "learning_rate": 6.0288519505068375e-05, + "loss": 3.4998, + "step": 11625 + }, + { + "epoch": 0.67, + "learning_rate": 6.01966340331347e-05, + "loss": 3.4259, + "step": 11630 + }, + { + "epoch": 0.67, + "learning_rate": 6.010478847610565e-05, + "loss": 3.4416, + "step": 11635 + }, + { + "epoch": 0.67, + "learning_rate": 6.0012982926084195e-05, + "loss": 3.4436, + "step": 11640 + }, + { + "epoch": 0.67, + "learning_rate": 5.992121747513315e-05, + "loss": 3.4195, + "step": 11645 + }, + { + "epoch": 0.67, + "learning_rate": 5.982949221527506e-05, + "loss": 3.4703, + "step": 11650 + }, + { + "epoch": 0.67, + "learning_rate": 5.973780723849225e-05, + "loss": 3.4013, + "step": 11655 + }, + { + "epoch": 0.67, + "learning_rate": 5.9646162636726634e-05, + "loss": 3.4248, + "step": 11660 + }, + { + "epoch": 0.67, + "learning_rate": 5.955455850187962e-05, + "loss": 3.3009, + "step": 11665 + }, + { + "epoch": 0.67, + "learning_rate": 5.946299492581201e-05, + "loss": 3.4256, + "step": 11670 + }, + { + "epoch": 0.67, + "learning_rate": 5.9371472000344006e-05, + "loss": 3.392, + "step": 11675 + }, + { + "epoch": 0.67, + "learning_rate": 5.9279989817255e-05, + "loss": 3.4602, + "step": 11680 + }, + { + "epoch": 0.67, + "learning_rate": 5.9188548468283475e-05, + "loss": 3.371, + "step": 11685 + }, + { + "epoch": 0.67, + "learning_rate": 5.9097148045127095e-05, + "loss": 3.4414, + "step": 11690 + }, + { + "epoch": 0.67, + "learning_rate": 5.9005788639442394e-05, + "loss": 3.3925, + "step": 11695 + }, + { + "epoch": 0.67, + "learning_rate": 5.8914470342844694e-05, + "loss": 3.4386, + "step": 11700 + }, + { + "epoch": 0.67, + "learning_rate": 5.8823193246908346e-05, + "loss": 3.3821, + "step": 11705 + }, + { + "epoch": 0.67, + "learning_rate": 5.873195744316611e-05, + "loss": 3.4906, + "step": 11710 + }, + { + "epoch": 0.67, + "learning_rate": 5.86407630231095e-05, + "loss": 3.3811, + "step": 11715 + }, + { + "epoch": 0.67, + "learning_rate": 5.8549610078188446e-05, + "loss": 3.5176, + "step": 11720 + }, + { + "epoch": 0.67, + "learning_rate": 5.845849869981137e-05, + "loss": 3.4476, + "step": 11725 + }, + { + "epoch": 0.67, + "learning_rate": 5.836742897934497e-05, + "loss": 3.4053, + "step": 11730 + }, + { + "epoch": 0.67, + "learning_rate": 5.827640100811409e-05, + "loss": 3.466, + "step": 11735 + }, + { + "epoch": 0.67, + "learning_rate": 5.8185414877401876e-05, + "loss": 3.4372, + "step": 11740 + }, + { + "epoch": 0.67, + "learning_rate": 5.80944706784494e-05, + "loss": 3.3834, + "step": 11745 + }, + { + "epoch": 0.67, + "learning_rate": 5.8003568502455676e-05, + "loss": 3.3965, + "step": 11750 + }, + { + "epoch": 0.67, + "learning_rate": 5.7912708440577635e-05, + "loss": 3.397, + "step": 11755 + }, + { + "epoch": 0.67, + "learning_rate": 5.782189058392995e-05, + "loss": 3.5251, + "step": 11760 + }, + { + "epoch": 0.68, + "learning_rate": 5.773111502358492e-05, + "loss": 3.5267, + "step": 11765 + }, + { + "epoch": 0.68, + "learning_rate": 5.764038185057259e-05, + "loss": 3.3989, + "step": 11770 + }, + { + "epoch": 0.68, + "learning_rate": 5.754969115588034e-05, + "loss": 3.4337, + "step": 11775 + }, + { + "epoch": 0.68, + "learning_rate": 5.7459043030452966e-05, + "loss": 3.469, + "step": 11780 + }, + { + "epoch": 0.68, + "learning_rate": 5.736843756519259e-05, + "loss": 3.4211, + "step": 11785 + }, + { + "epoch": 0.68, + "learning_rate": 5.727787485095866e-05, + "loss": 3.4247, + "step": 11790 + }, + { + "epoch": 0.68, + "learning_rate": 5.718735497856762e-05, + "loss": 3.4502, + "step": 11795 + }, + { + "epoch": 0.68, + "learning_rate": 5.709687803879301e-05, + "loss": 3.4649, + "step": 11800 + }, + { + "epoch": 0.68, + "learning_rate": 5.700644412236531e-05, + "loss": 3.385, + "step": 11805 + }, + { + "epoch": 0.68, + "learning_rate": 5.691605331997185e-05, + "loss": 3.4822, + "step": 11810 + }, + { + "epoch": 0.68, + "learning_rate": 5.682570572225671e-05, + "loss": 3.4928, + "step": 11815 + }, + { + "epoch": 0.68, + "learning_rate": 5.67354014198207e-05, + "loss": 3.4464, + "step": 11820 + }, + { + "epoch": 0.68, + "learning_rate": 5.664514050322122e-05, + "loss": 3.3819, + "step": 11825 + }, + { + "epoch": 0.68, + "learning_rate": 5.6554923062971966e-05, + "loss": 3.3406, + "step": 11830 + }, + { + "epoch": 0.68, + "learning_rate": 5.646474918954334e-05, + "loss": 3.4917, + "step": 11835 + }, + { + "epoch": 0.68, + "learning_rate": 5.637461897336185e-05, + "loss": 3.5118, + "step": 11840 + }, + { + "epoch": 0.68, + "learning_rate": 5.628453250481026e-05, + "loss": 3.367, + "step": 11845 + }, + { + "epoch": 0.68, + "learning_rate": 5.6194489874227504e-05, + "loss": 3.4495, + "step": 11850 + }, + { + "epoch": 0.68, + "learning_rate": 5.610449117190855e-05, + "loss": 3.5048, + "step": 11855 + }, + { + "epoch": 0.68, + "learning_rate": 5.601453648810426e-05, + "loss": 3.4683, + "step": 11860 + }, + { + "epoch": 0.68, + "learning_rate": 5.5924625913021386e-05, + "loss": 3.4911, + "step": 11865 + }, + { + "epoch": 0.68, + "learning_rate": 5.583475953682251e-05, + "loss": 3.4188, + "step": 11870 + }, + { + "epoch": 0.68, + "learning_rate": 5.5744937449625854e-05, + "loss": 3.4387, + "step": 11875 + }, + { + "epoch": 0.68, + "learning_rate": 5.565515974150508e-05, + "loss": 3.4269, + "step": 11880 + }, + { + "epoch": 0.68, + "learning_rate": 5.556542650248959e-05, + "loss": 3.4387, + "step": 11885 + }, + { + "epoch": 0.68, + "learning_rate": 5.547573782256403e-05, + "loss": 3.403, + "step": 11890 + }, + { + "epoch": 0.68, + "learning_rate": 5.538609379166845e-05, + "loss": 3.4218, + "step": 11895 + }, + { + "epoch": 0.68, + "learning_rate": 5.529649449969804e-05, + "loss": 3.4166, + "step": 11900 + }, + { + "epoch": 0.68, + "learning_rate": 5.5206940036503194e-05, + "loss": 3.3965, + "step": 11905 + }, + { + "epoch": 0.68, + "learning_rate": 5.511743049188931e-05, + "loss": 3.4845, + "step": 11910 + }, + { + "epoch": 0.68, + "learning_rate": 5.5027965955616743e-05, + "loss": 3.4237, + "step": 11915 + }, + { + "epoch": 0.68, + "learning_rate": 5.49385465174008e-05, + "loss": 3.4743, + "step": 11920 + }, + { + "epoch": 0.68, + "learning_rate": 5.48491722669115e-05, + "loss": 3.4059, + "step": 11925 + }, + { + "epoch": 0.68, + "learning_rate": 5.47598432937734e-05, + "loss": 3.3823, + "step": 11930 + }, + { + "epoch": 0.68, + "learning_rate": 5.467055968756595e-05, + "loss": 3.3649, + "step": 11935 + }, + { + "epoch": 0.69, + "learning_rate": 5.4581321537822875e-05, + "loss": 3.414, + "step": 11940 + }, + { + "epoch": 0.69, + "learning_rate": 5.4492128934032416e-05, + "loss": 3.4893, + "step": 11945 + }, + { + "epoch": 0.69, + "learning_rate": 5.440298196563711e-05, + "loss": 3.504, + "step": 11950 + }, + { + "epoch": 0.69, + "learning_rate": 5.431388072203373e-05, + "loss": 3.3406, + "step": 11955 + }, + { + "epoch": 0.69, + "learning_rate": 5.4224825292573154e-05, + "loss": 3.4897, + "step": 11960 + }, + { + "epoch": 0.69, + "learning_rate": 5.4135815766560486e-05, + "loss": 3.4618, + "step": 11965 + }, + { + "epoch": 0.69, + "learning_rate": 5.40468522332546e-05, + "loss": 3.4703, + "step": 11970 + }, + { + "epoch": 0.69, + "learning_rate": 5.395793478186838e-05, + "loss": 3.3449, + "step": 11975 + }, + { + "epoch": 0.69, + "learning_rate": 5.386906350156833e-05, + "loss": 3.3473, + "step": 11980 + }, + { + "epoch": 0.69, + "learning_rate": 5.378023848147487e-05, + "loss": 3.42, + "step": 11985 + }, + { + "epoch": 0.69, + "learning_rate": 5.36914598106619e-05, + "loss": 3.3635, + "step": 11990 + }, + { + "epoch": 0.69, + "learning_rate": 5.3602727578156895e-05, + "loss": 3.3623, + "step": 11995 + }, + { + "epoch": 0.69, + "learning_rate": 5.35140418729407e-05, + "loss": 3.4905, + "step": 12000 + }, + { + "epoch": 0.69, + "learning_rate": 5.3425402783947564e-05, + "loss": 3.5037, + "step": 12005 + }, + { + "epoch": 0.69, + "learning_rate": 5.3336810400064904e-05, + "loss": 3.4495, + "step": 12010 + }, + { + "epoch": 0.69, + "learning_rate": 5.324826481013345e-05, + "loss": 3.3432, + "step": 12015 + }, + { + "epoch": 0.69, + "learning_rate": 5.315976610294689e-05, + "loss": 3.4057, + "step": 12020 + }, + { + "epoch": 0.69, + "learning_rate": 5.307131436725191e-05, + "loss": 3.3668, + "step": 12025 + }, + { + "epoch": 0.69, + "learning_rate": 5.298290969174812e-05, + "loss": 3.4558, + "step": 12030 + }, + { + "epoch": 0.69, + "learning_rate": 5.2894552165087916e-05, + "loss": 3.3819, + "step": 12035 + }, + { + "epoch": 0.69, + "learning_rate": 5.2806241875876426e-05, + "loss": 3.3791, + "step": 12040 + }, + { + "epoch": 0.69, + "learning_rate": 5.271797891267142e-05, + "loss": 3.33, + "step": 12045 + }, + { + "epoch": 0.69, + "learning_rate": 5.262976336398318e-05, + "loss": 3.3616, + "step": 12050 + }, + { + "epoch": 0.69, + "learning_rate": 5.254159531827445e-05, + "loss": 3.3364, + "step": 12055 + }, + { + "epoch": 0.69, + "learning_rate": 5.245347486396033e-05, + "loss": 3.5023, + "step": 12060 + }, + { + "epoch": 0.69, + "learning_rate": 5.236540208940827e-05, + "loss": 3.3692, + "step": 12065 + }, + { + "epoch": 0.69, + "learning_rate": 5.2277377082937806e-05, + "loss": 3.425, + "step": 12070 + }, + { + "epoch": 0.69, + "learning_rate": 5.2189399932820616e-05, + "loss": 3.4607, + "step": 12075 + }, + { + "epoch": 0.69, + "learning_rate": 5.210147072728038e-05, + "loss": 3.329, + "step": 12080 + }, + { + "epoch": 0.69, + "learning_rate": 5.2013589554492714e-05, + "loss": 3.3323, + "step": 12085 + }, + { + "epoch": 0.69, + "learning_rate": 5.192575650258503e-05, + "loss": 3.4905, + "step": 12090 + }, + { + "epoch": 0.69, + "learning_rate": 5.1837971659636545e-05, + "loss": 3.4058, + "step": 12095 + }, + { + "epoch": 0.69, + "learning_rate": 5.175023511367807e-05, + "loss": 3.4603, + "step": 12100 + }, + { + "epoch": 0.69, + "learning_rate": 5.1662546952692015e-05, + "loss": 3.5054, + "step": 12105 + }, + { + "epoch": 0.69, + "learning_rate": 5.1574907264612224e-05, + "loss": 3.4549, + "step": 12110 + }, + { + "epoch": 0.7, + "learning_rate": 5.148731613732407e-05, + "loss": 3.423, + "step": 12115 + }, + { + "epoch": 0.7, + "learning_rate": 5.139977365866406e-05, + "loss": 3.3626, + "step": 12120 + }, + { + "epoch": 0.7, + "learning_rate": 5.131227991642001e-05, + "loss": 3.4474, + "step": 12125 + }, + { + "epoch": 0.7, + "learning_rate": 5.122483499833084e-05, + "loss": 3.4126, + "step": 12130 + }, + { + "epoch": 0.7, + "learning_rate": 5.1137438992086506e-05, + "loss": 3.4001, + "step": 12135 + }, + { + "epoch": 0.7, + "learning_rate": 5.1050091985327884e-05, + "loss": 3.462, + "step": 12140 + }, + { + "epoch": 0.7, + "learning_rate": 5.096279406564686e-05, + "loss": 3.3863, + "step": 12145 + }, + { + "epoch": 0.7, + "learning_rate": 5.087554532058586e-05, + "loss": 3.385, + "step": 12150 + }, + { + "epoch": 0.7, + "learning_rate": 5.078834583763817e-05, + "loss": 3.4652, + "step": 12155 + }, + { + "epoch": 0.7, + "learning_rate": 5.0701195704247595e-05, + "loss": 3.3696, + "step": 12160 + }, + { + "epoch": 0.7, + "learning_rate": 5.061409500780854e-05, + "loss": 3.4675, + "step": 12165 + }, + { + "epoch": 0.7, + "learning_rate": 5.052704383566577e-05, + "loss": 3.4232, + "step": 12170 + }, + { + "epoch": 0.7, + "learning_rate": 5.044004227511436e-05, + "loss": 3.4355, + "step": 12175 + }, + { + "epoch": 0.7, + "learning_rate": 5.0353090413399705e-05, + "loss": 3.3166, + "step": 12180 + }, + { + "epoch": 0.7, + "learning_rate": 5.02661883377173e-05, + "loss": 3.4568, + "step": 12185 + }, + { + "epoch": 0.7, + "learning_rate": 5.017933613521273e-05, + "loss": 3.3627, + "step": 12190 + }, + { + "epoch": 0.7, + "learning_rate": 5.009253389298165e-05, + "loss": 3.4372, + "step": 12195 + }, + { + "epoch": 0.7, + "learning_rate": 5.0005781698069474e-05, + "loss": 3.4063, + "step": 12200 + }, + { + "epoch": 0.7, + "learning_rate": 4.991907963747148e-05, + "loss": 3.3947, + "step": 12205 + }, + { + "epoch": 0.7, + "learning_rate": 4.983242779813276e-05, + "loss": 3.4777, + "step": 12210 + }, + { + "epoch": 0.7, + "learning_rate": 4.9745826266947934e-05, + "loss": 3.4502, + "step": 12215 + }, + { + "epoch": 0.7, + "learning_rate": 4.965927513076123e-05, + "loss": 3.4368, + "step": 12220 + }, + { + "epoch": 0.7, + "learning_rate": 4.957277447636629e-05, + "loss": 3.4979, + "step": 12225 + }, + { + "epoch": 0.7, + "learning_rate": 4.94863243905062e-05, + "loss": 3.4374, + "step": 12230 + }, + { + "epoch": 0.7, + "learning_rate": 4.939992495987327e-05, + "loss": 3.2904, + "step": 12235 + }, + { + "epoch": 0.7, + "learning_rate": 4.931357627110902e-05, + "loss": 3.3877, + "step": 12240 + }, + { + "epoch": 0.7, + "learning_rate": 4.9227278410804225e-05, + "loss": 3.4203, + "step": 12245 + }, + { + "epoch": 0.7, + "learning_rate": 4.914103146549844e-05, + "loss": 3.4669, + "step": 12250 + }, + { + "epoch": 0.7, + "learning_rate": 4.905483552168032e-05, + "loss": 3.4255, + "step": 12255 + }, + { + "epoch": 0.7, + "learning_rate": 4.896869066578741e-05, + "loss": 3.4052, + "step": 12260 + }, + { + "epoch": 0.7, + "learning_rate": 4.888259698420594e-05, + "loss": 3.2722, + "step": 12265 + }, + { + "epoch": 0.7, + "learning_rate": 4.879655456327083e-05, + "loss": 3.3265, + "step": 12270 + }, + { + "epoch": 0.7, + "learning_rate": 4.8710563489265624e-05, + "loss": 3.4165, + "step": 12275 + }, + { + "epoch": 0.7, + "learning_rate": 4.862462384842237e-05, + "loss": 3.434, + "step": 12280 + }, + { + "epoch": 0.7, + "learning_rate": 4.853873572692151e-05, + "loss": 3.4655, + "step": 12285 + }, + { + "epoch": 0.71, + "learning_rate": 4.845289921089182e-05, + "loss": 3.4223, + "step": 12290 + }, + { + "epoch": 0.71, + "learning_rate": 4.8367114386410486e-05, + "loss": 3.4676, + "step": 12295 + }, + { + "epoch": 0.71, + "learning_rate": 4.8281381339502565e-05, + "loss": 3.4124, + "step": 12300 + }, + { + "epoch": 0.71, + "learning_rate": 4.8195700156141386e-05, + "loss": 3.3337, + "step": 12305 + }, + { + "epoch": 0.71, + "learning_rate": 4.8110070922248284e-05, + "loss": 3.3714, + "step": 12310 + }, + { + "epoch": 0.71, + "learning_rate": 4.802449372369242e-05, + "loss": 3.3965, + "step": 12315 + }, + { + "epoch": 0.71, + "learning_rate": 4.79389686462908e-05, + "loss": 3.3495, + "step": 12320 + }, + { + "epoch": 0.71, + "learning_rate": 4.785349577580817e-05, + "loss": 3.4524, + "step": 12325 + }, + { + "epoch": 0.71, + "learning_rate": 4.77680751979569e-05, + "loss": 3.4715, + "step": 12330 + }, + { + "epoch": 0.71, + "learning_rate": 4.768270699839691e-05, + "loss": 3.3759, + "step": 12335 + }, + { + "epoch": 0.71, + "learning_rate": 4.759739126273569e-05, + "loss": 3.4551, + "step": 12340 + }, + { + "epoch": 0.71, + "learning_rate": 4.751212807652806e-05, + "loss": 3.4238, + "step": 12345 + }, + { + "epoch": 0.71, + "learning_rate": 4.742691752527606e-05, + "loss": 3.447, + "step": 12350 + }, + { + "epoch": 0.71, + "learning_rate": 4.7341759694429014e-05, + "loss": 3.4534, + "step": 12355 + }, + { + "epoch": 0.71, + "learning_rate": 4.725665466938346e-05, + "loss": 3.3696, + "step": 12360 + }, + { + "epoch": 0.71, + "learning_rate": 4.717160253548287e-05, + "loss": 3.4005, + "step": 12365 + }, + { + "epoch": 0.71, + "learning_rate": 4.708660337801773e-05, + "loss": 3.4423, + "step": 12370 + }, + { + "epoch": 0.71, + "learning_rate": 4.700165728222538e-05, + "loss": 3.4279, + "step": 12375 + }, + { + "epoch": 0.71, + "learning_rate": 4.6916764333289934e-05, + "loss": 3.3313, + "step": 12380 + }, + { + "epoch": 0.71, + "learning_rate": 4.6831924616342217e-05, + "loss": 3.4458, + "step": 12385 + }, + { + "epoch": 0.71, + "learning_rate": 4.674713821645975e-05, + "loss": 3.4747, + "step": 12390 + }, + { + "epoch": 0.71, + "learning_rate": 4.6662405218666525e-05, + "loss": 3.3762, + "step": 12395 + }, + { + "epoch": 0.71, + "learning_rate": 4.657772570793289e-05, + "loss": 3.4377, + "step": 12400 + }, + { + "epoch": 0.71, + "learning_rate": 4.649309976917574e-05, + "loss": 3.4851, + "step": 12405 + }, + { + "epoch": 0.71, + "learning_rate": 4.6408527487258124e-05, + "loss": 3.3974, + "step": 12410 + }, + { + "epoch": 0.71, + "learning_rate": 4.6324008946989314e-05, + "loss": 3.459, + "step": 12415 + }, + { + "epoch": 0.71, + "learning_rate": 4.62395442331247e-05, + "loss": 3.4655, + "step": 12420 + }, + { + "epoch": 0.71, + "learning_rate": 4.615513343036567e-05, + "loss": 3.4901, + "step": 12425 + }, + { + "epoch": 0.71, + "learning_rate": 4.607077662335959e-05, + "loss": 3.4094, + "step": 12430 + }, + { + "epoch": 0.71, + "learning_rate": 4.59864738966996e-05, + "loss": 3.3699, + "step": 12435 + }, + { + "epoch": 0.71, + "learning_rate": 4.590222533492473e-05, + "loss": 3.4407, + "step": 12440 + }, + { + "epoch": 0.71, + "learning_rate": 4.581803102251966e-05, + "loss": 3.4523, + "step": 12445 + }, + { + "epoch": 0.71, + "learning_rate": 4.573389104391449e-05, + "loss": 3.4302, + "step": 12450 + }, + { + "epoch": 0.71, + "learning_rate": 4.564980548348511e-05, + "loss": 3.4404, + "step": 12455 + }, + { + "epoch": 0.71, + "learning_rate": 4.556577442555265e-05, + "loss": 3.4195, + "step": 12460 + }, + { + "epoch": 0.72, + "learning_rate": 4.5481797954383674e-05, + "loss": 3.5431, + "step": 12465 + }, + { + "epoch": 0.72, + "learning_rate": 4.5397876154189956e-05, + "loss": 3.3908, + "step": 12470 + }, + { + "epoch": 0.72, + "learning_rate": 4.5314009109128464e-05, + "loss": 3.3795, + "step": 12475 + }, + { + "epoch": 0.72, + "learning_rate": 4.5230196903301266e-05, + "loss": 3.4046, + "step": 12480 + }, + { + "epoch": 0.72, + "learning_rate": 4.51464396207554e-05, + "loss": 3.439, + "step": 12485 + }, + { + "epoch": 0.72, + "learning_rate": 4.506273734548292e-05, + "loss": 3.3901, + "step": 12490 + }, + { + "epoch": 0.72, + "learning_rate": 4.4979090161420645e-05, + "loss": 3.4594, + "step": 12495 + }, + { + "epoch": 0.72, + "learning_rate": 4.489549815245008e-05, + "loss": 3.4072, + "step": 12500 + }, + { + "epoch": 0.72, + "learning_rate": 4.4811961402397554e-05, + "loss": 3.4482, + "step": 12505 + }, + { + "epoch": 0.72, + "learning_rate": 4.472847999503389e-05, + "loss": 3.3897, + "step": 12510 + }, + { + "epoch": 0.72, + "learning_rate": 4.4645054014074426e-05, + "loss": 3.3642, + "step": 12515 + }, + { + "epoch": 0.72, + "learning_rate": 4.456168354317892e-05, + "loss": 3.4536, + "step": 12520 + }, + { + "epoch": 0.72, + "learning_rate": 4.4478368665951476e-05, + "loss": 3.4515, + "step": 12525 + }, + { + "epoch": 0.72, + "learning_rate": 4.43951094659404e-05, + "loss": 3.4916, + "step": 12530 + }, + { + "epoch": 0.72, + "learning_rate": 4.431190602663827e-05, + "loss": 3.4085, + "step": 12535 + }, + { + "epoch": 0.72, + "learning_rate": 4.422875843148165e-05, + "loss": 3.4196, + "step": 12540 + }, + { + "epoch": 0.72, + "learning_rate": 4.414566676385118e-05, + "loss": 3.5058, + "step": 12545 + }, + { + "epoch": 0.72, + "learning_rate": 4.406263110707125e-05, + "loss": 3.5372, + "step": 12550 + }, + { + "epoch": 0.72, + "learning_rate": 4.39796515444103e-05, + "loss": 3.4284, + "step": 12555 + }, + { + "epoch": 0.72, + "learning_rate": 4.3896728159080424e-05, + "loss": 3.3515, + "step": 12560 + }, + { + "epoch": 0.72, + "learning_rate": 4.381386103423735e-05, + "loss": 3.3939, + "step": 12565 + }, + { + "epoch": 0.72, + "learning_rate": 4.373105025298041e-05, + "loss": 3.4603, + "step": 12570 + }, + { + "epoch": 0.72, + "learning_rate": 4.364829589835245e-05, + "loss": 3.4805, + "step": 12575 + }, + { + "epoch": 0.72, + "learning_rate": 4.356559805333971e-05, + "loss": 3.3778, + "step": 12580 + }, + { + "epoch": 0.72, + "learning_rate": 4.348295680087181e-05, + "loss": 3.4858, + "step": 12585 + }, + { + "epoch": 0.72, + "learning_rate": 4.340037222382156e-05, + "loss": 3.5436, + "step": 12590 + }, + { + "epoch": 0.72, + "learning_rate": 4.3317844405005e-05, + "loss": 3.4001, + "step": 12595 + }, + { + "epoch": 0.72, + "learning_rate": 4.323537342718111e-05, + "loss": 3.4307, + "step": 12600 + }, + { + "epoch": 0.72, + "learning_rate": 4.315295937305207e-05, + "loss": 3.4018, + "step": 12605 + }, + { + "epoch": 0.72, + "learning_rate": 4.307060232526283e-05, + "loss": 3.4222, + "step": 12610 + }, + { + "epoch": 0.72, + "learning_rate": 4.2988302366401254e-05, + "loss": 3.3999, + "step": 12615 + }, + { + "epoch": 0.72, + "learning_rate": 4.2906059578997896e-05, + "loss": 3.4324, + "step": 12620 + }, + { + "epoch": 0.72, + "learning_rate": 4.2823874045526026e-05, + "loss": 3.5599, + "step": 12625 + }, + { + "epoch": 0.72, + "learning_rate": 4.274174584840143e-05, + "loss": 3.4724, + "step": 12630 + }, + { + "epoch": 0.72, + "learning_rate": 4.265967506998253e-05, + "loss": 3.4982, + "step": 12635 + }, + { + "epoch": 0.73, + "learning_rate": 4.257766179257005e-05, + "loss": 3.4577, + "step": 12640 + }, + { + "epoch": 0.73, + "learning_rate": 4.2495706098407085e-05, + "loss": 3.5103, + "step": 12645 + }, + { + "epoch": 0.73, + "learning_rate": 4.2413808069678996e-05, + "loss": 3.4502, + "step": 12650 + }, + { + "epoch": 0.73, + "learning_rate": 4.2331967788513295e-05, + "loss": 3.5103, + "step": 12655 + }, + { + "epoch": 0.73, + "learning_rate": 4.225018533697962e-05, + "loss": 3.4122, + "step": 12660 + }, + { + "epoch": 0.73, + "learning_rate": 4.216846079708958e-05, + "loss": 3.4944, + "step": 12665 + }, + { + "epoch": 0.73, + "learning_rate": 4.2086794250796734e-05, + "loss": 3.3778, + "step": 12670 + }, + { + "epoch": 0.73, + "learning_rate": 4.2005185779996484e-05, + "loss": 3.4573, + "step": 12675 + }, + { + "epoch": 0.73, + "learning_rate": 4.1923635466525936e-05, + "loss": 3.4365, + "step": 12680 + }, + { + "epoch": 0.73, + "learning_rate": 4.1842143392164004e-05, + "loss": 3.4388, + "step": 12685 + }, + { + "epoch": 0.73, + "learning_rate": 4.17607096386311e-05, + "loss": 3.4097, + "step": 12690 + }, + { + "epoch": 0.73, + "learning_rate": 4.167933428758916e-05, + "loss": 3.4335, + "step": 12695 + }, + { + "epoch": 0.73, + "learning_rate": 4.159801742064158e-05, + "loss": 3.3369, + "step": 12700 + }, + { + "epoch": 0.73, + "learning_rate": 4.151675911933308e-05, + "loss": 3.3873, + "step": 12705 + }, + { + "epoch": 0.73, + "learning_rate": 4.143555946514964e-05, + "loss": 3.4301, + "step": 12710 + }, + { + "epoch": 0.73, + "learning_rate": 4.135441853951857e-05, + "loss": 3.4915, + "step": 12715 + }, + { + "epoch": 0.73, + "learning_rate": 4.1273336423808065e-05, + "loss": 3.4126, + "step": 12720 + }, + { + "epoch": 0.73, + "learning_rate": 4.119231319932747e-05, + "loss": 3.4103, + "step": 12725 + }, + { + "epoch": 0.73, + "learning_rate": 4.1111348947327034e-05, + "loss": 3.4326, + "step": 12730 + }, + { + "epoch": 0.73, + "learning_rate": 4.1030443748997974e-05, + "loss": 3.4294, + "step": 12735 + }, + { + "epoch": 0.73, + "learning_rate": 4.094959768547214e-05, + "loss": 3.5006, + "step": 12740 + }, + { + "epoch": 0.73, + "learning_rate": 4.086881083782216e-05, + "loss": 3.4587, + "step": 12745 + }, + { + "epoch": 0.73, + "learning_rate": 4.078808328706127e-05, + "loss": 3.4071, + "step": 12750 + }, + { + "epoch": 0.73, + "learning_rate": 4.070741511414323e-05, + "loss": 3.4122, + "step": 12755 + }, + { + "epoch": 0.73, + "learning_rate": 4.062680639996225e-05, + "loss": 3.4174, + "step": 12760 + }, + { + "epoch": 0.73, + "learning_rate": 4.054625722535301e-05, + "loss": 3.4378, + "step": 12765 + }, + { + "epoch": 0.73, + "learning_rate": 4.0465767671090304e-05, + "loss": 3.4344, + "step": 12770 + }, + { + "epoch": 0.73, + "learning_rate": 4.038533781788924e-05, + "loss": 3.4297, + "step": 12775 + }, + { + "epoch": 0.73, + "learning_rate": 4.030496774640514e-05, + "loss": 3.4925, + "step": 12780 + }, + { + "epoch": 0.73, + "learning_rate": 4.022465753723323e-05, + "loss": 3.4498, + "step": 12785 + }, + { + "epoch": 0.73, + "learning_rate": 4.014440727090879e-05, + "loss": 3.3666, + "step": 12790 + }, + { + "epoch": 0.73, + "learning_rate": 4.0064217027906945e-05, + "loss": 3.4716, + "step": 12795 + }, + { + "epoch": 0.73, + "learning_rate": 3.998408688864267e-05, + "loss": 3.428, + "step": 12800 + }, + { + "epoch": 0.73, + "learning_rate": 3.990401693347065e-05, + "loss": 3.4751, + "step": 12805 + }, + { + "epoch": 0.73, + "learning_rate": 3.982400724268516e-05, + "loss": 3.4432, + "step": 12810 + }, + { + "epoch": 0.74, + "learning_rate": 3.974405789652022e-05, + "loss": 3.4092, + "step": 12815 + }, + { + "epoch": 0.74, + "learning_rate": 3.96641689751491e-05, + "loss": 3.4849, + "step": 12820 + }, + { + "epoch": 0.74, + "learning_rate": 3.95843405586846e-05, + "loss": 3.5055, + "step": 12825 + }, + { + "epoch": 0.74, + "learning_rate": 3.950457272717889e-05, + "loss": 3.3467, + "step": 12830 + }, + { + "epoch": 0.74, + "learning_rate": 3.9424865560623305e-05, + "loss": 3.4317, + "step": 12835 + }, + { + "epoch": 0.74, + "learning_rate": 3.9345219138948365e-05, + "loss": 3.3582, + "step": 12840 + }, + { + "epoch": 0.74, + "learning_rate": 3.9265633542023684e-05, + "loss": 3.4635, + "step": 12845 + }, + { + "epoch": 0.74, + "learning_rate": 3.9186108849657885e-05, + "loss": 3.359, + "step": 12850 + }, + { + "epoch": 0.74, + "learning_rate": 3.91066451415985e-05, + "loss": 3.4359, + "step": 12855 + }, + { + "epoch": 0.74, + "learning_rate": 3.9027242497531865e-05, + "loss": 3.4464, + "step": 12860 + }, + { + "epoch": 0.74, + "learning_rate": 3.8947900997083255e-05, + "loss": 3.3677, + "step": 12865 + }, + { + "epoch": 0.74, + "learning_rate": 3.8868620719816395e-05, + "loss": 3.4575, + "step": 12870 + }, + { + "epoch": 0.74, + "learning_rate": 3.878940174523371e-05, + "loss": 3.4131, + "step": 12875 + }, + { + "epoch": 0.74, + "learning_rate": 3.8710244152776264e-05, + "loss": 3.4459, + "step": 12880 + }, + { + "epoch": 0.74, + "learning_rate": 3.8631148021823406e-05, + "loss": 3.4743, + "step": 12885 + }, + { + "epoch": 0.74, + "learning_rate": 3.8552113431692925e-05, + "loss": 3.4034, + "step": 12890 + }, + { + "epoch": 0.74, + "learning_rate": 3.847314046164089e-05, + "loss": 3.3817, + "step": 12895 + }, + { + "epoch": 0.74, + "learning_rate": 3.8394229190861567e-05, + "loss": 3.3604, + "step": 12900 + }, + { + "epoch": 0.74, + "learning_rate": 3.831537969848731e-05, + "loss": 3.5609, + "step": 12905 + }, + { + "epoch": 0.74, + "learning_rate": 3.823659206358865e-05, + "loss": 3.3017, + "step": 12910 + }, + { + "epoch": 0.74, + "learning_rate": 3.8157866365174e-05, + "loss": 3.3909, + "step": 12915 + }, + { + "epoch": 0.74, + "learning_rate": 3.807920268218961e-05, + "loss": 3.4834, + "step": 12920 + }, + { + "epoch": 0.74, + "learning_rate": 3.800060109351957e-05, + "loss": 3.3578, + "step": 12925 + }, + { + "epoch": 0.74, + "learning_rate": 3.792206167798582e-05, + "loss": 3.4263, + "step": 12930 + }, + { + "epoch": 0.74, + "learning_rate": 3.784358451434783e-05, + "loss": 3.4707, + "step": 12935 + }, + { + "epoch": 0.74, + "learning_rate": 3.776516968130266e-05, + "loss": 3.4061, + "step": 12940 + }, + { + "epoch": 0.74, + "learning_rate": 3.768681725748488e-05, + "loss": 3.3753, + "step": 12945 + }, + { + "epoch": 0.74, + "learning_rate": 3.760852732146649e-05, + "loss": 3.5493, + "step": 12950 + }, + { + "epoch": 0.74, + "learning_rate": 3.753029995175677e-05, + "loss": 3.336, + "step": 12955 + }, + { + "epoch": 0.74, + "learning_rate": 3.7452135226802385e-05, + "loss": 3.4704, + "step": 12960 + }, + { + "epoch": 0.74, + "learning_rate": 3.7374033224987084e-05, + "loss": 3.5301, + "step": 12965 + }, + { + "epoch": 0.74, + "learning_rate": 3.729599402463162e-05, + "loss": 3.4371, + "step": 12970 + }, + { + "epoch": 0.74, + "learning_rate": 3.7218017703993994e-05, + "loss": 3.3675, + "step": 12975 + }, + { + "epoch": 0.74, + "learning_rate": 3.714010434126899e-05, + "loss": 3.3916, + "step": 12980 + }, + { + "epoch": 0.75, + "learning_rate": 3.706225401458831e-05, + "loss": 3.4445, + "step": 12985 + }, + { + "epoch": 0.75, + "learning_rate": 3.6984466802020436e-05, + "loss": 3.4208, + "step": 12990 + }, + { + "epoch": 0.75, + "learning_rate": 3.690674278157056e-05, + "loss": 3.4449, + "step": 12995 + }, + { + "epoch": 0.75, + "learning_rate": 3.6829082031180496e-05, + "loss": 3.4187, + "step": 13000 + }, + { + "epoch": 0.75, + "learning_rate": 3.6751484628728594e-05, + "loss": 3.3511, + "step": 13005 + }, + { + "epoch": 0.75, + "learning_rate": 3.6673950652029766e-05, + "loss": 3.4735, + "step": 13010 + }, + { + "epoch": 0.75, + "learning_rate": 3.659648017883526e-05, + "loss": 3.5323, + "step": 13015 + }, + { + "epoch": 0.75, + "learning_rate": 3.651907328683254e-05, + "loss": 3.4325, + "step": 13020 + }, + { + "epoch": 0.75, + "learning_rate": 3.6441730053645506e-05, + "loss": 3.3847, + "step": 13025 + }, + { + "epoch": 0.75, + "learning_rate": 3.6364450556834097e-05, + "loss": 3.4291, + "step": 13030 + }, + { + "epoch": 0.75, + "learning_rate": 3.628723487389437e-05, + "loss": 3.4373, + "step": 13035 + }, + { + "epoch": 0.75, + "learning_rate": 3.621008308225837e-05, + "loss": 3.3928, + "step": 13040 + }, + { + "epoch": 0.75, + "learning_rate": 3.61329952592941e-05, + "loss": 3.3976, + "step": 13045 + }, + { + "epoch": 0.75, + "learning_rate": 3.605597148230541e-05, + "loss": 3.3787, + "step": 13050 + }, + { + "epoch": 0.75, + "learning_rate": 3.597901182853185e-05, + "loss": 3.4422, + "step": 13055 + }, + { + "epoch": 0.75, + "learning_rate": 3.590211637514884e-05, + "loss": 3.4278, + "step": 13060 + }, + { + "epoch": 0.75, + "learning_rate": 3.582528519926729e-05, + "loss": 3.4047, + "step": 13065 + }, + { + "epoch": 0.75, + "learning_rate": 3.574851837793357e-05, + "loss": 3.4911, + "step": 13070 + }, + { + "epoch": 0.75, + "learning_rate": 3.567181598812973e-05, + "loss": 3.4252, + "step": 13075 + }, + { + "epoch": 0.75, + "learning_rate": 3.559517810677308e-05, + "loss": 3.4642, + "step": 13080 + }, + { + "epoch": 0.75, + "learning_rate": 3.551860481071624e-05, + "loss": 3.3949, + "step": 13085 + }, + { + "epoch": 0.75, + "learning_rate": 3.544209617674707e-05, + "loss": 3.3708, + "step": 13090 + }, + { + "epoch": 0.75, + "learning_rate": 3.536565228158864e-05, + "loss": 3.4549, + "step": 13095 + }, + { + "epoch": 0.75, + "learning_rate": 3.528927320189903e-05, + "loss": 3.5864, + "step": 13100 + }, + { + "epoch": 0.75, + "learning_rate": 3.521295901427132e-05, + "loss": 3.3629, + "step": 13105 + }, + { + "epoch": 0.75, + "learning_rate": 3.5136709795233626e-05, + "loss": 3.4732, + "step": 13110 + }, + { + "epoch": 0.75, + "learning_rate": 3.506052562124883e-05, + "loss": 3.4384, + "step": 13115 + }, + { + "epoch": 0.75, + "learning_rate": 3.498440656871449e-05, + "loss": 3.5115, + "step": 13120 + }, + { + "epoch": 0.75, + "learning_rate": 3.4908352713963077e-05, + "loss": 3.4633, + "step": 13125 + }, + { + "epoch": 0.75, + "learning_rate": 3.483236413326151e-05, + "loss": 3.5703, + "step": 13130 + }, + { + "epoch": 0.75, + "learning_rate": 3.475644090281133e-05, + "loss": 3.5147, + "step": 13135 + }, + { + "epoch": 0.75, + "learning_rate": 3.468058309874851e-05, + "loss": 3.444, + "step": 13140 + }, + { + "epoch": 0.75, + "learning_rate": 3.460479079714343e-05, + "loss": 3.4569, + "step": 13145 + }, + { + "epoch": 0.75, + "learning_rate": 3.452906407400074e-05, + "loss": 3.4465, + "step": 13150 + }, + { + "epoch": 0.75, + "learning_rate": 3.4453403005259444e-05, + "loss": 3.3716, + "step": 13155 + }, + { + "epoch": 0.76, + "learning_rate": 3.43778076667926e-05, + "loss": 3.4576, + "step": 13160 + }, + { + "epoch": 0.76, + "learning_rate": 3.43022781344074e-05, + "loss": 3.4954, + "step": 13165 + }, + { + "epoch": 0.76, + "learning_rate": 3.4226814483844946e-05, + "loss": 3.3581, + "step": 13170 + }, + { + "epoch": 0.76, + "learning_rate": 3.4151416790780456e-05, + "loss": 3.4964, + "step": 13175 + }, + { + "epoch": 0.76, + "learning_rate": 3.4076085130822866e-05, + "loss": 3.463, + "step": 13180 + }, + { + "epoch": 0.76, + "learning_rate": 3.400081957951492e-05, + "loss": 3.4221, + "step": 13185 + }, + { + "epoch": 0.76, + "learning_rate": 3.392562021233311e-05, + "loss": 3.3691, + "step": 13190 + }, + { + "epoch": 0.76, + "learning_rate": 3.38504871046875e-05, + "loss": 3.4659, + "step": 13195 + }, + { + "epoch": 0.76, + "learning_rate": 3.3775420331921736e-05, + "loss": 3.3342, + "step": 13200 + }, + { + "epoch": 0.76, + "learning_rate": 3.3700419969312994e-05, + "loss": 3.3964, + "step": 13205 + }, + { + "epoch": 0.76, + "learning_rate": 3.362548609207177e-05, + "loss": 3.4668, + "step": 13210 + }, + { + "epoch": 0.76, + "learning_rate": 3.355061877534192e-05, + "loss": 3.3761, + "step": 13215 + }, + { + "epoch": 0.76, + "learning_rate": 3.3475818094200585e-05, + "loss": 3.466, + "step": 13220 + }, + { + "epoch": 0.76, + "learning_rate": 3.340108412365803e-05, + "loss": 3.3765, + "step": 13225 + }, + { + "epoch": 0.76, + "learning_rate": 3.332641693865766e-05, + "loss": 3.4859, + "step": 13230 + }, + { + "epoch": 0.76, + "learning_rate": 3.3251816614075884e-05, + "loss": 3.3735, + "step": 13235 + }, + { + "epoch": 0.76, + "learning_rate": 3.317728322472209e-05, + "loss": 3.3621, + "step": 13240 + }, + { + "epoch": 0.76, + "learning_rate": 3.310281684533852e-05, + "loss": 3.3748, + "step": 13245 + }, + { + "epoch": 0.76, + "learning_rate": 3.302841755060018e-05, + "loss": 3.5278, + "step": 13250 + }, + { + "epoch": 0.76, + "learning_rate": 3.2954085415114946e-05, + "loss": 3.5288, + "step": 13255 + }, + { + "epoch": 0.76, + "learning_rate": 3.2879820513423184e-05, + "loss": 3.4456, + "step": 13260 + }, + { + "epoch": 0.76, + "learning_rate": 3.2805622919997934e-05, + "loss": 3.4631, + "step": 13265 + }, + { + "epoch": 0.76, + "learning_rate": 3.273149270924468e-05, + "loss": 3.5066, + "step": 13270 + }, + { + "epoch": 0.76, + "learning_rate": 3.2657429955501394e-05, + "loss": 3.4205, + "step": 13275 + }, + { + "epoch": 0.76, + "learning_rate": 3.258343473303832e-05, + "loss": 3.4313, + "step": 13280 + }, + { + "epoch": 0.76, + "learning_rate": 3.2509507116058134e-05, + "loss": 3.3916, + "step": 13285 + }, + { + "epoch": 0.76, + "learning_rate": 3.243564717869552e-05, + "loss": 3.4521, + "step": 13290 + }, + { + "epoch": 0.76, + "learning_rate": 3.2361854995017416e-05, + "loss": 3.4749, + "step": 13295 + }, + { + "epoch": 0.76, + "learning_rate": 3.228813063902276e-05, + "loss": 3.4534, + "step": 13300 + }, + { + "epoch": 0.76, + "learning_rate": 3.2214474184642574e-05, + "loss": 3.453, + "step": 13305 + }, + { + "epoch": 0.76, + "learning_rate": 3.2140885705739674e-05, + "loss": 3.515, + "step": 13310 + }, + { + "epoch": 0.76, + "learning_rate": 3.2067365276108754e-05, + "loss": 3.3841, + "step": 13315 + }, + { + "epoch": 0.76, + "learning_rate": 3.199391296947627e-05, + "loss": 3.4814, + "step": 13320 + }, + { + "epoch": 0.76, + "learning_rate": 3.192052885950034e-05, + "loss": 3.3602, + "step": 13325 + }, + { + "epoch": 0.76, + "learning_rate": 3.1847213019770716e-05, + "loss": 3.484, + "step": 13330 + }, + { + "epoch": 0.77, + "learning_rate": 3.1773965523808754e-05, + "loss": 3.4659, + "step": 13335 + }, + { + "epoch": 0.77, + "learning_rate": 3.1700786445067135e-05, + "loss": 3.3604, + "step": 13340 + }, + { + "epoch": 0.77, + "learning_rate": 3.162767585692997e-05, + "loss": 3.4001, + "step": 13345 + }, + { + "epoch": 0.77, + "learning_rate": 3.155463383271282e-05, + "loss": 3.5798, + "step": 13350 + }, + { + "epoch": 0.77, + "learning_rate": 3.148166044566233e-05, + "loss": 3.3826, + "step": 13355 + }, + { + "epoch": 0.77, + "learning_rate": 3.14087557689564e-05, + "loss": 3.4659, + "step": 13360 + }, + { + "epoch": 0.77, + "learning_rate": 3.133591987570399e-05, + "loss": 3.4531, + "step": 13365 + }, + { + "epoch": 0.77, + "learning_rate": 3.1263152838945095e-05, + "loss": 3.4191, + "step": 13370 + }, + { + "epoch": 0.77, + "learning_rate": 3.1190454731650675e-05, + "loss": 3.3692, + "step": 13375 + }, + { + "epoch": 0.77, + "learning_rate": 3.111782562672251e-05, + "loss": 3.4698, + "step": 13380 + }, + { + "epoch": 0.77, + "learning_rate": 3.104526559699333e-05, + "loss": 3.3677, + "step": 13385 + }, + { + "epoch": 0.77, + "learning_rate": 3.0972774715226406e-05, + "loss": 3.4207, + "step": 13390 + }, + { + "epoch": 0.77, + "learning_rate": 3.090035305411575e-05, + "loss": 3.4267, + "step": 13395 + }, + { + "epoch": 0.77, + "learning_rate": 3.0828000686286027e-05, + "loss": 3.3806, + "step": 13400 + }, + { + "epoch": 0.77, + "learning_rate": 3.075571768429233e-05, + "loss": 3.3986, + "step": 13405 + }, + { + "epoch": 0.77, + "learning_rate": 3.06835041206202e-05, + "loss": 3.4173, + "step": 13410 + }, + { + "epoch": 0.77, + "learning_rate": 3.0611360067685576e-05, + "loss": 3.3747, + "step": 13415 + }, + { + "epoch": 0.77, + "learning_rate": 3.0539285597834675e-05, + "loss": 3.3857, + "step": 13420 + }, + { + "epoch": 0.77, + "learning_rate": 3.0467280783343944e-05, + "loss": 3.4542, + "step": 13425 + }, + { + "epoch": 0.77, + "learning_rate": 3.0395345696419918e-05, + "loss": 3.4552, + "step": 13430 + }, + { + "epoch": 0.77, + "learning_rate": 3.0323480409199378e-05, + "loss": 3.5176, + "step": 13435 + }, + { + "epoch": 0.77, + "learning_rate": 3.0251684993748886e-05, + "loss": 3.481, + "step": 13440 + }, + { + "epoch": 0.77, + "learning_rate": 3.017995952206506e-05, + "loss": 3.4387, + "step": 13445 + }, + { + "epoch": 0.77, + "learning_rate": 3.010830406607441e-05, + "loss": 3.4442, + "step": 13450 + }, + { + "epoch": 0.77, + "learning_rate": 3.003671869763317e-05, + "loss": 3.4148, + "step": 13455 + }, + { + "epoch": 0.77, + "learning_rate": 2.9965203488527317e-05, + "loss": 3.5284, + "step": 13460 + }, + { + "epoch": 0.77, + "learning_rate": 2.9893758510472436e-05, + "loss": 3.4433, + "step": 13465 + }, + { + "epoch": 0.77, + "learning_rate": 2.982238383511373e-05, + "loss": 3.4068, + "step": 13470 + }, + { + "epoch": 0.77, + "learning_rate": 2.975107953402585e-05, + "loss": 3.4897, + "step": 13475 + }, + { + "epoch": 0.77, + "learning_rate": 2.967984567871297e-05, + "loss": 3.4903, + "step": 13480 + }, + { + "epoch": 0.77, + "learning_rate": 2.960868234060855e-05, + "loss": 3.5319, + "step": 13485 + }, + { + "epoch": 0.77, + "learning_rate": 2.9537589591075298e-05, + "loss": 3.4193, + "step": 13490 + }, + { + "epoch": 0.77, + "learning_rate": 2.9466567501405185e-05, + "loss": 3.3527, + "step": 13495 + }, + { + "epoch": 0.77, + "learning_rate": 2.939561614281936e-05, + "loss": 3.3385, + "step": 13500 + }, + { + "epoch": 0.77, + "learning_rate": 2.9324735586468e-05, + "loss": 3.4873, + "step": 13505 + }, + { + "epoch": 0.78, + "learning_rate": 2.9253925903430267e-05, + "loss": 3.4693, + "step": 13510 + }, + { + "epoch": 0.78, + "learning_rate": 2.9183187164714288e-05, + "loss": 3.4874, + "step": 13515 + }, + { + "epoch": 0.78, + "learning_rate": 2.9112519441257e-05, + "loss": 3.4882, + "step": 13520 + }, + { + "epoch": 0.78, + "learning_rate": 2.9041922803924158e-05, + "loss": 3.385, + "step": 13525 + }, + { + "epoch": 0.78, + "learning_rate": 2.8971397323510275e-05, + "loss": 3.41, + "step": 13530 + }, + { + "epoch": 0.78, + "learning_rate": 2.890094307073845e-05, + "loss": 3.4329, + "step": 13535 + }, + { + "epoch": 0.78, + "learning_rate": 2.883056011626032e-05, + "loss": 3.4824, + "step": 13540 + }, + { + "epoch": 0.78, + "learning_rate": 2.8760248530656063e-05, + "loss": 3.4619, + "step": 13545 + }, + { + "epoch": 0.78, + "learning_rate": 2.8690008384434363e-05, + "loss": 3.4815, + "step": 13550 + }, + { + "epoch": 0.78, + "learning_rate": 2.861983974803215e-05, + "loss": 3.3843, + "step": 13555 + }, + { + "epoch": 0.78, + "learning_rate": 2.8549742691814705e-05, + "loss": 3.3387, + "step": 13560 + }, + { + "epoch": 0.78, + "learning_rate": 2.8479717286075502e-05, + "loss": 3.3733, + "step": 13565 + }, + { + "epoch": 0.78, + "learning_rate": 2.8409763601036188e-05, + "loss": 3.4172, + "step": 13570 + }, + { + "epoch": 0.78, + "learning_rate": 2.8339881706846427e-05, + "loss": 3.5058, + "step": 13575 + }, + { + "epoch": 0.78, + "learning_rate": 2.8270071673584008e-05, + "loss": 3.4697, + "step": 13580 + }, + { + "epoch": 0.78, + "learning_rate": 2.82003335712546e-05, + "loss": 3.397, + "step": 13585 + }, + { + "epoch": 0.78, + "learning_rate": 2.8130667469791626e-05, + "loss": 3.3922, + "step": 13590 + }, + { + "epoch": 0.78, + "learning_rate": 2.8061073439056507e-05, + "loss": 3.4303, + "step": 13595 + }, + { + "epoch": 0.78, + "learning_rate": 2.799155154883826e-05, + "loss": 3.3739, + "step": 13600 + }, + { + "epoch": 0.78, + "learning_rate": 2.7922101868853577e-05, + "loss": 3.3829, + "step": 13605 + }, + { + "epoch": 0.78, + "learning_rate": 2.785272446874677e-05, + "loss": 3.5141, + "step": 13610 + }, + { + "epoch": 0.78, + "learning_rate": 2.778341941808965e-05, + "loss": 3.5435, + "step": 13615 + }, + { + "epoch": 0.78, + "learning_rate": 2.771418678638147e-05, + "loss": 3.4333, + "step": 13620 + }, + { + "epoch": 0.78, + "learning_rate": 2.7645026643048855e-05, + "loss": 3.525, + "step": 13625 + }, + { + "epoch": 0.78, + "learning_rate": 2.7575939057445786e-05, + "loss": 3.4927, + "step": 13630 + }, + { + "epoch": 0.78, + "learning_rate": 2.750692409885347e-05, + "loss": 3.4071, + "step": 13635 + }, + { + "epoch": 0.78, + "learning_rate": 2.7437981836480166e-05, + "loss": 3.3944, + "step": 13640 + }, + { + "epoch": 0.78, + "learning_rate": 2.736911233946141e-05, + "loss": 3.4253, + "step": 13645 + }, + { + "epoch": 0.78, + "learning_rate": 2.730031567685968e-05, + "loss": 3.404, + "step": 13650 + }, + { + "epoch": 0.78, + "learning_rate": 2.723159191766439e-05, + "loss": 3.4327, + "step": 13655 + }, + { + "epoch": 0.78, + "learning_rate": 2.716294113079192e-05, + "loss": 3.3446, + "step": 13660 + }, + { + "epoch": 0.78, + "learning_rate": 2.7094363385085398e-05, + "loss": 3.481, + "step": 13665 + }, + { + "epoch": 0.78, + "learning_rate": 2.7025858749314758e-05, + "loss": 3.4406, + "step": 13670 + }, + { + "epoch": 0.78, + "learning_rate": 2.6957427292176572e-05, + "loss": 3.4653, + "step": 13675 + }, + { + "epoch": 0.78, + "learning_rate": 2.6889069082294114e-05, + "loss": 3.4196, + "step": 13680 + }, + { + "epoch": 0.79, + "learning_rate": 2.6820784188217164e-05, + "loss": 3.4672, + "step": 13685 + }, + { + "epoch": 0.79, + "learning_rate": 2.675257267842185e-05, + "loss": 3.4957, + "step": 13690 + }, + { + "epoch": 0.79, + "learning_rate": 2.668443462131094e-05, + "loss": 3.344, + "step": 13695 + }, + { + "epoch": 0.79, + "learning_rate": 2.6616370085213394e-05, + "loss": 3.3648, + "step": 13700 + }, + { + "epoch": 0.79, + "learning_rate": 2.6548379138384483e-05, + "loss": 3.3936, + "step": 13705 + }, + { + "epoch": 0.79, + "learning_rate": 2.648046184900568e-05, + "loss": 3.4392, + "step": 13710 + }, + { + "epoch": 0.79, + "learning_rate": 2.6412618285184587e-05, + "loss": 3.4115, + "step": 13715 + }, + { + "epoch": 0.79, + "learning_rate": 2.6344848514954856e-05, + "loss": 3.4271, + "step": 13720 + }, + { + "epoch": 0.79, + "learning_rate": 2.6277152606276234e-05, + "loss": 3.416, + "step": 13725 + }, + { + "epoch": 0.79, + "learning_rate": 2.6209530627034295e-05, + "loss": 3.4698, + "step": 13730 + }, + { + "epoch": 0.79, + "learning_rate": 2.614198264504053e-05, + "loss": 3.4536, + "step": 13735 + }, + { + "epoch": 0.79, + "learning_rate": 2.607450872803213e-05, + "loss": 3.403, + "step": 13740 + }, + { + "epoch": 0.79, + "learning_rate": 2.600710894367219e-05, + "loss": 3.4069, + "step": 13745 + }, + { + "epoch": 0.79, + "learning_rate": 2.5939783359549306e-05, + "loss": 3.4021, + "step": 13750 + }, + { + "epoch": 0.79, + "learning_rate": 2.5872532043177743e-05, + "loss": 3.3974, + "step": 13755 + }, + { + "epoch": 0.79, + "learning_rate": 2.580535506199727e-05, + "loss": 3.3361, + "step": 13760 + }, + { + "epoch": 0.79, + "learning_rate": 2.5738252483373117e-05, + "loss": 3.4486, + "step": 13765 + }, + { + "epoch": 0.79, + "learning_rate": 2.567122437459586e-05, + "loss": 3.4401, + "step": 13770 + }, + { + "epoch": 0.79, + "learning_rate": 2.5604270802881503e-05, + "loss": 3.2842, + "step": 13775 + }, + { + "epoch": 0.79, + "learning_rate": 2.5537391835371217e-05, + "loss": 3.4002, + "step": 13780 + }, + { + "epoch": 0.79, + "learning_rate": 2.5470587539131362e-05, + "loss": 3.4572, + "step": 13785 + }, + { + "epoch": 0.79, + "learning_rate": 2.5403857981153457e-05, + "loss": 3.4028, + "step": 13790 + }, + { + "epoch": 0.79, + "learning_rate": 2.5337203228354035e-05, + "loss": 3.4156, + "step": 13795 + }, + { + "epoch": 0.79, + "learning_rate": 2.527062334757464e-05, + "loss": 3.3197, + "step": 13800 + }, + { + "epoch": 0.79, + "learning_rate": 2.5204118405581724e-05, + "loss": 3.4788, + "step": 13805 + }, + { + "epoch": 0.79, + "learning_rate": 2.513768846906659e-05, + "loss": 3.4155, + "step": 13810 + }, + { + "epoch": 0.79, + "learning_rate": 2.507133360464533e-05, + "loss": 3.3215, + "step": 13815 + }, + { + "epoch": 0.79, + "learning_rate": 2.500505387885872e-05, + "loss": 3.4519, + "step": 13820 + }, + { + "epoch": 0.79, + "learning_rate": 2.493884935817228e-05, + "loss": 3.3837, + "step": 13825 + }, + { + "epoch": 0.79, + "learning_rate": 2.487272010897601e-05, + "loss": 3.3798, + "step": 13830 + }, + { + "epoch": 0.79, + "learning_rate": 2.4806666197584483e-05, + "loss": 3.4635, + "step": 13835 + }, + { + "epoch": 0.79, + "learning_rate": 2.474068769023671e-05, + "loss": 3.3723, + "step": 13840 + }, + { + "epoch": 0.79, + "learning_rate": 2.4674784653096083e-05, + "loss": 3.3135, + "step": 13845 + }, + { + "epoch": 0.79, + "learning_rate": 2.460895715225028e-05, + "loss": 3.4588, + "step": 13850 + }, + { + "epoch": 0.79, + "learning_rate": 2.4543205253711355e-05, + "loss": 3.3511, + "step": 13855 + }, + { + "epoch": 0.8, + "learning_rate": 2.447752902341538e-05, + "loss": 3.3753, + "step": 13860 + }, + { + "epoch": 0.8, + "learning_rate": 2.441192852722265e-05, + "loss": 3.395, + "step": 13865 + }, + { + "epoch": 0.8, + "learning_rate": 2.4346403830917464e-05, + "loss": 3.4534, + "step": 13870 + }, + { + "epoch": 0.8, + "learning_rate": 2.4280955000208184e-05, + "loss": 3.4165, + "step": 13875 + }, + { + "epoch": 0.8, + "learning_rate": 2.421558210072702e-05, + "loss": 3.4198, + "step": 13880 + }, + { + "epoch": 0.8, + "learning_rate": 2.4150285198030066e-05, + "loss": 3.4805, + "step": 13885 + }, + { + "epoch": 0.8, + "learning_rate": 2.4085064357597197e-05, + "loss": 3.4468, + "step": 13890 + }, + { + "epoch": 0.8, + "learning_rate": 2.4019919644832023e-05, + "loss": 3.425, + "step": 13895 + }, + { + "epoch": 0.8, + "learning_rate": 2.395485112506177e-05, + "loss": 3.3638, + "step": 13900 + }, + { + "epoch": 0.8, + "learning_rate": 2.3889858863537396e-05, + "loss": 3.5091, + "step": 13905 + }, + { + "epoch": 0.8, + "learning_rate": 2.382494292543319e-05, + "loss": 3.4622, + "step": 13910 + }, + { + "epoch": 0.8, + "learning_rate": 2.376010337584701e-05, + "loss": 3.4499, + "step": 13915 + }, + { + "epoch": 0.8, + "learning_rate": 2.369534027980015e-05, + "loss": 3.4294, + "step": 13920 + }, + { + "epoch": 0.8, + "learning_rate": 2.363065370223716e-05, + "loss": 3.3564, + "step": 13925 + }, + { + "epoch": 0.8, + "learning_rate": 2.3566043708025874e-05, + "loss": 3.3961, + "step": 13930 + }, + { + "epoch": 0.8, + "learning_rate": 2.3501510361957367e-05, + "loss": 3.4948, + "step": 13935 + }, + { + "epoch": 0.8, + "learning_rate": 2.3437053728745807e-05, + "loss": 3.4406, + "step": 13940 + }, + { + "epoch": 0.8, + "learning_rate": 2.337267387302844e-05, + "loss": 3.4303, + "step": 13945 + }, + { + "epoch": 0.8, + "learning_rate": 2.3308370859365523e-05, + "loss": 3.4901, + "step": 13950 + }, + { + "epoch": 0.8, + "learning_rate": 2.324414475224034e-05, + "loss": 3.3908, + "step": 13955 + }, + { + "epoch": 0.8, + "learning_rate": 2.317999561605888e-05, + "loss": 3.4473, + "step": 13960 + }, + { + "epoch": 0.8, + "learning_rate": 2.311592351515004e-05, + "loss": 3.3866, + "step": 13965 + }, + { + "epoch": 0.8, + "learning_rate": 2.3051928513765542e-05, + "loss": 3.4441, + "step": 13970 + }, + { + "epoch": 0.8, + "learning_rate": 2.2988010676079674e-05, + "loss": 3.4323, + "step": 13975 + }, + { + "epoch": 0.8, + "learning_rate": 2.292417006618939e-05, + "loss": 3.3951, + "step": 13980 + }, + { + "epoch": 0.8, + "learning_rate": 2.2860406748114195e-05, + "loss": 3.4342, + "step": 13985 + }, + { + "epoch": 0.8, + "learning_rate": 2.279672078579609e-05, + "loss": 3.4855, + "step": 13990 + }, + { + "epoch": 0.8, + "learning_rate": 2.2733112243099507e-05, + "loss": 3.5198, + "step": 13995 + }, + { + "epoch": 0.8, + "learning_rate": 2.2669581183811196e-05, + "loss": 3.3637, + "step": 14000 + }, + { + "epoch": 0.8, + "learning_rate": 2.2606127671640333e-05, + "loss": 3.4896, + "step": 14005 + }, + { + "epoch": 0.8, + "learning_rate": 2.254275177021816e-05, + "loss": 3.4494, + "step": 14010 + }, + { + "epoch": 0.8, + "learning_rate": 2.247945354309817e-05, + "loss": 3.3785, + "step": 14015 + }, + { + "epoch": 0.8, + "learning_rate": 2.2416233053756032e-05, + "loss": 3.3874, + "step": 14020 + }, + { + "epoch": 0.8, + "learning_rate": 2.2353090365589348e-05, + "loss": 3.355, + "step": 14025 + }, + { + "epoch": 0.8, + "learning_rate": 2.2290025541917768e-05, + "loss": 3.3877, + "step": 14030 + }, + { + "epoch": 0.81, + "learning_rate": 2.2227038645982833e-05, + "loss": 3.3963, + "step": 14035 + }, + { + "epoch": 0.81, + "learning_rate": 2.2164129740947935e-05, + "loss": 3.4826, + "step": 14040 + }, + { + "epoch": 0.81, + "learning_rate": 2.210129888989827e-05, + "loss": 3.375, + "step": 14045 + }, + { + "epoch": 0.81, + "learning_rate": 2.2038546155840735e-05, + "loss": 3.5578, + "step": 14050 + }, + { + "epoch": 0.81, + "learning_rate": 2.1975871601703977e-05, + "loss": 3.4041, + "step": 14055 + }, + { + "epoch": 0.81, + "learning_rate": 2.191327529033812e-05, + "loss": 3.4386, + "step": 14060 + }, + { + "epoch": 0.81, + "learning_rate": 2.1850757284514877e-05, + "loss": 3.4653, + "step": 14065 + }, + { + "epoch": 0.81, + "learning_rate": 2.178831764692749e-05, + "loss": 3.467, + "step": 14070 + }, + { + "epoch": 0.81, + "learning_rate": 2.1725956440190542e-05, + "loss": 3.4013, + "step": 14075 + }, + { + "epoch": 0.81, + "learning_rate": 2.1663673726840006e-05, + "loss": 3.4173, + "step": 14080 + }, + { + "epoch": 0.81, + "learning_rate": 2.160146956933311e-05, + "loss": 3.4734, + "step": 14085 + }, + { + "epoch": 0.81, + "learning_rate": 2.1539344030048337e-05, + "loss": 3.3504, + "step": 14090 + }, + { + "epoch": 0.81, + "learning_rate": 2.1477297171285282e-05, + "loss": 3.3729, + "step": 14095 + }, + { + "epoch": 0.81, + "learning_rate": 2.141532905526472e-05, + "loss": 3.482, + "step": 14100 + }, + { + "epoch": 0.81, + "learning_rate": 2.1353439744128434e-05, + "loss": 3.5459, + "step": 14105 + }, + { + "epoch": 0.81, + "learning_rate": 2.1291629299939097e-05, + "loss": 3.4397, + "step": 14110 + }, + { + "epoch": 0.81, + "learning_rate": 2.1229897784680365e-05, + "loss": 3.461, + "step": 14115 + }, + { + "epoch": 0.81, + "learning_rate": 2.116824526025679e-05, + "loss": 3.5427, + "step": 14120 + }, + { + "epoch": 0.81, + "learning_rate": 2.1106671788493636e-05, + "loss": 3.4621, + "step": 14125 + }, + { + "epoch": 0.81, + "learning_rate": 2.104517743113693e-05, + "loss": 3.3904, + "step": 14130 + }, + { + "epoch": 0.81, + "learning_rate": 2.0983762249853344e-05, + "loss": 3.4069, + "step": 14135 + }, + { + "epoch": 0.81, + "learning_rate": 2.092242630623016e-05, + "loss": 3.3439, + "step": 14140 + }, + { + "epoch": 0.81, + "learning_rate": 2.086116966177516e-05, + "loss": 3.4757, + "step": 14145 + }, + { + "epoch": 0.81, + "learning_rate": 2.079999237791672e-05, + "loss": 3.4535, + "step": 14150 + }, + { + "epoch": 0.81, + "learning_rate": 2.0738894516003536e-05, + "loss": 3.4249, + "step": 14155 + }, + { + "epoch": 0.81, + "learning_rate": 2.067787613730462e-05, + "loss": 3.4831, + "step": 14160 + }, + { + "epoch": 0.81, + "learning_rate": 2.0616937303009408e-05, + "loss": 3.3863, + "step": 14165 + }, + { + "epoch": 0.81, + "learning_rate": 2.055607807422748e-05, + "loss": 3.4013, + "step": 14170 + }, + { + "epoch": 0.81, + "learning_rate": 2.0495298511988602e-05, + "loss": 3.4698, + "step": 14175 + }, + { + "epoch": 0.81, + "learning_rate": 2.0434598677242656e-05, + "loss": 3.4213, + "step": 14180 + }, + { + "epoch": 0.81, + "learning_rate": 2.037397863085957e-05, + "loss": 3.4132, + "step": 14185 + }, + { + "epoch": 0.81, + "learning_rate": 2.0313438433629263e-05, + "loss": 3.4241, + "step": 14190 + }, + { + "epoch": 0.81, + "learning_rate": 2.0252978146261557e-05, + "loss": 3.3805, + "step": 14195 + }, + { + "epoch": 0.81, + "learning_rate": 2.0192597829386217e-05, + "loss": 3.4466, + "step": 14200 + }, + { + "epoch": 0.82, + "learning_rate": 2.0132297543552757e-05, + "loss": 3.3628, + "step": 14205 + }, + { + "epoch": 0.82, + "learning_rate": 2.0072077349230357e-05, + "loss": 3.4779, + "step": 14210 + }, + { + "epoch": 0.82, + "learning_rate": 2.0011937306808048e-05, + "loss": 3.3846, + "step": 14215 + }, + { + "epoch": 0.82, + "learning_rate": 1.9951877476594382e-05, + "loss": 3.4497, + "step": 14220 + }, + { + "epoch": 0.82, + "learning_rate": 1.9891897918817472e-05, + "loss": 3.4313, + "step": 14225 + }, + { + "epoch": 0.82, + "learning_rate": 1.9831998693624964e-05, + "loss": 3.418, + "step": 14230 + }, + { + "epoch": 0.82, + "learning_rate": 1.977217986108393e-05, + "loss": 3.5431, + "step": 14235 + }, + { + "epoch": 0.82, + "learning_rate": 1.9712441481180833e-05, + "loss": 3.4099, + "step": 14240 + }, + { + "epoch": 0.82, + "learning_rate": 1.9652783613821435e-05, + "loss": 3.4722, + "step": 14245 + }, + { + "epoch": 0.82, + "learning_rate": 1.9593206318830815e-05, + "loss": 3.4351, + "step": 14250 + }, + { + "epoch": 0.82, + "learning_rate": 1.9533709655953235e-05, + "loss": 3.3911, + "step": 14255 + }, + { + "epoch": 0.82, + "learning_rate": 1.9474293684851984e-05, + "loss": 3.4257, + "step": 14260 + }, + { + "epoch": 0.82, + "learning_rate": 1.9414958465109635e-05, + "loss": 3.4322, + "step": 14265 + }, + { + "epoch": 0.82, + "learning_rate": 1.9355704056227632e-05, + "loss": 3.3928, + "step": 14270 + }, + { + "epoch": 0.82, + "learning_rate": 1.9296530517626445e-05, + "loss": 3.4135, + "step": 14275 + }, + { + "epoch": 0.82, + "learning_rate": 1.9237437908645417e-05, + "loss": 3.4219, + "step": 14280 + }, + { + "epoch": 0.82, + "learning_rate": 1.917842628854275e-05, + "loss": 3.4789, + "step": 14285 + }, + { + "epoch": 0.82, + "learning_rate": 1.9119495716495417e-05, + "loss": 3.506, + "step": 14290 + }, + { + "epoch": 0.82, + "learning_rate": 1.9060646251599157e-05, + "loss": 3.4243, + "step": 14295 + }, + { + "epoch": 0.82, + "learning_rate": 1.900187795286834e-05, + "loss": 3.42, + "step": 14300 + }, + { + "epoch": 0.82, + "learning_rate": 1.8943190879235972e-05, + "loss": 3.4216, + "step": 14305 + }, + { + "epoch": 0.82, + "learning_rate": 1.8884585089553498e-05, + "loss": 3.4604, + "step": 14310 + }, + { + "epoch": 0.82, + "learning_rate": 1.8826060642591005e-05, + "loss": 3.4383, + "step": 14315 + }, + { + "epoch": 0.82, + "learning_rate": 1.8767617597036925e-05, + "loss": 3.4727, + "step": 14320 + }, + { + "epoch": 0.82, + "learning_rate": 1.8709256011498076e-05, + "loss": 3.483, + "step": 14325 + }, + { + "epoch": 0.82, + "learning_rate": 1.865097594449958e-05, + "loss": 3.5216, + "step": 14330 + }, + { + "epoch": 0.82, + "learning_rate": 1.8592777454484835e-05, + "loss": 3.445, + "step": 14335 + }, + { + "epoch": 0.82, + "learning_rate": 1.8534660599815368e-05, + "loss": 3.4595, + "step": 14340 + }, + { + "epoch": 0.82, + "learning_rate": 1.8476625438770944e-05, + "loss": 3.5099, + "step": 14345 + }, + { + "epoch": 0.82, + "learning_rate": 1.8418672029549355e-05, + "loss": 3.3344, + "step": 14350 + }, + { + "epoch": 0.82, + "learning_rate": 1.836080043026638e-05, + "loss": 3.4814, + "step": 14355 + }, + { + "epoch": 0.82, + "learning_rate": 1.8303010698955804e-05, + "loss": 3.5051, + "step": 14360 + }, + { + "epoch": 0.82, + "learning_rate": 1.8245302893569295e-05, + "loss": 3.5077, + "step": 14365 + }, + { + "epoch": 0.82, + "learning_rate": 1.818767707197636e-05, + "loss": 3.3886, + "step": 14370 + }, + { + "epoch": 0.82, + "learning_rate": 1.8130133291964323e-05, + "loss": 3.4814, + "step": 14375 + }, + { + "epoch": 0.83, + "learning_rate": 1.80726716112382e-05, + "loss": 3.4338, + "step": 14380 + }, + { + "epoch": 0.83, + "learning_rate": 1.80152920874207e-05, + "loss": 3.4751, + "step": 14385 + }, + { + "epoch": 0.83, + "learning_rate": 1.7957994778052112e-05, + "loss": 3.4174, + "step": 14390 + }, + { + "epoch": 0.83, + "learning_rate": 1.7900779740590344e-05, + "loss": 3.4163, + "step": 14395 + }, + { + "epoch": 0.83, + "learning_rate": 1.784364703241076e-05, + "loss": 3.4301, + "step": 14400 + }, + { + "epoch": 0.83, + "learning_rate": 1.778659671080616e-05, + "loss": 3.3914, + "step": 14405 + }, + { + "epoch": 0.83, + "learning_rate": 1.7729628832986722e-05, + "loss": 3.4694, + "step": 14410 + }, + { + "epoch": 0.83, + "learning_rate": 1.7672743456079976e-05, + "loss": 3.4456, + "step": 14415 + }, + { + "epoch": 0.83, + "learning_rate": 1.761594063713068e-05, + "loss": 3.4836, + "step": 14420 + }, + { + "epoch": 0.83, + "learning_rate": 1.75592204331009e-05, + "loss": 3.4763, + "step": 14425 + }, + { + "epoch": 0.83, + "learning_rate": 1.7502582900869702e-05, + "loss": 3.3957, + "step": 14430 + }, + { + "epoch": 0.83, + "learning_rate": 1.744602809723337e-05, + "loss": 3.36, + "step": 14435 + }, + { + "epoch": 0.83, + "learning_rate": 1.7389556078905144e-05, + "loss": 3.4606, + "step": 14440 + }, + { + "epoch": 0.83, + "learning_rate": 1.7333166902515363e-05, + "loss": 3.3859, + "step": 14445 + }, + { + "epoch": 0.83, + "learning_rate": 1.727686062461118e-05, + "loss": 3.464, + "step": 14450 + }, + { + "epoch": 0.83, + "learning_rate": 1.722063730165665e-05, + "loss": 3.4611, + "step": 14455 + }, + { + "epoch": 0.83, + "learning_rate": 1.7164496990032665e-05, + "loss": 3.4874, + "step": 14460 + }, + { + "epoch": 0.83, + "learning_rate": 1.7108439746036842e-05, + "loss": 3.419, + "step": 14465 + }, + { + "epoch": 0.83, + "learning_rate": 1.7052465625883494e-05, + "loss": 3.4372, + "step": 14470 + }, + { + "epoch": 0.83, + "learning_rate": 1.699657468570367e-05, + "loss": 3.4152, + "step": 14475 + }, + { + "epoch": 0.83, + "learning_rate": 1.694076698154484e-05, + "loss": 3.4146, + "step": 14480 + }, + { + "epoch": 0.83, + "learning_rate": 1.6885042569371146e-05, + "loss": 3.417, + "step": 14485 + }, + { + "epoch": 0.83, + "learning_rate": 1.68294015050631e-05, + "loss": 3.4524, + "step": 14490 + }, + { + "epoch": 0.83, + "learning_rate": 1.677384384441776e-05, + "loss": 3.4683, + "step": 14495 + }, + { + "epoch": 0.83, + "learning_rate": 1.6718369643148435e-05, + "loss": 3.3752, + "step": 14500 + }, + { + "epoch": 0.83, + "learning_rate": 1.6662978956884778e-05, + "loss": 3.4058, + "step": 14505 + }, + { + "epoch": 0.83, + "learning_rate": 1.66076718411727e-05, + "loss": 3.4853, + "step": 14510 + }, + { + "epoch": 0.83, + "learning_rate": 1.6552448351474304e-05, + "loss": 3.3861, + "step": 14515 + }, + { + "epoch": 0.83, + "learning_rate": 1.649730854316779e-05, + "loss": 3.5503, + "step": 14520 + }, + { + "epoch": 0.83, + "learning_rate": 1.644225247154756e-05, + "loss": 3.4072, + "step": 14525 + }, + { + "epoch": 0.83, + "learning_rate": 1.6387280191823896e-05, + "loss": 3.4415, + "step": 14530 + }, + { + "epoch": 0.83, + "learning_rate": 1.6332391759123123e-05, + "loss": 3.3839, + "step": 14535 + }, + { + "epoch": 0.83, + "learning_rate": 1.6277587228487533e-05, + "loss": 3.433, + "step": 14540 + }, + { + "epoch": 0.83, + "learning_rate": 1.6222866654875213e-05, + "loss": 3.5072, + "step": 14545 + }, + { + "epoch": 0.83, + "learning_rate": 1.6168230093160062e-05, + "loss": 3.4123, + "step": 14550 + }, + { + "epoch": 0.84, + "learning_rate": 1.611367759813176e-05, + "loss": 3.3892, + "step": 14555 + }, + { + "epoch": 0.84, + "learning_rate": 1.6059209224495676e-05, + "loss": 3.4171, + "step": 14560 + }, + { + "epoch": 0.84, + "learning_rate": 1.6004825026872806e-05, + "loss": 3.4167, + "step": 14565 + }, + { + "epoch": 0.84, + "learning_rate": 1.5950525059799714e-05, + "loss": 3.399, + "step": 14570 + }, + { + "epoch": 0.84, + "learning_rate": 1.5896309377728624e-05, + "loss": 3.4173, + "step": 14575 + }, + { + "epoch": 0.84, + "learning_rate": 1.5842178035027044e-05, + "loss": 3.4675, + "step": 14580 + }, + { + "epoch": 0.84, + "learning_rate": 1.5788131085978032e-05, + "loss": 3.3305, + "step": 14585 + }, + { + "epoch": 0.84, + "learning_rate": 1.573416858478003e-05, + "loss": 3.4459, + "step": 14590 + }, + { + "epoch": 0.84, + "learning_rate": 1.568029058554672e-05, + "loss": 3.4384, + "step": 14595 + }, + { + "epoch": 0.84, + "learning_rate": 1.5626497142307084e-05, + "loss": 3.4213, + "step": 14600 + }, + { + "epoch": 0.84, + "learning_rate": 1.5572788309005315e-05, + "loss": 3.4246, + "step": 14605 + }, + { + "epoch": 0.84, + "learning_rate": 1.5519164139500743e-05, + "loss": 3.4161, + "step": 14610 + }, + { + "epoch": 0.84, + "learning_rate": 1.5465624687567816e-05, + "loss": 3.479, + "step": 14615 + }, + { + "epoch": 0.84, + "learning_rate": 1.5412170006895986e-05, + "loss": 3.3769, + "step": 14620 + }, + { + "epoch": 0.84, + "learning_rate": 1.5358800151089803e-05, + "loss": 3.428, + "step": 14625 + }, + { + "epoch": 0.84, + "learning_rate": 1.5305515173668594e-05, + "loss": 3.3479, + "step": 14630 + }, + { + "epoch": 0.84, + "learning_rate": 1.5252315128066663e-05, + "loss": 3.3877, + "step": 14635 + }, + { + "epoch": 0.84, + "learning_rate": 1.519920006763319e-05, + "loss": 3.4366, + "step": 14640 + }, + { + "epoch": 0.84, + "learning_rate": 1.5146170045632035e-05, + "loss": 3.418, + "step": 14645 + }, + { + "epoch": 0.84, + "learning_rate": 1.5093225115241838e-05, + "loss": 3.4155, + "step": 14650 + }, + { + "epoch": 0.84, + "learning_rate": 1.5040365329555895e-05, + "loss": 3.4507, + "step": 14655 + }, + { + "epoch": 0.84, + "learning_rate": 1.4987590741582102e-05, + "loss": 3.464, + "step": 14660 + }, + { + "epoch": 0.84, + "learning_rate": 1.493490140424293e-05, + "loss": 3.4153, + "step": 14665 + }, + { + "epoch": 0.84, + "learning_rate": 1.4882297370375387e-05, + "loss": 3.4218, + "step": 14670 + }, + { + "epoch": 0.84, + "learning_rate": 1.4829778692730944e-05, + "loss": 3.3416, + "step": 14675 + }, + { + "epoch": 0.84, + "learning_rate": 1.4777345423975375e-05, + "loss": 3.3855, + "step": 14680 + }, + { + "epoch": 0.84, + "learning_rate": 1.4724997616688907e-05, + "loss": 3.4864, + "step": 14685 + }, + { + "epoch": 0.84, + "learning_rate": 1.4672735323366061e-05, + "loss": 3.4367, + "step": 14690 + }, + { + "epoch": 0.84, + "learning_rate": 1.4620558596415578e-05, + "loss": 3.4543, + "step": 14695 + }, + { + "epoch": 0.84, + "learning_rate": 1.4568467488160386e-05, + "loss": 3.4182, + "step": 14700 + }, + { + "epoch": 0.84, + "learning_rate": 1.4516462050837564e-05, + "loss": 3.3946, + "step": 14705 + }, + { + "epoch": 0.84, + "learning_rate": 1.4464542336598274e-05, + "loss": 3.4149, + "step": 14710 + }, + { + "epoch": 0.84, + "learning_rate": 1.4412708397507724e-05, + "loss": 3.4045, + "step": 14715 + }, + { + "epoch": 0.84, + "learning_rate": 1.4360960285545133e-05, + "loss": 3.3899, + "step": 14720 + }, + { + "epoch": 0.84, + "learning_rate": 1.4309298052603626e-05, + "loss": 3.4212, + "step": 14725 + }, + { + "epoch": 0.85, + "learning_rate": 1.4257721750490127e-05, + "loss": 3.3206, + "step": 14730 + }, + { + "epoch": 0.85, + "learning_rate": 1.4206231430925553e-05, + "loss": 3.4083, + "step": 14735 + }, + { + "epoch": 0.85, + "learning_rate": 1.4154827145544492e-05, + "loss": 3.3937, + "step": 14740 + }, + { + "epoch": 0.85, + "learning_rate": 1.410350894589525e-05, + "loss": 3.4312, + "step": 14745 + }, + { + "epoch": 0.85, + "learning_rate": 1.4052276883439864e-05, + "loss": 3.5616, + "step": 14750 + }, + { + "epoch": 0.85, + "learning_rate": 1.4001131009553936e-05, + "loss": 3.3679, + "step": 14755 + }, + { + "epoch": 0.85, + "learning_rate": 1.3950071375526685e-05, + "loss": 3.3709, + "step": 14760 + }, + { + "epoch": 0.85, + "learning_rate": 1.3899098032560787e-05, + "loss": 3.4623, + "step": 14765 + }, + { + "epoch": 0.85, + "learning_rate": 1.3848211031772473e-05, + "loss": 3.4438, + "step": 14770 + }, + { + "epoch": 0.85, + "learning_rate": 1.3797410424191337e-05, + "loss": 3.407, + "step": 14775 + }, + { + "epoch": 0.85, + "learning_rate": 1.3746696260760295e-05, + "loss": 3.4802, + "step": 14780 + }, + { + "epoch": 0.85, + "learning_rate": 1.3696068592335676e-05, + "loss": 3.4063, + "step": 14785 + }, + { + "epoch": 0.85, + "learning_rate": 1.3645527469686992e-05, + "loss": 3.3345, + "step": 14790 + }, + { + "epoch": 0.85, + "learning_rate": 1.3595072943497011e-05, + "loss": 3.4694, + "step": 14795 + }, + { + "epoch": 0.85, + "learning_rate": 1.3544705064361629e-05, + "loss": 3.392, + "step": 14800 + }, + { + "epoch": 0.85, + "learning_rate": 1.3494423882789874e-05, + "loss": 3.3785, + "step": 14805 + }, + { + "epoch": 0.85, + "learning_rate": 1.3444229449203827e-05, + "loss": 3.423, + "step": 14810 + }, + { + "epoch": 0.85, + "learning_rate": 1.3394121813938554e-05, + "loss": 3.3379, + "step": 14815 + }, + { + "epoch": 0.85, + "learning_rate": 1.3344101027242161e-05, + "loss": 3.4533, + "step": 14820 + }, + { + "epoch": 0.85, + "learning_rate": 1.3294167139275593e-05, + "loss": 3.4545, + "step": 14825 + }, + { + "epoch": 0.85, + "learning_rate": 1.3244320200112592e-05, + "loss": 3.4008, + "step": 14830 + }, + { + "epoch": 0.85, + "learning_rate": 1.3194560259739863e-05, + "loss": 3.3769, + "step": 14835 + }, + { + "epoch": 0.85, + "learning_rate": 1.3144887368056757e-05, + "loss": 3.4203, + "step": 14840 + }, + { + "epoch": 0.85, + "learning_rate": 1.3095301574875363e-05, + "loss": 3.3891, + "step": 14845 + }, + { + "epoch": 0.85, + "learning_rate": 1.3045802929920414e-05, + "loss": 3.4377, + "step": 14850 + }, + { + "epoch": 0.85, + "learning_rate": 1.2996391482829273e-05, + "loss": 3.3731, + "step": 14855 + }, + { + "epoch": 0.85, + "learning_rate": 1.2947067283151837e-05, + "loss": 3.4025, + "step": 14860 + }, + { + "epoch": 0.85, + "learning_rate": 1.289783038035055e-05, + "loss": 3.4156, + "step": 14865 + }, + { + "epoch": 0.85, + "learning_rate": 1.2848680823800275e-05, + "loss": 3.3655, + "step": 14870 + }, + { + "epoch": 0.85, + "learning_rate": 1.2799618662788315e-05, + "loss": 3.4224, + "step": 14875 + }, + { + "epoch": 0.85, + "learning_rate": 1.2750643946514252e-05, + "loss": 3.4031, + "step": 14880 + }, + { + "epoch": 0.85, + "learning_rate": 1.2701756724090108e-05, + "loss": 3.4192, + "step": 14885 + }, + { + "epoch": 0.85, + "learning_rate": 1.2652957044540082e-05, + "loss": 3.4428, + "step": 14890 + }, + { + "epoch": 0.85, + "learning_rate": 1.2604244956800593e-05, + "loss": 3.4066, + "step": 14895 + }, + { + "epoch": 0.85, + "learning_rate": 1.2555620509720233e-05, + "loss": 3.387, + "step": 14900 + }, + { + "epoch": 0.86, + "learning_rate": 1.2507083752059723e-05, + "loss": 3.4369, + "step": 14905 + }, + { + "epoch": 0.86, + "learning_rate": 1.2458634732491781e-05, + "loss": 3.3398, + "step": 14910 + }, + { + "epoch": 0.86, + "learning_rate": 1.2410273499601266e-05, + "loss": 3.3277, + "step": 14915 + }, + { + "epoch": 0.86, + "learning_rate": 1.2362000101884885e-05, + "loss": 3.3688, + "step": 14920 + }, + { + "epoch": 0.86, + "learning_rate": 1.2313814587751316e-05, + "loss": 3.3822, + "step": 14925 + }, + { + "epoch": 0.86, + "learning_rate": 1.2265717005521115e-05, + "loss": 3.4371, + "step": 14930 + }, + { + "epoch": 0.86, + "learning_rate": 1.2217707403426627e-05, + "loss": 3.4282, + "step": 14935 + }, + { + "epoch": 0.86, + "learning_rate": 1.2169785829612001e-05, + "loss": 3.4913, + "step": 14940 + }, + { + "epoch": 0.86, + "learning_rate": 1.2121952332133091e-05, + "loss": 3.4664, + "step": 14945 + }, + { + "epoch": 0.86, + "learning_rate": 1.2074206958957447e-05, + "loss": 3.43, + "step": 14950 + }, + { + "epoch": 0.86, + "learning_rate": 1.2026549757964212e-05, + "loss": 3.4028, + "step": 14955 + }, + { + "epoch": 0.86, + "learning_rate": 1.1978980776944137e-05, + "loss": 3.3877, + "step": 14960 + }, + { + "epoch": 0.86, + "learning_rate": 1.1931500063599543e-05, + "loss": 3.4267, + "step": 14965 + }, + { + "epoch": 0.86, + "learning_rate": 1.1884107665544164e-05, + "loss": 3.4878, + "step": 14970 + }, + { + "epoch": 0.86, + "learning_rate": 1.1836803630303206e-05, + "loss": 3.5178, + "step": 14975 + }, + { + "epoch": 0.86, + "learning_rate": 1.1789588005313257e-05, + "loss": 3.3866, + "step": 14980 + }, + { + "epoch": 0.86, + "learning_rate": 1.1742460837922265e-05, + "loss": 3.4579, + "step": 14985 + }, + { + "epoch": 0.86, + "learning_rate": 1.1695422175389447e-05, + "loss": 3.3527, + "step": 14990 + }, + { + "epoch": 0.86, + "learning_rate": 1.1648472064885286e-05, + "loss": 3.4532, + "step": 14995 + }, + { + "epoch": 0.86, + "learning_rate": 1.160161055349146e-05, + "loss": 3.4171, + "step": 15000 + }, + { + "epoch": 0.86, + "learning_rate": 1.1554837688200793e-05, + "loss": 3.3658, + "step": 15005 + }, + { + "epoch": 0.86, + "learning_rate": 1.1508153515917196e-05, + "loss": 3.3988, + "step": 15010 + }, + { + "epoch": 0.86, + "learning_rate": 1.1461558083455704e-05, + "loss": 3.3761, + "step": 15015 + }, + { + "epoch": 0.86, + "learning_rate": 1.1415051437542302e-05, + "loss": 3.3766, + "step": 15020 + }, + { + "epoch": 0.86, + "learning_rate": 1.1368633624813974e-05, + "loss": 3.4008, + "step": 15025 + }, + { + "epoch": 0.86, + "learning_rate": 1.1322304691818575e-05, + "loss": 3.3589, + "step": 15030 + }, + { + "epoch": 0.86, + "learning_rate": 1.1276064685014886e-05, + "loss": 3.4871, + "step": 15035 + }, + { + "epoch": 0.86, + "learning_rate": 1.1229913650772472e-05, + "loss": 3.4084, + "step": 15040 + }, + { + "epoch": 0.86, + "learning_rate": 1.1183851635371734e-05, + "loss": 3.4025, + "step": 15045 + }, + { + "epoch": 0.86, + "learning_rate": 1.1137878685003722e-05, + "loss": 3.4326, + "step": 15050 + }, + { + "epoch": 0.86, + "learning_rate": 1.1091994845770226e-05, + "loss": 3.4232, + "step": 15055 + }, + { + "epoch": 0.86, + "learning_rate": 1.104620016368364e-05, + "loss": 3.4361, + "step": 15060 + }, + { + "epoch": 0.86, + "learning_rate": 1.1000494684667017e-05, + "loss": 3.4489, + "step": 15065 + }, + { + "epoch": 0.86, + "learning_rate": 1.0954878454553908e-05, + "loss": 3.4315, + "step": 15070 + }, + { + "epoch": 0.86, + "learning_rate": 1.0909351519088352e-05, + "loss": 3.3761, + "step": 15075 + }, + { + "epoch": 0.87, + "learning_rate": 1.0863913923924862e-05, + "loss": 3.4033, + "step": 15080 + }, + { + "epoch": 0.87, + "learning_rate": 1.081856571462837e-05, + "loss": 3.4356, + "step": 15085 + }, + { + "epoch": 0.87, + "learning_rate": 1.0773306936674133e-05, + "loss": 3.4922, + "step": 15090 + }, + { + "epoch": 0.87, + "learning_rate": 1.0728137635447821e-05, + "loss": 3.4293, + "step": 15095 + }, + { + "epoch": 0.87, + "learning_rate": 1.0683057856245259e-05, + "loss": 3.4008, + "step": 15100 + }, + { + "epoch": 0.87, + "learning_rate": 1.0638067644272532e-05, + "loss": 3.4822, + "step": 15105 + }, + { + "epoch": 0.87, + "learning_rate": 1.059316704464598e-05, + "loss": 3.4208, + "step": 15110 + }, + { + "epoch": 0.87, + "learning_rate": 1.0548356102391999e-05, + "loss": 3.4093, + "step": 15115 + }, + { + "epoch": 0.87, + "learning_rate": 1.0503634862447099e-05, + "loss": 3.4012, + "step": 15120 + }, + { + "epoch": 0.87, + "learning_rate": 1.0459003369657849e-05, + "loss": 3.4201, + "step": 15125 + }, + { + "epoch": 0.87, + "learning_rate": 1.0414461668780806e-05, + "loss": 3.4019, + "step": 15130 + }, + { + "epoch": 0.87, + "learning_rate": 1.0370009804482483e-05, + "loss": 3.4056, + "step": 15135 + }, + { + "epoch": 0.87, + "learning_rate": 1.032564782133929e-05, + "loss": 3.4937, + "step": 15140 + }, + { + "epoch": 0.87, + "learning_rate": 1.0281375763837598e-05, + "loss": 3.4876, + "step": 15145 + }, + { + "epoch": 0.87, + "learning_rate": 1.0237193676373435e-05, + "loss": 3.4868, + "step": 15150 + }, + { + "epoch": 0.87, + "learning_rate": 1.019310160325273e-05, + "loss": 3.3569, + "step": 15155 + }, + { + "epoch": 0.87, + "learning_rate": 1.0149099588691135e-05, + "loss": 3.3968, + "step": 15160 + }, + { + "epoch": 0.87, + "learning_rate": 1.0105187676813954e-05, + "loss": 3.3505, + "step": 15165 + }, + { + "epoch": 0.87, + "learning_rate": 1.006136591165614e-05, + "loss": 3.474, + "step": 15170 + }, + { + "epoch": 0.87, + "learning_rate": 1.0017634337162275e-05, + "loss": 3.451, + "step": 15175 + }, + { + "epoch": 0.87, + "learning_rate": 9.973992997186465e-06, + "loss": 3.4255, + "step": 15180 + }, + { + "epoch": 0.87, + "learning_rate": 9.930441935492363e-06, + "loss": 3.3469, + "step": 15185 + }, + { + "epoch": 0.87, + "learning_rate": 9.88698119575302e-06, + "loss": 3.4609, + "step": 15190 + }, + { + "epoch": 0.87, + "learning_rate": 9.843610821551053e-06, + "loss": 3.3845, + "step": 15195 + }, + { + "epoch": 0.87, + "learning_rate": 9.800330856378303e-06, + "loss": 3.4614, + "step": 15200 + }, + { + "epoch": 0.87, + "learning_rate": 9.757141343636e-06, + "loss": 3.4441, + "step": 15205 + }, + { + "epoch": 0.87, + "learning_rate": 9.714042326634743e-06, + "loss": 3.3735, + "step": 15210 + }, + { + "epoch": 0.87, + "learning_rate": 9.671033848594301e-06, + "loss": 3.4226, + "step": 15215 + }, + { + "epoch": 0.87, + "learning_rate": 9.628115952643657e-06, + "loss": 3.3962, + "step": 15220 + }, + { + "epoch": 0.87, + "learning_rate": 9.585288681820992e-06, + "loss": 3.4856, + "step": 15225 + }, + { + "epoch": 0.87, + "learning_rate": 9.542552079073586e-06, + "loss": 3.4181, + "step": 15230 + }, + { + "epoch": 0.87, + "learning_rate": 9.499906187257768e-06, + "loss": 3.3866, + "step": 15235 + }, + { + "epoch": 0.87, + "learning_rate": 9.457351049138974e-06, + "loss": 3.3888, + "step": 15240 + }, + { + "epoch": 0.87, + "learning_rate": 9.414886707391613e-06, + "loss": 3.4324, + "step": 15245 + }, + { + "epoch": 0.87, + "learning_rate": 9.372513204598954e-06, + "loss": 3.3369, + "step": 15250 + }, + { + "epoch": 0.88, + "learning_rate": 9.330230583253263e-06, + "loss": 3.4252, + "step": 15255 + }, + { + "epoch": 0.88, + "learning_rate": 9.288038885755679e-06, + "loss": 3.494, + "step": 15260 + }, + { + "epoch": 0.88, + "learning_rate": 9.245938154416112e-06, + "loss": 3.4341, + "step": 15265 + }, + { + "epoch": 0.88, + "learning_rate": 9.203928431453269e-06, + "loss": 3.4506, + "step": 15270 + }, + { + "epoch": 0.88, + "learning_rate": 9.162009758994593e-06, + "loss": 3.4266, + "step": 15275 + }, + { + "epoch": 0.88, + "learning_rate": 9.12018217907622e-06, + "loss": 3.4195, + "step": 15280 + }, + { + "epoch": 0.88, + "learning_rate": 9.078445733642926e-06, + "loss": 3.4822, + "step": 15285 + }, + { + "epoch": 0.88, + "learning_rate": 9.036800464548157e-06, + "loss": 3.3651, + "step": 15290 + }, + { + "epoch": 0.88, + "learning_rate": 8.995246413553871e-06, + "loss": 3.4373, + "step": 15295 + }, + { + "epoch": 0.88, + "learning_rate": 8.953783622330515e-06, + "loss": 3.4492, + "step": 15300 + }, + { + "epoch": 0.88, + "learning_rate": 8.912412132457116e-06, + "loss": 3.4483, + "step": 15305 + }, + { + "epoch": 0.88, + "learning_rate": 8.871131985421089e-06, + "loss": 3.4806, + "step": 15310 + }, + { + "epoch": 0.88, + "learning_rate": 8.829943222618242e-06, + "loss": 3.4217, + "step": 15315 + }, + { + "epoch": 0.88, + "learning_rate": 8.788845885352782e-06, + "loss": 3.4207, + "step": 15320 + }, + { + "epoch": 0.88, + "learning_rate": 8.747840014837194e-06, + "loss": 3.3894, + "step": 15325 + }, + { + "epoch": 0.88, + "learning_rate": 8.706925652192255e-06, + "loss": 3.4067, + "step": 15330 + }, + { + "epoch": 0.88, + "learning_rate": 8.666102838446976e-06, + "loss": 3.4235, + "step": 15335 + }, + { + "epoch": 0.88, + "learning_rate": 8.625371614538591e-06, + "loss": 3.3815, + "step": 15340 + }, + { + "epoch": 0.88, + "learning_rate": 8.584732021312469e-06, + "loss": 3.4519, + "step": 15345 + }, + { + "epoch": 0.88, + "learning_rate": 8.544184099522024e-06, + "loss": 3.4089, + "step": 15350 + }, + { + "epoch": 0.88, + "learning_rate": 8.50372788982886e-06, + "loss": 3.3804, + "step": 15355 + }, + { + "epoch": 0.88, + "learning_rate": 8.46336343280254e-06, + "loss": 3.4243, + "step": 15360 + }, + { + "epoch": 0.88, + "learning_rate": 8.423090768920628e-06, + "loss": 3.4613, + "step": 15365 + }, + { + "epoch": 0.88, + "learning_rate": 8.38290993856865e-06, + "loss": 3.3849, + "step": 15370 + }, + { + "epoch": 0.88, + "learning_rate": 8.342820982040011e-06, + "loss": 3.3897, + "step": 15375 + }, + { + "epoch": 0.88, + "learning_rate": 8.30282393953603e-06, + "loss": 3.4186, + "step": 15380 + }, + { + "epoch": 0.88, + "learning_rate": 8.262918851165813e-06, + "loss": 3.3917, + "step": 15385 + }, + { + "epoch": 0.88, + "learning_rate": 8.223105756946292e-06, + "loss": 3.3931, + "step": 15390 + }, + { + "epoch": 0.88, + "learning_rate": 8.183384696802132e-06, + "loss": 3.4408, + "step": 15395 + }, + { + "epoch": 0.88, + "learning_rate": 8.143755710565648e-06, + "loss": 3.3533, + "step": 15400 + }, + { + "epoch": 0.88, + "learning_rate": 8.10421883797694e-06, + "loss": 3.4292, + "step": 15405 + }, + { + "epoch": 0.88, + "learning_rate": 8.064774118683638e-06, + "loss": 3.4905, + "step": 15410 + }, + { + "epoch": 0.88, + "learning_rate": 8.025421592241012e-06, + "loss": 3.4325, + "step": 15415 + }, + { + "epoch": 0.88, + "learning_rate": 7.98616129811185e-06, + "loss": 3.4235, + "step": 15420 + }, + { + "epoch": 0.89, + "learning_rate": 7.94699327566647e-06, + "loss": 3.4667, + "step": 15425 + }, + { + "epoch": 0.89, + "learning_rate": 7.907917564182631e-06, + "loss": 3.3734, + "step": 15430 + }, + { + "epoch": 0.89, + "learning_rate": 7.86893420284559e-06, + "loss": 3.3776, + "step": 15435 + }, + { + "epoch": 0.89, + "learning_rate": 7.830043230747918e-06, + "loss": 3.514, + "step": 15440 + }, + { + "epoch": 0.89, + "learning_rate": 7.791244686889588e-06, + "loss": 3.4158, + "step": 15445 + }, + { + "epoch": 0.89, + "learning_rate": 7.752538610177817e-06, + "loss": 3.4004, + "step": 15450 + }, + { + "epoch": 0.89, + "learning_rate": 7.713925039427206e-06, + "loss": 3.4145, + "step": 15455 + }, + { + "epoch": 0.89, + "learning_rate": 7.67540401335951e-06, + "loss": 3.4869, + "step": 15460 + }, + { + "epoch": 0.89, + "learning_rate": 7.636975570603689e-06, + "loss": 3.3582, + "step": 15465 + }, + { + "epoch": 0.89, + "learning_rate": 7.5986397496958796e-06, + "loss": 3.5646, + "step": 15470 + }, + { + "epoch": 0.89, + "learning_rate": 7.560396589079322e-06, + "loss": 3.3347, + "step": 15475 + }, + { + "epoch": 0.89, + "learning_rate": 7.522246127104348e-06, + "loss": 3.4199, + "step": 15480 + }, + { + "epoch": 0.89, + "learning_rate": 7.484188402028336e-06, + "loss": 3.3738, + "step": 15485 + }, + { + "epoch": 0.89, + "learning_rate": 7.446223452015644e-06, + "loss": 3.426, + "step": 15490 + }, + { + "epoch": 0.89, + "learning_rate": 7.40835131513764e-06, + "loss": 3.395, + "step": 15495 + }, + { + "epoch": 0.89, + "learning_rate": 7.3705720293725245e-06, + "loss": 3.3783, + "step": 15500 + }, + { + "epoch": 0.89, + "learning_rate": 7.332885632605513e-06, + "loss": 3.4279, + "step": 15505 + }, + { + "epoch": 0.89, + "learning_rate": 7.295292162628575e-06, + "loss": 3.3332, + "step": 15510 + }, + { + "epoch": 0.89, + "learning_rate": 7.257791657140545e-06, + "loss": 3.4532, + "step": 15515 + }, + { + "epoch": 0.89, + "learning_rate": 7.220384153746995e-06, + "loss": 3.3776, + "step": 15520 + }, + { + "epoch": 0.89, + "learning_rate": 7.183069689960265e-06, + "loss": 3.4649, + "step": 15525 + }, + { + "epoch": 0.89, + "learning_rate": 7.145848303199365e-06, + "loss": 3.3873, + "step": 15530 + }, + { + "epoch": 0.89, + "learning_rate": 7.108720030790028e-06, + "loss": 3.4305, + "step": 15535 + }, + { + "epoch": 0.89, + "learning_rate": 7.071684909964526e-06, + "loss": 3.4011, + "step": 15540 + }, + { + "epoch": 0.89, + "learning_rate": 7.034742977861786e-06, + "loss": 3.4082, + "step": 15545 + }, + { + "epoch": 0.89, + "learning_rate": 6.99789427152725e-06, + "loss": 3.3679, + "step": 15550 + }, + { + "epoch": 0.89, + "learning_rate": 6.9611388279128835e-06, + "loss": 3.4245, + "step": 15555 + }, + { + "epoch": 0.89, + "learning_rate": 6.9244766838771235e-06, + "loss": 3.3979, + "step": 15560 + }, + { + "epoch": 0.89, + "learning_rate": 6.887907876184862e-06, + "loss": 3.3892, + "step": 15565 + }, + { + "epoch": 0.89, + "learning_rate": 6.851432441507377e-06, + "loss": 3.4322, + "step": 15570 + }, + { + "epoch": 0.89, + "learning_rate": 6.8150504164223085e-06, + "loss": 3.4381, + "step": 15575 + }, + { + "epoch": 0.89, + "learning_rate": 6.778761837413627e-06, + "loss": 3.4225, + "step": 15580 + }, + { + "epoch": 0.89, + "learning_rate": 6.742566740871625e-06, + "loss": 3.4122, + "step": 15585 + }, + { + "epoch": 0.89, + "learning_rate": 6.706465163092823e-06, + "loss": 3.4566, + "step": 15590 + }, + { + "epoch": 0.89, + "learning_rate": 6.67045714027994e-06, + "loss": 3.3956, + "step": 15595 + }, + { + "epoch": 0.9, + "learning_rate": 6.634542708541935e-06, + "loss": 3.4254, + "step": 15600 + }, + { + "epoch": 0.9, + "learning_rate": 6.5987219038938455e-06, + "loss": 3.469, + "step": 15605 + }, + { + "epoch": 0.9, + "learning_rate": 6.562994762256869e-06, + "loss": 3.38, + "step": 15610 + }, + { + "epoch": 0.9, + "learning_rate": 6.527361319458292e-06, + "loss": 3.4685, + "step": 15615 + }, + { + "epoch": 0.9, + "learning_rate": 6.491821611231364e-06, + "loss": 3.4602, + "step": 15620 + }, + { + "epoch": 0.9, + "learning_rate": 6.456375673215409e-06, + "loss": 3.3888, + "step": 15625 + }, + { + "epoch": 0.9, + "learning_rate": 6.421023540955684e-06, + "loss": 3.4289, + "step": 15630 + }, + { + "epoch": 0.9, + "learning_rate": 6.3857652499033974e-06, + "loss": 3.4247, + "step": 15635 + }, + { + "epoch": 0.9, + "learning_rate": 6.350600835415632e-06, + "loss": 3.3407, + "step": 15640 + }, + { + "epoch": 0.9, + "learning_rate": 6.31553033275536e-06, + "loss": 3.4866, + "step": 15645 + }, + { + "epoch": 0.9, + "learning_rate": 6.2805537770913356e-06, + "loss": 3.4535, + "step": 15650 + }, + { + "epoch": 0.9, + "learning_rate": 6.245671203498149e-06, + "loss": 3.3811, + "step": 15655 + }, + { + "epoch": 0.9, + "learning_rate": 6.210882646956084e-06, + "loss": 3.4042, + "step": 15660 + }, + { + "epoch": 0.9, + "learning_rate": 6.176188142351247e-06, + "loss": 3.5016, + "step": 15665 + }, + { + "epoch": 0.9, + "learning_rate": 6.141587724475317e-06, + "loss": 3.4632, + "step": 15670 + }, + { + "epoch": 0.9, + "learning_rate": 6.107081428025674e-06, + "loss": 3.3803, + "step": 15675 + }, + { + "epoch": 0.9, + "learning_rate": 6.072669287605326e-06, + "loss": 3.412, + "step": 15680 + }, + { + "epoch": 0.9, + "learning_rate": 6.038351337722836e-06, + "loss": 3.4406, + "step": 15685 + }, + { + "epoch": 0.9, + "learning_rate": 6.004127612792332e-06, + "loss": 3.4092, + "step": 15690 + }, + { + "epoch": 0.9, + "learning_rate": 5.969998147133415e-06, + "loss": 3.4037, + "step": 15695 + }, + { + "epoch": 0.9, + "learning_rate": 5.935962974971221e-06, + "loss": 3.3613, + "step": 15700 + }, + { + "epoch": 0.9, + "learning_rate": 5.9020221304362686e-06, + "loss": 3.4796, + "step": 15705 + }, + { + "epoch": 0.9, + "learning_rate": 5.868175647564522e-06, + "loss": 3.3717, + "step": 15710 + }, + { + "epoch": 0.9, + "learning_rate": 5.834423560297353e-06, + "loss": 3.3071, + "step": 15715 + }, + { + "epoch": 0.9, + "learning_rate": 5.800765902481364e-06, + "loss": 3.4349, + "step": 15720 + }, + { + "epoch": 0.9, + "learning_rate": 5.767202707868558e-06, + "loss": 3.3897, + "step": 15725 + }, + { + "epoch": 0.9, + "learning_rate": 5.733734010116188e-06, + "loss": 3.4839, + "step": 15730 + }, + { + "epoch": 0.9, + "learning_rate": 5.700359842786729e-06, + "loss": 3.4399, + "step": 15735 + }, + { + "epoch": 0.9, + "learning_rate": 5.667080239347889e-06, + "loss": 3.423, + "step": 15740 + }, + { + "epoch": 0.9, + "learning_rate": 5.633895233172504e-06, + "loss": 3.4389, + "step": 15745 + }, + { + "epoch": 0.9, + "learning_rate": 5.600804857538588e-06, + "loss": 3.4346, + "step": 15750 + }, + { + "epoch": 0.9, + "learning_rate": 5.567809145629244e-06, + "loss": 3.5118, + "step": 15755 + }, + { + "epoch": 0.9, + "learning_rate": 5.534908130532623e-06, + "loss": 3.4787, + "step": 15760 + }, + { + "epoch": 0.9, + "learning_rate": 5.50210184524198e-06, + "loss": 3.3663, + "step": 15765 + }, + { + "epoch": 0.9, + "learning_rate": 5.469390322655498e-06, + "loss": 3.4279, + "step": 15770 + }, + { + "epoch": 0.91, + "learning_rate": 5.436773595576361e-06, + "loss": 3.3763, + "step": 15775 + }, + { + "epoch": 0.91, + "learning_rate": 5.404251696712714e-06, + "loss": 3.3387, + "step": 15780 + }, + { + "epoch": 0.91, + "learning_rate": 5.371824658677594e-06, + "loss": 3.4136, + "step": 15785 + }, + { + "epoch": 0.91, + "learning_rate": 5.339492513988897e-06, + "loss": 3.5241, + "step": 15790 + }, + { + "epoch": 0.91, + "learning_rate": 5.307255295069369e-06, + "loss": 3.4415, + "step": 15795 + }, + { + "epoch": 0.91, + "learning_rate": 5.275113034246571e-06, + "loss": 3.4787, + "step": 15800 + }, + { + "epoch": 0.91, + "learning_rate": 5.243065763752819e-06, + "loss": 3.3971, + "step": 15805 + }, + { + "epoch": 0.91, + "learning_rate": 5.2111135157252076e-06, + "loss": 3.3735, + "step": 15810 + }, + { + "epoch": 0.91, + "learning_rate": 5.179256322205539e-06, + "loss": 3.4765, + "step": 15815 + }, + { + "epoch": 0.91, + "learning_rate": 5.147494215140236e-06, + "loss": 3.5324, + "step": 15820 + }, + { + "epoch": 0.91, + "learning_rate": 5.115827226380421e-06, + "loss": 3.3728, + "step": 15825 + }, + { + "epoch": 0.91, + "learning_rate": 5.084255387681836e-06, + "loss": 3.329, + "step": 15830 + }, + { + "epoch": 0.91, + "learning_rate": 5.052778730704788e-06, + "loss": 3.3808, + "step": 15835 + }, + { + "epoch": 0.91, + "learning_rate": 5.021397287014129e-06, + "loss": 3.5177, + "step": 15840 + }, + { + "epoch": 0.91, + "learning_rate": 4.990111088079263e-06, + "loss": 3.41, + "step": 15845 + }, + { + "epoch": 0.91, + "learning_rate": 4.958920165274039e-06, + "loss": 3.3683, + "step": 15850 + }, + { + "epoch": 0.91, + "learning_rate": 4.92782454987678e-06, + "loss": 3.4698, + "step": 15855 + }, + { + "epoch": 0.91, + "learning_rate": 4.896824273070255e-06, + "loss": 3.4777, + "step": 15860 + }, + { + "epoch": 0.91, + "learning_rate": 4.865919365941629e-06, + "loss": 3.3998, + "step": 15865 + }, + { + "epoch": 0.91, + "learning_rate": 4.8351098594823674e-06, + "loss": 3.3848, + "step": 15870 + }, + { + "epoch": 0.91, + "learning_rate": 4.804395784588334e-06, + "loss": 3.3568, + "step": 15875 + }, + { + "epoch": 0.91, + "learning_rate": 4.77377717205969e-06, + "loss": 3.3915, + "step": 15880 + }, + { + "epoch": 0.91, + "learning_rate": 4.7432540526008205e-06, + "loss": 3.4137, + "step": 15885 + }, + { + "epoch": 0.91, + "learning_rate": 4.712826456820385e-06, + "loss": 3.4409, + "step": 15890 + }, + { + "epoch": 0.91, + "learning_rate": 4.682494415231253e-06, + "loss": 3.4081, + "step": 15895 + }, + { + "epoch": 0.91, + "learning_rate": 4.652257958250461e-06, + "loss": 3.4192, + "step": 15900 + }, + { + "epoch": 0.91, + "learning_rate": 4.6221171161991874e-06, + "loss": 3.4142, + "step": 15905 + }, + { + "epoch": 0.91, + "learning_rate": 4.592071919302743e-06, + "loss": 3.4613, + "step": 15910 + }, + { + "epoch": 0.91, + "learning_rate": 4.562122397690538e-06, + "loss": 3.4465, + "step": 15915 + }, + { + "epoch": 0.91, + "learning_rate": 4.532268581395982e-06, + "loss": 3.4302, + "step": 15920 + }, + { + "epoch": 0.91, + "learning_rate": 4.502510500356571e-06, + "loss": 3.3493, + "step": 15925 + }, + { + "epoch": 0.91, + "learning_rate": 4.472848184413769e-06, + "loss": 3.3985, + "step": 15930 + }, + { + "epoch": 0.91, + "learning_rate": 4.443281663313026e-06, + "loss": 3.3603, + "step": 15935 + }, + { + "epoch": 0.91, + "learning_rate": 4.413810966703702e-06, + "loss": 3.388, + "step": 15940 + }, + { + "epoch": 0.91, + "learning_rate": 4.3844361241390795e-06, + "loss": 3.476, + "step": 15945 + }, + { + "epoch": 0.92, + "learning_rate": 4.355157165076318e-06, + "loss": 3.3577, + "step": 15950 + }, + { + "epoch": 0.92, + "learning_rate": 4.325974118876408e-06, + "loss": 3.4231, + "step": 15955 + }, + { + "epoch": 0.92, + "learning_rate": 4.296887014804207e-06, + "loss": 3.4153, + "step": 15960 + }, + { + "epoch": 0.92, + "learning_rate": 4.267895882028328e-06, + "loss": 3.4403, + "step": 15965 + }, + { + "epoch": 0.92, + "learning_rate": 4.239000749621092e-06, + "loss": 3.3955, + "step": 15970 + }, + { + "epoch": 0.92, + "learning_rate": 4.210201646558653e-06, + "loss": 3.4898, + "step": 15975 + }, + { + "epoch": 0.92, + "learning_rate": 4.181498601720801e-06, + "loss": 3.4349, + "step": 15980 + }, + { + "epoch": 0.92, + "learning_rate": 4.15289164389101e-06, + "loss": 3.4316, + "step": 15985 + }, + { + "epoch": 0.92, + "learning_rate": 4.124380801756411e-06, + "loss": 3.396, + "step": 15990 + }, + { + "epoch": 0.92, + "learning_rate": 4.095966103907723e-06, + "loss": 3.3852, + "step": 15995 + }, + { + "epoch": 0.92, + "learning_rate": 4.0676475788392845e-06, + "loss": 3.4381, + "step": 16000 + }, + { + "epoch": 0.92, + "learning_rate": 4.039425254948958e-06, + "loss": 3.4296, + "step": 16005 + }, + { + "epoch": 0.92, + "learning_rate": 4.011299160538185e-06, + "loss": 3.4722, + "step": 16010 + }, + { + "epoch": 0.92, + "learning_rate": 3.983269323811856e-06, + "loss": 3.4337, + "step": 16015 + }, + { + "epoch": 0.92, + "learning_rate": 3.955335772878343e-06, + "loss": 3.4559, + "step": 16020 + }, + { + "epoch": 0.92, + "learning_rate": 3.927498535749486e-06, + "loss": 3.3807, + "step": 16025 + }, + { + "epoch": 0.92, + "learning_rate": 3.89975764034054e-06, + "loss": 3.4179, + "step": 16030 + }, + { + "epoch": 0.92, + "learning_rate": 3.872113114470122e-06, + "loss": 3.4281, + "step": 16035 + }, + { + "epoch": 0.92, + "learning_rate": 3.844564985860222e-06, + "loss": 3.4428, + "step": 16040 + }, + { + "epoch": 0.92, + "learning_rate": 3.817113282136176e-06, + "loss": 3.4547, + "step": 16045 + }, + { + "epoch": 0.92, + "learning_rate": 3.7897580308265954e-06, + "loss": 3.4602, + "step": 16050 + }, + { + "epoch": 0.92, + "learning_rate": 3.762499259363417e-06, + "loss": 3.4494, + "step": 16055 + }, + { + "epoch": 0.92, + "learning_rate": 3.735336995081795e-06, + "loss": 3.4163, + "step": 16060 + }, + { + "epoch": 0.92, + "learning_rate": 3.7082712652200867e-06, + "loss": 3.4102, + "step": 16065 + }, + { + "epoch": 0.92, + "learning_rate": 3.6813020969198585e-06, + "loss": 3.4305, + "step": 16070 + }, + { + "epoch": 0.92, + "learning_rate": 3.654429517225877e-06, + "loss": 3.3537, + "step": 16075 + }, + { + "epoch": 0.92, + "learning_rate": 3.62765355308603e-06, + "loss": 3.4169, + "step": 16080 + }, + { + "epoch": 0.92, + "learning_rate": 3.600974231351306e-06, + "loss": 3.4335, + "step": 16085 + }, + { + "epoch": 0.92, + "learning_rate": 3.574391578775771e-06, + "loss": 3.3909, + "step": 16090 + }, + { + "epoch": 0.92, + "learning_rate": 3.547905622016601e-06, + "loss": 3.4554, + "step": 16095 + }, + { + "epoch": 0.92, + "learning_rate": 3.5215163876339274e-06, + "loss": 3.396, + "step": 16100 + }, + { + "epoch": 0.92, + "learning_rate": 3.495223902090983e-06, + "loss": 3.3914, + "step": 16105 + }, + { + "epoch": 0.92, + "learning_rate": 3.4690281917539203e-06, + "loss": 3.4643, + "step": 16110 + }, + { + "epoch": 0.92, + "learning_rate": 3.442929282891827e-06, + "loss": 3.3935, + "step": 16115 + }, + { + "epoch": 0.92, + "learning_rate": 3.416927201676767e-06, + "loss": 3.3379, + "step": 16120 + }, + { + "epoch": 0.93, + "learning_rate": 3.3910219741836944e-06, + "loss": 3.3195, + "step": 16125 + }, + { + "epoch": 0.93, + "learning_rate": 3.365213626390418e-06, + "loss": 3.451, + "step": 16130 + }, + { + "epoch": 0.93, + "learning_rate": 3.339502184177612e-06, + "loss": 3.4756, + "step": 16135 + }, + { + "epoch": 0.93, + "learning_rate": 3.3138876733287638e-06, + "loss": 3.4397, + "step": 16140 + }, + { + "epoch": 0.93, + "learning_rate": 3.28837011953016e-06, + "loss": 3.4613, + "step": 16145 + }, + { + "epoch": 0.93, + "learning_rate": 3.262949548370853e-06, + "loss": 3.4659, + "step": 16150 + }, + { + "epoch": 0.93, + "learning_rate": 3.237625985342674e-06, + "loss": 3.3994, + "step": 16155 + }, + { + "epoch": 0.93, + "learning_rate": 3.212399455840154e-06, + "loss": 3.4041, + "step": 16160 + }, + { + "epoch": 0.93, + "learning_rate": 3.187269985160457e-06, + "loss": 3.3417, + "step": 16165 + }, + { + "epoch": 0.93, + "learning_rate": 3.1622375985035367e-06, + "loss": 3.4132, + "step": 16170 + }, + { + "epoch": 0.93, + "learning_rate": 3.137302320971891e-06, + "loss": 3.4417, + "step": 16175 + }, + { + "epoch": 0.93, + "learning_rate": 3.112464177570662e-06, + "loss": 3.3599, + "step": 16180 + }, + { + "epoch": 0.93, + "learning_rate": 3.087723193207648e-06, + "loss": 3.3959, + "step": 16185 + }, + { + "epoch": 0.93, + "learning_rate": 3.0630793926931132e-06, + "loss": 3.4788, + "step": 16190 + }, + { + "epoch": 0.93, + "learning_rate": 3.038532800739935e-06, + "loss": 3.5079, + "step": 16195 + }, + { + "epoch": 0.93, + "learning_rate": 3.014083441963478e-06, + "loss": 3.4389, + "step": 16200 + }, + { + "epoch": 0.93, + "learning_rate": 2.9897313408816407e-06, + "loss": 3.4252, + "step": 16205 + }, + { + "epoch": 0.93, + "learning_rate": 2.9654765219147563e-06, + "loss": 3.4051, + "step": 16210 + }, + { + "epoch": 0.93, + "learning_rate": 2.941319009385579e-06, + "loss": 3.4969, + "step": 16215 + }, + { + "epoch": 0.93, + "learning_rate": 2.9172588275193534e-06, + "loss": 3.3736, + "step": 16220 + }, + { + "epoch": 0.93, + "learning_rate": 2.8932960004436795e-06, + "loss": 3.4316, + "step": 16225 + }, + { + "epoch": 0.93, + "learning_rate": 2.869430552188501e-06, + "loss": 3.3857, + "step": 16230 + }, + { + "epoch": 0.93, + "learning_rate": 2.8456625066861973e-06, + "loss": 3.4781, + "step": 16235 + }, + { + "epoch": 0.93, + "learning_rate": 2.8219918877713804e-06, + "loss": 3.466, + "step": 16240 + }, + { + "epoch": 0.93, + "learning_rate": 2.7984187191810063e-06, + "loss": 3.4648, + "step": 16245 + }, + { + "epoch": 0.93, + "learning_rate": 2.7749430245542997e-06, + "loss": 3.3612, + "step": 16250 + }, + { + "epoch": 0.93, + "learning_rate": 2.751564827432751e-06, + "loss": 3.4354, + "step": 16255 + }, + { + "epoch": 0.93, + "learning_rate": 2.7282841512600632e-06, + "loss": 3.872, + "step": 16260 + }, + { + "epoch": 0.93, + "learning_rate": 2.705101019382139e-06, + "loss": 3.4008, + "step": 16265 + }, + { + "epoch": 0.93, + "learning_rate": 2.682015455047093e-06, + "loss": 3.4858, + "step": 16270 + }, + { + "epoch": 0.93, + "learning_rate": 2.659027481405163e-06, + "loss": 3.3736, + "step": 16275 + }, + { + "epoch": 0.93, + "learning_rate": 2.636137121508753e-06, + "loss": 3.4891, + "step": 16280 + }, + { + "epoch": 0.93, + "learning_rate": 2.6133443983123785e-06, + "loss": 3.4677, + "step": 16285 + }, + { + "epoch": 0.93, + "learning_rate": 2.5906493346726126e-06, + "loss": 3.4149, + "step": 16290 + }, + { + "epoch": 0.93, + "learning_rate": 2.5680519533481052e-06, + "loss": 3.4942, + "step": 16295 + }, + { + "epoch": 0.94, + "learning_rate": 2.5455522769995966e-06, + "loss": 3.5232, + "step": 16300 + }, + { + "epoch": 0.94, + "learning_rate": 2.523150328189783e-06, + "loss": 3.3462, + "step": 16305 + }, + { + "epoch": 0.94, + "learning_rate": 2.500846129383416e-06, + "loss": 3.4967, + "step": 16310 + }, + { + "epoch": 0.94, + "learning_rate": 2.478639702947172e-06, + "loss": 3.403, + "step": 16315 + }, + { + "epoch": 0.94, + "learning_rate": 2.4565310711497146e-06, + "loss": 3.5001, + "step": 16320 + }, + { + "epoch": 0.94, + "learning_rate": 2.434520256161632e-06, + "loss": 3.4267, + "step": 16325 + }, + { + "epoch": 0.94, + "learning_rate": 2.412607280055401e-06, + "loss": 3.3484, + "step": 16330 + }, + { + "epoch": 0.94, + "learning_rate": 2.390792164805433e-06, + "loss": 3.3782, + "step": 16335 + }, + { + "epoch": 0.94, + "learning_rate": 2.3690749322879624e-06, + "loss": 3.388, + "step": 16340 + }, + { + "epoch": 0.94, + "learning_rate": 2.347455604281057e-06, + "loss": 3.4352, + "step": 16345 + }, + { + "epoch": 0.94, + "learning_rate": 2.3259342024646524e-06, + "loss": 3.4887, + "step": 16350 + }, + { + "epoch": 0.94, + "learning_rate": 2.304510748420463e-06, + "loss": 3.3867, + "step": 16355 + }, + { + "epoch": 0.94, + "learning_rate": 2.2831852636319594e-06, + "loss": 3.446, + "step": 16360 + }, + { + "epoch": 0.94, + "learning_rate": 2.2619577694843907e-06, + "loss": 3.38, + "step": 16365 + }, + { + "epoch": 0.94, + "learning_rate": 2.240828287264729e-06, + "loss": 3.3642, + "step": 16370 + }, + { + "epoch": 0.94, + "learning_rate": 2.219796838161681e-06, + "loss": 3.4133, + "step": 16375 + }, + { + "epoch": 0.94, + "learning_rate": 2.1988634432656197e-06, + "loss": 3.371, + "step": 16380 + }, + { + "epoch": 0.94, + "learning_rate": 2.1780281235686206e-06, + "loss": 3.4616, + "step": 16385 + }, + { + "epoch": 0.94, + "learning_rate": 2.1572908999643705e-06, + "loss": 3.3266, + "step": 16390 + }, + { + "epoch": 0.94, + "learning_rate": 2.13665179324819e-06, + "loss": 3.4578, + "step": 16395 + }, + { + "epoch": 0.94, + "learning_rate": 2.116110824117046e-06, + "loss": 3.414, + "step": 16400 + }, + { + "epoch": 0.94, + "learning_rate": 2.0956680131694604e-06, + "loss": 3.3896, + "step": 16405 + }, + { + "epoch": 0.94, + "learning_rate": 2.075323380905536e-06, + "loss": 3.5391, + "step": 16410 + }, + { + "epoch": 0.94, + "learning_rate": 2.0550769477269084e-06, + "loss": 3.4138, + "step": 16415 + }, + { + "epoch": 0.94, + "learning_rate": 2.0349287339367364e-06, + "loss": 3.359, + "step": 16420 + }, + { + "epoch": 0.94, + "learning_rate": 2.0148787597397136e-06, + "loss": 3.425, + "step": 16425 + }, + { + "epoch": 0.94, + "learning_rate": 1.99492704524199e-06, + "loss": 3.3849, + "step": 16430 + }, + { + "epoch": 0.94, + "learning_rate": 1.9750736104511947e-06, + "loss": 3.421, + "step": 16435 + }, + { + "epoch": 0.94, + "learning_rate": 1.955318475276391e-06, + "loss": 3.3681, + "step": 16440 + }, + { + "epoch": 0.94, + "learning_rate": 1.935661659528054e-06, + "loss": 3.4099, + "step": 16445 + }, + { + "epoch": 0.94, + "learning_rate": 1.9161031829181275e-06, + "loss": 3.4122, + "step": 16450 + }, + { + "epoch": 0.94, + "learning_rate": 1.8966430650598554e-06, + "loss": 3.3337, + "step": 16455 + }, + { + "epoch": 0.94, + "learning_rate": 1.8772813254679166e-06, + "loss": 3.3977, + "step": 16460 + }, + { + "epoch": 0.94, + "learning_rate": 1.85801798355828e-06, + "loss": 3.48, + "step": 16465 + }, + { + "epoch": 0.94, + "learning_rate": 1.8388530586482932e-06, + "loss": 3.3735, + "step": 16470 + }, + { + "epoch": 0.95, + "learning_rate": 1.8197865699565497e-06, + "loss": 3.5132, + "step": 16475 + }, + { + "epoch": 0.95, + "learning_rate": 1.8008185366030217e-06, + "loss": 3.3535, + "step": 16480 + }, + { + "epoch": 0.95, + "learning_rate": 1.7819489776088493e-06, + "loss": 3.4122, + "step": 16485 + }, + { + "epoch": 0.95, + "learning_rate": 1.7631779118964852e-06, + "loss": 3.3775, + "step": 16490 + }, + { + "epoch": 0.95, + "learning_rate": 1.7445053582895944e-06, + "loss": 3.4757, + "step": 16495 + }, + { + "epoch": 0.95, + "learning_rate": 1.7259313355130647e-06, + "loss": 3.4438, + "step": 16500 + }, + { + "epoch": 0.95, + "learning_rate": 1.7074558621929526e-06, + "loss": 3.4568, + "step": 16505 + }, + { + "epoch": 0.95, + "learning_rate": 1.6890789568565156e-06, + "loss": 3.3492, + "step": 16510 + }, + { + "epoch": 0.95, + "learning_rate": 1.670800637932146e-06, + "loss": 3.4443, + "step": 16515 + }, + { + "epoch": 0.95, + "learning_rate": 1.6526209237493928e-06, + "loss": 3.29, + "step": 16520 + }, + { + "epoch": 0.95, + "learning_rate": 1.634539832538895e-06, + "loss": 3.4434, + "step": 16525 + }, + { + "epoch": 0.95, + "learning_rate": 1.6165573824324488e-06, + "loss": 3.3871, + "step": 16530 + }, + { + "epoch": 0.95, + "learning_rate": 1.5986735914628625e-06, + "loss": 3.4267, + "step": 16535 + }, + { + "epoch": 0.95, + "learning_rate": 1.5808884775640464e-06, + "loss": 3.5371, + "step": 16540 + }, + { + "epoch": 0.95, + "learning_rate": 1.5632020585709673e-06, + "loss": 3.4673, + "step": 16545 + }, + { + "epoch": 0.95, + "learning_rate": 1.5456143522195931e-06, + "loss": 3.4332, + "step": 16550 + }, + { + "epoch": 0.95, + "learning_rate": 1.5281253761469161e-06, + "loss": 3.4395, + "step": 16555 + }, + { + "epoch": 0.95, + "learning_rate": 1.5107351478909293e-06, + "loss": 3.368, + "step": 16560 + }, + { + "epoch": 0.95, + "learning_rate": 1.493443684890583e-06, + "loss": 3.4064, + "step": 16565 + }, + { + "epoch": 0.95, + "learning_rate": 1.4762510044857957e-06, + "loss": 3.3378, + "step": 16570 + }, + { + "epoch": 0.95, + "learning_rate": 1.4591571239174317e-06, + "loss": 3.4045, + "step": 16575 + }, + { + "epoch": 0.95, + "learning_rate": 1.4421620603272789e-06, + "loss": 3.4136, + "step": 16580 + }, + { + "epoch": 0.95, + "learning_rate": 1.4252658307580048e-06, + "loss": 3.3964, + "step": 16585 + }, + { + "epoch": 0.95, + "learning_rate": 1.4084684521531887e-06, + "loss": 3.3881, + "step": 16590 + }, + { + "epoch": 0.95, + "learning_rate": 1.3917699413573014e-06, + "loss": 3.3623, + "step": 16595 + }, + { + "epoch": 0.95, + "learning_rate": 1.375170315115637e-06, + "loss": 3.4391, + "step": 16600 + }, + { + "epoch": 0.95, + "learning_rate": 1.3586695900743352e-06, + "loss": 3.4098, + "step": 16605 + }, + { + "epoch": 0.95, + "learning_rate": 1.3422677827803599e-06, + "loss": 3.4488, + "step": 16610 + }, + { + "epoch": 0.95, + "learning_rate": 1.3259649096814763e-06, + "loss": 3.478, + "step": 16615 + }, + { + "epoch": 0.95, + "learning_rate": 1.3097609871262295e-06, + "loss": 3.4537, + "step": 16620 + }, + { + "epoch": 0.95, + "learning_rate": 1.293656031363988e-06, + "loss": 3.3566, + "step": 16625 + }, + { + "epoch": 0.95, + "learning_rate": 1.2776500585448215e-06, + "loss": 3.4279, + "step": 16630 + }, + { + "epoch": 0.95, + "learning_rate": 1.2617430847195356e-06, + "loss": 3.4323, + "step": 16635 + }, + { + "epoch": 0.95, + "learning_rate": 1.2459351258396812e-06, + "loss": 3.429, + "step": 16640 + }, + { + "epoch": 0.96, + "learning_rate": 1.2302261977575447e-06, + "loss": 3.3681, + "step": 16645 + }, + { + "epoch": 0.96, + "learning_rate": 1.2146163162260581e-06, + "loss": 3.3966, + "step": 16650 + }, + { + "epoch": 0.96, + "learning_rate": 1.1991054968988336e-06, + "loss": 3.4434, + "step": 16655 + }, + { + "epoch": 0.96, + "learning_rate": 1.183693755330173e-06, + "loss": 3.4042, + "step": 16660 + }, + { + "epoch": 0.96, + "learning_rate": 1.1683811069749916e-06, + "loss": 3.4279, + "step": 16665 + }, + { + "epoch": 0.96, + "learning_rate": 1.1531675671888619e-06, + "loss": 3.4383, + "step": 16670 + }, + { + "epoch": 0.96, + "learning_rate": 1.1380531512279469e-06, + "loss": 3.4612, + "step": 16675 + }, + { + "epoch": 0.96, + "learning_rate": 1.1230378742490222e-06, + "loss": 3.4785, + "step": 16680 + }, + { + "epoch": 0.96, + "learning_rate": 1.1081217513094212e-06, + "loss": 3.4413, + "step": 16685 + }, + { + "epoch": 0.96, + "learning_rate": 1.0933047973670896e-06, + "loss": 3.5041, + "step": 16690 + }, + { + "epoch": 0.96, + "learning_rate": 1.0785870272804977e-06, + "loss": 3.3471, + "step": 16695 + }, + { + "epoch": 0.96, + "learning_rate": 1.0639684558086504e-06, + "loss": 3.5865, + "step": 16700 + }, + { + "epoch": 0.96, + "learning_rate": 1.0494490976110883e-06, + "loss": 3.4429, + "step": 16705 + }, + { + "epoch": 0.96, + "learning_rate": 1.035028967247864e-06, + "loss": 3.4391, + "step": 16710 + }, + { + "epoch": 0.96, + "learning_rate": 1.0207080791794998e-06, + "loss": 3.3926, + "step": 16715 + }, + { + "epoch": 0.96, + "learning_rate": 1.006486447767019e-06, + "loss": 3.4304, + "step": 16720 + }, + { + "epoch": 0.96, + "learning_rate": 9.923640872719131e-07, + "loss": 3.3861, + "step": 16725 + }, + { + "epoch": 0.96, + "learning_rate": 9.78341011856121e-07, + "loss": 3.4625, + "step": 16730 + }, + { + "epoch": 0.96, + "learning_rate": 9.644172355819936e-07, + "loss": 3.5085, + "step": 16735 + }, + { + "epoch": 0.96, + "learning_rate": 9.505927724123509e-07, + "loss": 3.4166, + "step": 16740 + }, + { + "epoch": 0.96, + "learning_rate": 9.368676362103701e-07, + "loss": 3.4101, + "step": 16745 + }, + { + "epoch": 0.96, + "learning_rate": 9.232418407396636e-07, + "loss": 3.3577, + "step": 16750 + }, + { + "epoch": 0.96, + "learning_rate": 9.097153996642238e-07, + "loss": 3.457, + "step": 16755 + }, + { + "epoch": 0.96, + "learning_rate": 8.962883265483668e-07, + "loss": 3.3854, + "step": 16760 + }, + { + "epoch": 0.96, + "learning_rate": 8.829606348567999e-07, + "loss": 3.4381, + "step": 16765 + }, + { + "epoch": 0.96, + "learning_rate": 8.697323379545653e-07, + "loss": 3.3898, + "step": 16770 + }, + { + "epoch": 0.96, + "learning_rate": 8.566034491070407e-07, + "loss": 3.4394, + "step": 16775 + }, + { + "epoch": 0.96, + "learning_rate": 8.435739814798949e-07, + "loss": 3.554, + "step": 16780 + }, + { + "epoch": 0.96, + "learning_rate": 8.30643948139087e-07, + "loss": 3.4869, + "step": 16785 + }, + { + "epoch": 0.96, + "learning_rate": 8.178133620509232e-07, + "loss": 3.3997, + "step": 16790 + }, + { + "epoch": 0.96, + "learning_rate": 8.050822360819221e-07, + "loss": 3.4091, + "step": 16795 + }, + { + "epoch": 0.96, + "learning_rate": 7.924505829988716e-07, + "loss": 3.3729, + "step": 16800 + }, + { + "epoch": 0.96, + "learning_rate": 7.79918415468861e-07, + "loss": 3.4036, + "step": 16805 + }, + { + "epoch": 0.96, + "learning_rate": 7.674857460591379e-07, + "loss": 3.3937, + "step": 16810 + }, + { + "epoch": 0.96, + "learning_rate": 7.551525872372289e-07, + "loss": 3.4239, + "step": 16815 + }, + { + "epoch": 0.97, + "learning_rate": 7.429189513708524e-07, + "loss": 3.3967, + "step": 16820 + }, + { + "epoch": 0.97, + "learning_rate": 7.307848507279169e-07, + "loss": 3.3652, + "step": 16825 + }, + { + "epoch": 0.97, + "learning_rate": 7.187502974765448e-07, + "loss": 3.4464, + "step": 16830 + }, + { + "epoch": 0.97, + "learning_rate": 7.068153036849934e-07, + "loss": 3.4788, + "step": 16835 + }, + { + "epoch": 0.97, + "learning_rate": 6.949798813217001e-07, + "loss": 3.4489, + "step": 16840 + }, + { + "epoch": 0.97, + "learning_rate": 6.83244042255271e-07, + "loss": 3.4216, + "step": 16845 + }, + { + "epoch": 0.97, + "learning_rate": 6.716077982544256e-07, + "loss": 3.4615, + "step": 16850 + }, + { + "epoch": 0.97, + "learning_rate": 6.600711609880072e-07, + "loss": 3.3639, + "step": 16855 + }, + { + "epoch": 0.97, + "learning_rate": 6.486341420249842e-07, + "loss": 3.469, + "step": 16860 + }, + { + "epoch": 0.97, + "learning_rate": 6.372967528344264e-07, + "loss": 3.3457, + "step": 16865 + }, + { + "epoch": 0.97, + "learning_rate": 6.260590047854952e-07, + "loss": 3.4112, + "step": 16870 + }, + { + "epoch": 0.97, + "learning_rate": 6.149209091474318e-07, + "loss": 3.3981, + "step": 16875 + }, + { + "epoch": 0.97, + "learning_rate": 6.038824770895457e-07, + "loss": 3.3743, + "step": 16880 + }, + { + "epoch": 0.97, + "learning_rate": 5.929437196811827e-07, + "loss": 3.3627, + "step": 16885 + }, + { + "epoch": 0.97, + "learning_rate": 5.821046478917791e-07, + "loss": 3.3199, + "step": 16890 + }, + { + "epoch": 0.97, + "learning_rate": 5.713652725907626e-07, + "loss": 3.4238, + "step": 16895 + }, + { + "epoch": 0.97, + "learning_rate": 5.607256045475961e-07, + "loss": 3.4091, + "step": 16900 + }, + { + "epoch": 0.97, + "learning_rate": 5.501856544317896e-07, + "loss": 3.3896, + "step": 16905 + }, + { + "epoch": 0.97, + "learning_rate": 5.397454328128104e-07, + "loss": 3.4281, + "step": 16910 + }, + { + "epoch": 0.97, + "learning_rate": 5.294049501601283e-07, + "loss": 3.4183, + "step": 16915 + }, + { + "epoch": 0.97, + "learning_rate": 5.191642168432154e-07, + "loss": 3.4636, + "step": 16920 + }, + { + "epoch": 0.97, + "learning_rate": 5.090232431315123e-07, + "loss": 3.3741, + "step": 16925 + }, + { + "epoch": 0.97, + "learning_rate": 4.989820391943845e-07, + "loss": 3.4348, + "step": 16930 + }, + { + "epoch": 0.97, + "learning_rate": 4.890406151011884e-07, + "loss": 3.4067, + "step": 16935 + }, + { + "epoch": 0.97, + "learning_rate": 4.79198980821216e-07, + "loss": 3.3656, + "step": 16940 + }, + { + "epoch": 0.97, + "learning_rate": 4.694571462236619e-07, + "loss": 3.3766, + "step": 16945 + }, + { + "epoch": 0.97, + "learning_rate": 4.5981512107766687e-07, + "loss": 3.4242, + "step": 16950 + }, + { + "epoch": 0.97, + "learning_rate": 4.5027291505227443e-07, + "loss": 3.3524, + "step": 16955 + }, + { + "epoch": 0.97, + "learning_rate": 4.408305377164301e-07, + "loss": 3.4701, + "step": 16960 + }, + { + "epoch": 0.97, + "learning_rate": 4.314879985389708e-07, + "loss": 3.4688, + "step": 16965 + }, + { + "epoch": 0.97, + "learning_rate": 4.222453068886245e-07, + "loss": 3.4002, + "step": 16970 + }, + { + "epoch": 0.97, + "learning_rate": 4.13102472033966e-07, + "loss": 3.4864, + "step": 16975 + }, + { + "epoch": 0.97, + "learning_rate": 4.0405950314347243e-07, + "loss": 3.3569, + "step": 16980 + }, + { + "epoch": 0.97, + "learning_rate": 3.951164092854343e-07, + "loss": 3.4588, + "step": 16985 + }, + { + "epoch": 0.97, + "learning_rate": 3.862731994280111e-07, + "loss": 3.4159, + "step": 16990 + }, + { + "epoch": 0.98, + "learning_rate": 3.775298824391982e-07, + "loss": 3.4006, + "step": 16995 + }, + { + "epoch": 0.98, + "learning_rate": 3.688864670868153e-07, + "loss": 3.3577, + "step": 17000 + }, + { + "epoch": 0.98, + "learning_rate": 3.6034296203848463e-07, + "loss": 3.5087, + "step": 17005 + }, + { + "epoch": 0.98, + "learning_rate": 3.51899375861664e-07, + "loss": 3.372, + "step": 17010 + }, + { + "epoch": 0.98, + "learning_rate": 3.435557170236026e-07, + "loss": 3.3523, + "step": 17015 + }, + { + "epoch": 0.98, + "learning_rate": 3.3531199389132963e-07, + "loss": 3.3763, + "step": 17020 + }, + { + "epoch": 0.98, + "learning_rate": 3.271682147316879e-07, + "loss": 3.3921, + "step": 17025 + }, + { + "epoch": 0.98, + "learning_rate": 3.1912438771125594e-07, + "loss": 3.4268, + "step": 17030 + }, + { + "epoch": 0.98, + "learning_rate": 3.111805208964036e-07, + "loss": 3.4141, + "step": 17035 + }, + { + "epoch": 0.98, + "learning_rate": 3.0333662225328074e-07, + "loss": 3.5221, + "step": 17040 + }, + { + "epoch": 0.98, + "learning_rate": 2.955926996477398e-07, + "loss": 3.4738, + "step": 17045 + }, + { + "epoch": 0.98, + "learning_rate": 2.8794876084541346e-07, + "loss": 3.3299, + "step": 17050 + }, + { + "epoch": 0.98, + "learning_rate": 2.8040481351166993e-07, + "loss": 3.4663, + "step": 17055 + }, + { + "epoch": 0.98, + "learning_rate": 2.7296086521158003e-07, + "loss": 3.3776, + "step": 17060 + }, + { + "epoch": 0.98, + "learning_rate": 2.6561692340997255e-07, + "loss": 3.4761, + "step": 17065 + }, + { + "epoch": 0.98, + "learning_rate": 2.583729954713454e-07, + "loss": 3.4914, + "step": 17070 + }, + { + "epoch": 0.98, + "learning_rate": 2.512290886599433e-07, + "loss": 3.4293, + "step": 17075 + }, + { + "epoch": 0.98, + "learning_rate": 2.441852101396802e-07, + "loss": 3.3301, + "step": 17080 + }, + { + "epoch": 0.98, + "learning_rate": 2.3724136697418353e-07, + "loss": 3.3773, + "step": 17085 + }, + { + "epoch": 0.98, + "learning_rate": 2.303975661267499e-07, + "loss": 3.3801, + "step": 17090 + }, + { + "epoch": 0.98, + "learning_rate": 2.2365381446035617e-07, + "loss": 3.3833, + "step": 17095 + }, + { + "epoch": 0.98, + "learning_rate": 2.170101187376594e-07, + "loss": 3.463, + "step": 17100 + }, + { + "epoch": 0.98, + "learning_rate": 2.104664856209637e-07, + "loss": 3.3564, + "step": 17105 + }, + { + "epoch": 0.98, + "learning_rate": 2.0402292167224225e-07, + "loss": 3.4219, + "step": 17110 + }, + { + "epoch": 0.98, + "learning_rate": 1.976794333531151e-07, + "loss": 3.4198, + "step": 17115 + }, + { + "epoch": 0.98, + "learning_rate": 1.9143602702484942e-07, + "loss": 3.4459, + "step": 17120 + }, + { + "epoch": 0.98, + "learning_rate": 1.8529270894833694e-07, + "loss": 3.4669, + "step": 17125 + }, + { + "epoch": 0.98, + "learning_rate": 1.7924948528412755e-07, + "loss": 3.3376, + "step": 17130 + }, + { + "epoch": 0.98, + "learning_rate": 1.733063620923625e-07, + "loss": 3.4309, + "step": 17135 + }, + { + "epoch": 0.98, + "learning_rate": 1.6746334533284115e-07, + "loss": 3.4079, + "step": 17140 + }, + { + "epoch": 0.98, + "learning_rate": 1.6172044086492088e-07, + "loss": 3.46, + "step": 17145 + }, + { + "epoch": 0.98, + "learning_rate": 1.5607765444762834e-07, + "loss": 3.4364, + "step": 17150 + }, + { + "epoch": 0.98, + "learning_rate": 1.5053499173955933e-07, + "loss": 3.4168, + "step": 17155 + }, + { + "epoch": 0.98, + "learning_rate": 1.4509245829888996e-07, + "loss": 3.5191, + "step": 17160 + }, + { + "epoch": 0.98, + "learning_rate": 1.3975005958341003e-07, + "loss": 3.504, + "step": 17165 + }, + { + "epoch": 0.99, + "learning_rate": 1.3450780095051186e-07, + "loss": 3.3598, + "step": 17170 + }, + { + "epoch": 0.99, + "learning_rate": 1.2936568765711254e-07, + "loss": 3.5448, + "step": 17175 + }, + { + "epoch": 0.99, + "learning_rate": 1.2432372485975395e-07, + "loss": 3.3861, + "step": 17180 + }, + { + "epoch": 0.99, + "learning_rate": 1.193819176145361e-07, + "loss": 3.4468, + "step": 17185 + }, + { + "epoch": 0.99, + "learning_rate": 1.1454027087708375e-07, + "loss": 3.2909, + "step": 17190 + }, + { + "epoch": 0.99, + "learning_rate": 1.0979878950263534e-07, + "loss": 3.3866, + "step": 17195 + }, + { + "epoch": 0.99, + "learning_rate": 1.0515747824595413e-07, + "loss": 3.3499, + "step": 17200 + }, + { + "epoch": 0.99, + "learning_rate": 1.0061634176136148e-07, + "loss": 3.4299, + "step": 17205 + }, + { + "epoch": 0.99, + "learning_rate": 9.617538460270358e-08, + "loss": 3.3898, + "step": 17210 + }, + { + "epoch": 0.99, + "learning_rate": 9.183461122339587e-08, + "loss": 3.3819, + "step": 17215 + }, + { + "epoch": 0.99, + "learning_rate": 8.759402597637855e-08, + "loss": 3.4828, + "step": 17220 + }, + { + "epoch": 0.99, + "learning_rate": 8.345363311410559e-08, + "loss": 3.4786, + "step": 17225 + }, + { + "epoch": 0.99, + "learning_rate": 7.941343678857794e-08, + "loss": 3.5254, + "step": 17230 + }, + { + "epoch": 0.99, + "learning_rate": 7.547344105132137e-08, + "loss": 3.4258, + "step": 17235 + }, + { + "epoch": 0.99, + "learning_rate": 7.16336498533643e-08, + "loss": 3.4967, + "step": 17240 + }, + { + "epoch": 0.99, + "learning_rate": 6.789406704527102e-08, + "loss": 3.4317, + "step": 17245 + }, + { + "epoch": 0.99, + "learning_rate": 6.425469637708625e-08, + "loss": 3.4728, + "step": 17250 + }, + { + "epoch": 0.99, + "learning_rate": 6.071554149837955e-08, + "loss": 3.4544, + "step": 17255 + }, + { + "epoch": 0.99, + "learning_rate": 5.727660595823414e-08, + "loss": 3.4559, + "step": 17260 + }, + { + "epoch": 0.99, + "learning_rate": 5.39378932052248e-08, + "loss": 3.3827, + "step": 17265 + }, + { + "epoch": 0.99, + "learning_rate": 5.069940658740668e-08, + "loss": 3.4338, + "step": 17270 + }, + { + "epoch": 0.99, + "learning_rate": 4.7561149352348675e-08, + "loss": 3.3445, + "step": 17275 + }, + { + "epoch": 0.99, + "learning_rate": 4.4523124647100065e-08, + "loss": 3.3746, + "step": 17280 + }, + { + "epoch": 0.99, + "learning_rate": 4.158533551820165e-08, + "loss": 3.4412, + "step": 17285 + }, + { + "epoch": 0.99, + "learning_rate": 3.874778491167463e-08, + "loss": 3.4129, + "step": 17290 + }, + { + "epoch": 0.99, + "learning_rate": 3.6010475673009524e-08, + "loss": 3.3887, + "step": 17295 + }, + { + "epoch": 0.99, + "learning_rate": 3.337341054721055e-08, + "loss": 3.3326, + "step": 17300 + }, + { + "epoch": 0.99, + "learning_rate": 3.0836592178717926e-08, + "loss": 3.3864, + "step": 17305 + }, + { + "epoch": 0.99, + "learning_rate": 2.840002311145229e-08, + "loss": 3.4259, + "step": 17310 + }, + { + "epoch": 0.99, + "learning_rate": 2.6063705788825776e-08, + "loss": 3.4227, + "step": 17315 + }, + { + "epoch": 0.99, + "learning_rate": 2.3827642553686523e-08, + "loss": 3.372, + "step": 17320 + }, + { + "epoch": 0.99, + "learning_rate": 2.169183564837418e-08, + "loss": 3.365, + "step": 17325 + }, + { + "epoch": 0.99, + "learning_rate": 1.9656287214686598e-08, + "loss": 3.4321, + "step": 17330 + }, + { + "epoch": 0.99, + "learning_rate": 1.772099929385762e-08, + "loss": 3.4251, + "step": 17335 + }, + { + "epoch": 0.99, + "learning_rate": 1.588597382661261e-08, + "loss": 3.4773, + "step": 17340 + }, + { + "epoch": 1.0, + "learning_rate": 1.4151212653112922e-08, + "loss": 3.4041, + "step": 17345 + }, + { + "epoch": 1.0, + "learning_rate": 1.2516717512989219e-08, + "loss": 3.4205, + "step": 17350 + }, + { + "epoch": 1.0, + "learning_rate": 1.0982490045308157e-08, + "loss": 3.2826, + "step": 17355 + }, + { + "epoch": 1.0, + "learning_rate": 9.548531788605707e-09, + "loss": 3.3906, + "step": 17360 + }, + { + "epoch": 1.0, + "learning_rate": 8.21484418084273e-09, + "loss": 3.4246, + "step": 17365 + }, + { + "epoch": 1.0, + "learning_rate": 6.98142855946049e-09, + "loss": 3.3791, + "step": 17370 + }, + { + "epoch": 1.0, + "learning_rate": 5.848286161314054e-09, + "loss": 3.4087, + "step": 17375 + }, + { + "epoch": 1.0, + "learning_rate": 4.81541812273889e-09, + "loss": 3.3641, + "step": 17380 + }, + { + "epoch": 1.0, + "learning_rate": 3.882825479495367e-09, + "loss": 3.4101, + "step": 17385 + }, + { + "epoch": 1.0, + "learning_rate": 3.050509166779847e-09, + "loss": 3.444, + "step": 17390 + }, + { + "epoch": 1.0, + "learning_rate": 2.3184700192357966e-09, + "loss": 3.3518, + "step": 17395 + }, + { + "epoch": 1.0, + "learning_rate": 1.6867087709759866e-09, + "loss": 3.5253, + "step": 17400 + }, + { + "epoch": 1.0, + "learning_rate": 1.1552260555047767e-09, + "loss": 3.3462, + "step": 17405 + }, + { + "epoch": 1.0, + "learning_rate": 7.240224058180367e-10, + "loss": 3.4175, + "step": 17410 + }, + { + "epoch": 1.0, + "learning_rate": 3.93098254314328e-10, + "loss": 3.5075, + "step": 17415 + }, + { + "epoch": 1.0, + "learning_rate": 1.624539328615171e-10, + "loss": 3.4466, + "step": 17420 + }, + { + "epoch": 1.0, + "learning_rate": 3.208967271906005e-11, + "loss": 3.3339, + "step": 17425 + }, + { + "epoch": 1.0, + "eval_loss": 3.4159839153289795, + "eval_runtime": 3064.1904, + "eval_samples_per_second": 5.036, + "eval_steps_per_second": 0.63, + "step": 17429 + }, + { + "epoch": 1.0, + "step": 17429, + "total_flos": 7.981656717297032e+19, + "train_loss": 3.291749287953892, + "train_runtime": 84492.8357, + "train_samples_per_second": 1.65, + "train_steps_per_second": 0.206 + } + ], + "logging_steps": 5, + "max_steps": 17429, + "num_input_tokens_seen": 0, + "num_train_epochs": 1, + "save_steps": 100, + "total_flos": 7.981656717297032e+19, + "train_batch_size": 2, + "trial_name": null, + "trial_params": null +}