{ "best_metric": 1.145164132118225, "best_model_checkpoint": "./checkpoints/bart-JES-cnn_dailymail/checkpoint-215337", "epoch": 6.0, "global_step": 430674, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 2.999338246562365e-05, "loss": 2.5262, "step": 100 }, { "epoch": 0.0, "learning_rate": 2.9986416639964334e-05, "loss": 2.1005, "step": 200 }, { "epoch": 0.0, "learning_rate": 2.997945081430502e-05, "loss": 1.9954, "step": 300 }, { "epoch": 0.01, "learning_rate": 2.9972484988645703e-05, "loss": 1.9422, "step": 400 }, { "epoch": 0.01, "learning_rate": 2.996551916298639e-05, "loss": 1.8915, "step": 500 }, { "epoch": 0.01, "learning_rate": 2.9958553337327073e-05, "loss": 1.8859, "step": 600 }, { "epoch": 0.01, "learning_rate": 2.995158751166776e-05, "loss": 1.8133, "step": 700 }, { "epoch": 0.01, "learning_rate": 2.9944621686008442e-05, "loss": 1.8649, "step": 800 }, { "epoch": 0.01, "learning_rate": 2.9937725518605722e-05, "loss": 1.8476, "step": 900 }, { "epoch": 0.01, "learning_rate": 2.9930759692946405e-05, "loss": 1.8485, "step": 1000 }, { "epoch": 0.02, "learning_rate": 2.992379386728709e-05, "loss": 1.7922, "step": 1100 }, { "epoch": 0.02, "learning_rate": 2.9916828041627774e-05, "loss": 1.8166, "step": 1200 }, { "epoch": 0.02, "learning_rate": 2.990986221596846e-05, "loss": 1.7964, "step": 1300 }, { "epoch": 0.02, "learning_rate": 2.9902896390309144e-05, "loss": 1.7728, "step": 1400 }, { "epoch": 0.02, "learning_rate": 2.989593056464983e-05, "loss": 1.7414, "step": 1500 }, { "epoch": 0.02, "learning_rate": 2.9888964738990513e-05, "loss": 1.7531, "step": 1600 }, { "epoch": 0.02, "learning_rate": 2.98819989133312e-05, "loss": 1.7304, "step": 1700 }, { "epoch": 0.03, "learning_rate": 2.9875033087671882e-05, "loss": 1.7315, "step": 1800 }, { "epoch": 0.03, "learning_rate": 2.986806726201257e-05, "loss": 1.7073, "step": 1900 }, { "epoch": 0.03, "learning_rate": 2.986110143635325e-05, "loss": 1.7254, "step": 2000 }, { "epoch": 0.03, "learning_rate": 2.9854135610693938e-05, "loss": 1.7187, "step": 2100 }, { "epoch": 0.03, "learning_rate": 2.984716978503462e-05, "loss": 1.7382, "step": 2200 }, { "epoch": 0.03, "learning_rate": 2.9840203959375304e-05, "loss": 1.6937, "step": 2300 }, { "epoch": 0.03, "learning_rate": 2.983323813371599e-05, "loss": 1.6649, "step": 2400 }, { "epoch": 0.03, "learning_rate": 2.9826272308056673e-05, "loss": 1.7018, "step": 2500 }, { "epoch": 0.04, "learning_rate": 2.981930648239736e-05, "loss": 1.6868, "step": 2600 }, { "epoch": 0.04, "learning_rate": 2.9812340656738043e-05, "loss": 1.6758, "step": 2700 }, { "epoch": 0.04, "learning_rate": 2.980537483107873e-05, "loss": 1.7183, "step": 2800 }, { "epoch": 0.04, "learning_rate": 2.9798409005419412e-05, "loss": 1.659, "step": 2900 }, { "epoch": 0.04, "learning_rate": 2.97914431797601e-05, "loss": 1.6675, "step": 3000 }, { "epoch": 0.04, "learning_rate": 2.978447735410078e-05, "loss": 1.6956, "step": 3100 }, { "epoch": 0.04, "learning_rate": 2.9777511528441468e-05, "loss": 1.6716, "step": 3200 }, { "epoch": 0.05, "learning_rate": 2.977054570278215e-05, "loss": 1.6501, "step": 3300 }, { "epoch": 0.05, "learning_rate": 2.9763579877122837e-05, "loss": 1.669, "step": 3400 }, { "epoch": 0.05, "learning_rate": 2.975661405146352e-05, "loss": 1.6555, "step": 3500 }, { "epoch": 0.05, "learning_rate": 2.9749648225804207e-05, "loss": 1.6304, "step": 3600 }, { "epoch": 0.05, "learning_rate": 2.974268240014489e-05, "loss": 1.7103, "step": 3700 }, { "epoch": 0.05, "learning_rate": 2.9735716574485576e-05, "loss": 1.6437, "step": 3800 }, { "epoch": 0.05, "learning_rate": 2.972875074882626e-05, "loss": 1.6302, "step": 3900 }, { "epoch": 0.06, "learning_rate": 2.9721784923166945e-05, "loss": 1.6387, "step": 4000 }, { "epoch": 0.06, "learning_rate": 2.971481909750763e-05, "loss": 1.6215, "step": 4100 }, { "epoch": 0.06, "learning_rate": 2.9707853271848315e-05, "loss": 1.6473, "step": 4200 }, { "epoch": 0.06, "learning_rate": 2.9700887446188998e-05, "loss": 1.6353, "step": 4300 }, { "epoch": 0.06, "learning_rate": 2.9693921620529684e-05, "loss": 1.6203, "step": 4400 }, { "epoch": 0.06, "learning_rate": 2.9686955794870364e-05, "loss": 1.6379, "step": 4500 }, { "epoch": 0.06, "learning_rate": 2.967998996921105e-05, "loss": 1.6431, "step": 4600 }, { "epoch": 0.07, "learning_rate": 2.9673024143551733e-05, "loss": 1.6002, "step": 4700 }, { "epoch": 0.07, "learning_rate": 2.966605831789242e-05, "loss": 1.6099, "step": 4800 }, { "epoch": 0.07, "learning_rate": 2.9659092492233106e-05, "loss": 1.6515, "step": 4900 }, { "epoch": 0.07, "learning_rate": 2.965212666657379e-05, "loss": 1.6138, "step": 5000 }, { "epoch": 0.07, "learning_rate": 2.9645160840914475e-05, "loss": 1.6059, "step": 5100 }, { "epoch": 0.07, "learning_rate": 2.9638195015255158e-05, "loss": 1.6292, "step": 5200 }, { "epoch": 0.07, "learning_rate": 2.9631229189595845e-05, "loss": 1.6093, "step": 5300 }, { "epoch": 0.08, "learning_rate": 2.9624263363936528e-05, "loss": 1.5921, "step": 5400 }, { "epoch": 0.08, "learning_rate": 2.9617297538277214e-05, "loss": 1.6198, "step": 5500 }, { "epoch": 0.08, "learning_rate": 2.9610331712617897e-05, "loss": 1.5706, "step": 5600 }, { "epoch": 0.08, "learning_rate": 2.9603365886958583e-05, "loss": 1.58, "step": 5700 }, { "epoch": 0.08, "learning_rate": 2.9596400061299266e-05, "loss": 1.5918, "step": 5800 }, { "epoch": 0.08, "learning_rate": 2.9589434235639953e-05, "loss": 1.6068, "step": 5900 }, { "epoch": 0.08, "learning_rate": 2.9582468409980636e-05, "loss": 1.6168, "step": 6000 }, { "epoch": 0.08, "learning_rate": 2.9575502584321322e-05, "loss": 1.6105, "step": 6100 }, { "epoch": 0.09, "learning_rate": 2.9568536758662005e-05, "loss": 1.6061, "step": 6200 }, { "epoch": 0.09, "learning_rate": 2.956157093300269e-05, "loss": 1.5705, "step": 6300 }, { "epoch": 0.09, "learning_rate": 2.9554605107343374e-05, "loss": 1.6033, "step": 6400 }, { "epoch": 0.09, "learning_rate": 2.954763928168406e-05, "loss": 1.5526, "step": 6500 }, { "epoch": 0.09, "learning_rate": 2.9540673456024744e-05, "loss": 1.5967, "step": 6600 }, { "epoch": 0.09, "learning_rate": 2.9533707630365427e-05, "loss": 1.6052, "step": 6700 }, { "epoch": 0.09, "learning_rate": 2.952674180470611e-05, "loss": 1.5469, "step": 6800 }, { "epoch": 0.1, "learning_rate": 2.9519775979046796e-05, "loss": 1.5831, "step": 6900 }, { "epoch": 0.1, "learning_rate": 2.9512879811644073e-05, "loss": 1.5752, "step": 7000 }, { "epoch": 0.1, "learning_rate": 2.950591398598476e-05, "loss": 1.5882, "step": 7100 }, { "epoch": 0.1, "learning_rate": 2.9498948160325442e-05, "loss": 1.6126, "step": 7200 }, { "epoch": 0.1, "learning_rate": 2.9491982334666128e-05, "loss": 1.5726, "step": 7300 }, { "epoch": 0.1, "learning_rate": 2.948501650900681e-05, "loss": 1.6108, "step": 7400 }, { "epoch": 0.1, "learning_rate": 2.9478050683347498e-05, "loss": 1.5633, "step": 7500 }, { "epoch": 0.11, "learning_rate": 2.947108485768818e-05, "loss": 1.5745, "step": 7600 }, { "epoch": 0.11, "learning_rate": 2.9464119032028867e-05, "loss": 1.5643, "step": 7700 }, { "epoch": 0.11, "learning_rate": 2.945715320636955e-05, "loss": 1.5851, "step": 7800 }, { "epoch": 0.11, "learning_rate": 2.9450187380710236e-05, "loss": 1.5255, "step": 7900 }, { "epoch": 0.11, "learning_rate": 2.944322155505092e-05, "loss": 1.5414, "step": 8000 }, { "epoch": 0.11, "learning_rate": 2.9436255729391606e-05, "loss": 1.5642, "step": 8100 }, { "epoch": 0.11, "learning_rate": 2.9429289903732292e-05, "loss": 1.5488, "step": 8200 }, { "epoch": 0.12, "learning_rate": 2.9422324078072975e-05, "loss": 1.5419, "step": 8300 }, { "epoch": 0.12, "learning_rate": 2.941535825241366e-05, "loss": 1.5602, "step": 8400 }, { "epoch": 0.12, "learning_rate": 2.9408392426754344e-05, "loss": 1.5651, "step": 8500 }, { "epoch": 0.12, "learning_rate": 2.940142660109503e-05, "loss": 1.56, "step": 8600 }, { "epoch": 0.12, "learning_rate": 2.9394460775435714e-05, "loss": 1.5774, "step": 8700 }, { "epoch": 0.12, "learning_rate": 2.93874949497764e-05, "loss": 1.5753, "step": 8800 }, { "epoch": 0.12, "learning_rate": 2.9380529124117083e-05, "loss": 1.5387, "step": 8900 }, { "epoch": 0.13, "learning_rate": 2.937356329845777e-05, "loss": 1.538, "step": 9000 }, { "epoch": 0.13, "learning_rate": 2.9366667131055046e-05, "loss": 1.5481, "step": 9100 }, { "epoch": 0.13, "learning_rate": 2.9359701305395732e-05, "loss": 1.5839, "step": 9200 }, { "epoch": 0.13, "learning_rate": 2.9352735479736412e-05, "loss": 1.5186, "step": 9300 }, { "epoch": 0.13, "learning_rate": 2.93457696540771e-05, "loss": 1.5407, "step": 9400 }, { "epoch": 0.13, "learning_rate": 2.933880382841778e-05, "loss": 1.5759, "step": 9500 }, { "epoch": 0.13, "learning_rate": 2.9331838002758468e-05, "loss": 1.5414, "step": 9600 }, { "epoch": 0.14, "learning_rate": 2.932487217709915e-05, "loss": 1.518, "step": 9700 }, { "epoch": 0.14, "learning_rate": 2.9317906351439837e-05, "loss": 1.5332, "step": 9800 }, { "epoch": 0.14, "learning_rate": 2.931094052578052e-05, "loss": 1.5843, "step": 9900 }, { "epoch": 0.14, "learning_rate": 2.9303974700121206e-05, "loss": 1.5369, "step": 10000 }, { "epoch": 0.14, "learning_rate": 2.929700887446189e-05, "loss": 1.5456, "step": 10100 }, { "epoch": 0.14, "learning_rate": 2.9290043048802576e-05, "loss": 1.5237, "step": 10200 }, { "epoch": 0.14, "learning_rate": 2.928307722314326e-05, "loss": 1.5473, "step": 10300 }, { "epoch": 0.14, "learning_rate": 2.9276111397483945e-05, "loss": 1.5331, "step": 10400 }, { "epoch": 0.15, "learning_rate": 2.9269145571824628e-05, "loss": 1.5466, "step": 10500 }, { "epoch": 0.15, "learning_rate": 2.9262179746165315e-05, "loss": 1.5567, "step": 10600 }, { "epoch": 0.15, "learning_rate": 2.9255213920505998e-05, "loss": 1.5725, "step": 10700 }, { "epoch": 0.15, "learning_rate": 2.9248248094846684e-05, "loss": 1.5546, "step": 10800 }, { "epoch": 0.15, "learning_rate": 2.9241282269187367e-05, "loss": 1.5172, "step": 10900 }, { "epoch": 0.15, "learning_rate": 2.9234316443528053e-05, "loss": 1.511, "step": 11000 }, { "epoch": 0.15, "learning_rate": 2.9227350617868736e-05, "loss": 1.4924, "step": 11100 }, { "epoch": 0.16, "learning_rate": 2.9220384792209423e-05, "loss": 1.5773, "step": 11200 }, { "epoch": 0.16, "learning_rate": 2.92134886248067e-05, "loss": 1.568, "step": 11300 }, { "epoch": 0.16, "learning_rate": 2.9206522799147385e-05, "loss": 1.5465, "step": 11400 }, { "epoch": 0.16, "learning_rate": 2.919955697348807e-05, "loss": 1.5592, "step": 11500 }, { "epoch": 0.16, "learning_rate": 2.9192591147828755e-05, "loss": 1.5336, "step": 11600 }, { "epoch": 0.16, "learning_rate": 2.9185625322169438e-05, "loss": 1.4984, "step": 11700 }, { "epoch": 0.16, "learning_rate": 2.917865949651012e-05, "loss": 1.5131, "step": 11800 }, { "epoch": 0.17, "learning_rate": 2.9171693670850804e-05, "loss": 1.5503, "step": 11900 }, { "epoch": 0.17, "learning_rate": 2.916472784519149e-05, "loss": 1.5037, "step": 12000 }, { "epoch": 0.17, "learning_rate": 2.9157762019532173e-05, "loss": 1.5283, "step": 12100 }, { "epoch": 0.17, "learning_rate": 2.915079619387286e-05, "loss": 1.5331, "step": 12200 }, { "epoch": 0.17, "learning_rate": 2.9143830368213546e-05, "loss": 1.509, "step": 12300 }, { "epoch": 0.17, "learning_rate": 2.913686454255423e-05, "loss": 1.5433, "step": 12400 }, { "epoch": 0.17, "learning_rate": 2.9129968375151505e-05, "loss": 1.513, "step": 12500 }, { "epoch": 0.18, "learning_rate": 2.9123002549492192e-05, "loss": 1.553, "step": 12600 }, { "epoch": 0.18, "learning_rate": 2.9116036723832875e-05, "loss": 1.5211, "step": 12700 }, { "epoch": 0.18, "learning_rate": 2.910907089817356e-05, "loss": 1.4837, "step": 12800 }, { "epoch": 0.18, "learning_rate": 2.9102105072514244e-05, "loss": 1.5275, "step": 12900 }, { "epoch": 0.18, "learning_rate": 2.909513924685493e-05, "loss": 1.4938, "step": 13000 }, { "epoch": 0.18, "learning_rate": 2.9088173421195617e-05, "loss": 1.5339, "step": 13100 }, { "epoch": 0.18, "learning_rate": 2.90812075955363e-05, "loss": 1.4991, "step": 13200 }, { "epoch": 0.19, "learning_rate": 2.9074241769876986e-05, "loss": 1.4775, "step": 13300 }, { "epoch": 0.19, "learning_rate": 2.906727594421767e-05, "loss": 1.4896, "step": 13400 }, { "epoch": 0.19, "learning_rate": 2.9060310118558356e-05, "loss": 1.5014, "step": 13500 }, { "epoch": 0.19, "learning_rate": 2.905334429289904e-05, "loss": 1.5069, "step": 13600 }, { "epoch": 0.19, "learning_rate": 2.9046378467239725e-05, "loss": 1.4634, "step": 13700 }, { "epoch": 0.19, "learning_rate": 2.9039412641580408e-05, "loss": 1.4807, "step": 13800 }, { "epoch": 0.19, "learning_rate": 2.9032446815921094e-05, "loss": 1.5231, "step": 13900 }, { "epoch": 0.2, "learning_rate": 2.9025480990261777e-05, "loss": 1.482, "step": 14000 }, { "epoch": 0.2, "learning_rate": 2.9018515164602464e-05, "loss": 1.5036, "step": 14100 }, { "epoch": 0.2, "learning_rate": 2.9011549338943143e-05, "loss": 1.5032, "step": 14200 }, { "epoch": 0.2, "learning_rate": 2.900458351328383e-05, "loss": 1.4612, "step": 14300 }, { "epoch": 0.2, "learning_rate": 2.8997617687624513e-05, "loss": 1.4982, "step": 14400 }, { "epoch": 0.2, "learning_rate": 2.89906518619652e-05, "loss": 1.4899, "step": 14500 }, { "epoch": 0.2, "learning_rate": 2.8983686036305882e-05, "loss": 1.5048, "step": 14600 }, { "epoch": 0.2, "learning_rate": 2.897672021064657e-05, "loss": 1.4901, "step": 14700 }, { "epoch": 0.21, "learning_rate": 2.896975438498725e-05, "loss": 1.4956, "step": 14800 }, { "epoch": 0.21, "learning_rate": 2.8962788559327938e-05, "loss": 1.505, "step": 14900 }, { "epoch": 0.21, "learning_rate": 2.895582273366862e-05, "loss": 1.4982, "step": 15000 }, { "epoch": 0.21, "learning_rate": 2.8948856908009307e-05, "loss": 1.4777, "step": 15100 }, { "epoch": 0.21, "learning_rate": 2.894189108234999e-05, "loss": 1.4768, "step": 15200 }, { "epoch": 0.21, "learning_rate": 2.8934925256690676e-05, "loss": 1.485, "step": 15300 }, { "epoch": 0.21, "learning_rate": 2.892795943103136e-05, "loss": 1.4376, "step": 15400 }, { "epoch": 0.22, "learning_rate": 2.8920993605372046e-05, "loss": 1.4533, "step": 15500 }, { "epoch": 0.22, "learning_rate": 2.8914027779712732e-05, "loss": 1.4851, "step": 15600 }, { "epoch": 0.22, "learning_rate": 2.8907061954053415e-05, "loss": 1.4593, "step": 15700 }, { "epoch": 0.22, "learning_rate": 2.89000961283941e-05, "loss": 1.4993, "step": 15800 }, { "epoch": 0.22, "learning_rate": 2.8893130302734785e-05, "loss": 1.5023, "step": 15900 }, { "epoch": 0.22, "learning_rate": 2.888616447707547e-05, "loss": 1.4778, "step": 16000 }, { "epoch": 0.22, "learning_rate": 2.8879198651416154e-05, "loss": 1.4609, "step": 16100 }, { "epoch": 0.23, "learning_rate": 2.887223282575684e-05, "loss": 1.494, "step": 16200 }, { "epoch": 0.23, "learning_rate": 2.8865267000097523e-05, "loss": 1.4559, "step": 16300 }, { "epoch": 0.23, "learning_rate": 2.8858301174438206e-05, "loss": 1.503, "step": 16400 }, { "epoch": 0.23, "learning_rate": 2.885133534877889e-05, "loss": 1.5209, "step": 16500 }, { "epoch": 0.23, "learning_rate": 2.8844369523119576e-05, "loss": 1.4833, "step": 16600 }, { "epoch": 0.23, "learning_rate": 2.883740369746026e-05, "loss": 1.4805, "step": 16700 }, { "epoch": 0.23, "learning_rate": 2.8830437871800945e-05, "loss": 1.4773, "step": 16800 }, { "epoch": 0.24, "learning_rate": 2.8823472046141628e-05, "loss": 1.4863, "step": 16900 }, { "epoch": 0.24, "learning_rate": 2.8816575878738908e-05, "loss": 1.5073, "step": 17000 }, { "epoch": 0.24, "learning_rate": 2.880961005307959e-05, "loss": 1.4309, "step": 17100 }, { "epoch": 0.24, "learning_rate": 2.8802644227420277e-05, "loss": 1.486, "step": 17200 }, { "epoch": 0.24, "learning_rate": 2.8795748060017554e-05, "loss": 1.4545, "step": 17300 }, { "epoch": 0.24, "learning_rate": 2.878878223435824e-05, "loss": 1.4366, "step": 17400 }, { "epoch": 0.24, "learning_rate": 2.8781816408698923e-05, "loss": 1.4476, "step": 17500 }, { "epoch": 0.25, "learning_rate": 2.877485058303961e-05, "loss": 1.5035, "step": 17600 }, { "epoch": 0.25, "learning_rate": 2.8767884757380292e-05, "loss": 1.4817, "step": 17700 }, { "epoch": 0.25, "learning_rate": 2.876091893172098e-05, "loss": 1.4922, "step": 17800 }, { "epoch": 0.25, "learning_rate": 2.8753953106061662e-05, "loss": 1.4724, "step": 17900 }, { "epoch": 0.25, "learning_rate": 2.8746987280402348e-05, "loss": 1.467, "step": 18000 }, { "epoch": 0.25, "learning_rate": 2.874002145474303e-05, "loss": 1.4925, "step": 18100 }, { "epoch": 0.25, "learning_rate": 2.8733055629083717e-05, "loss": 1.4711, "step": 18200 }, { "epoch": 0.25, "learning_rate": 2.87260898034244e-05, "loss": 1.467, "step": 18300 }, { "epoch": 0.26, "learning_rate": 2.8719123977765087e-05, "loss": 1.4444, "step": 18400 }, { "epoch": 0.26, "learning_rate": 2.871215815210577e-05, "loss": 1.462, "step": 18500 }, { "epoch": 0.26, "learning_rate": 2.8705192326446456e-05, "loss": 1.4642, "step": 18600 }, { "epoch": 0.26, "learning_rate": 2.869822650078714e-05, "loss": 1.4821, "step": 18700 }, { "epoch": 0.26, "learning_rate": 2.8691260675127826e-05, "loss": 1.455, "step": 18800 }, { "epoch": 0.26, "learning_rate": 2.868429484946851e-05, "loss": 1.5036, "step": 18900 }, { "epoch": 0.26, "learning_rate": 2.867732902380919e-05, "loss": 1.4585, "step": 19000 }, { "epoch": 0.27, "learning_rate": 2.8670363198149875e-05, "loss": 1.4333, "step": 19100 }, { "epoch": 0.27, "learning_rate": 2.866339737249056e-05, "loss": 1.4954, "step": 19200 }, { "epoch": 0.27, "learning_rate": 2.8656431546831244e-05, "loss": 1.5066, "step": 19300 }, { "epoch": 0.27, "learning_rate": 2.864946572117193e-05, "loss": 1.4496, "step": 19400 }, { "epoch": 0.27, "learning_rate": 2.8642499895512613e-05, "loss": 1.4684, "step": 19500 }, { "epoch": 0.27, "learning_rate": 2.86355340698533e-05, "loss": 1.4466, "step": 19600 }, { "epoch": 0.27, "learning_rate": 2.8628637902450576e-05, "loss": 1.4826, "step": 19700 }, { "epoch": 0.28, "learning_rate": 2.8621672076791262e-05, "loss": 1.4521, "step": 19800 }, { "epoch": 0.28, "learning_rate": 2.8614706251131945e-05, "loss": 1.4588, "step": 19900 }, { "epoch": 0.28, "learning_rate": 2.8607740425472632e-05, "loss": 1.4844, "step": 20000 }, { "epoch": 0.28, "learning_rate": 2.8600774599813315e-05, "loss": 1.4472, "step": 20100 }, { "epoch": 0.28, "learning_rate": 2.8593808774154e-05, "loss": 1.4347, "step": 20200 }, { "epoch": 0.28, "learning_rate": 2.8586842948494684e-05, "loss": 1.4641, "step": 20300 }, { "epoch": 0.28, "learning_rate": 2.857987712283537e-05, "loss": 1.4338, "step": 20400 }, { "epoch": 0.29, "learning_rate": 2.8572911297176057e-05, "loss": 1.4878, "step": 20500 }, { "epoch": 0.29, "learning_rate": 2.856594547151674e-05, "loss": 1.4638, "step": 20600 }, { "epoch": 0.29, "learning_rate": 2.8558979645857426e-05, "loss": 1.4685, "step": 20700 }, { "epoch": 0.29, "learning_rate": 2.855201382019811e-05, "loss": 1.455, "step": 20800 }, { "epoch": 0.29, "learning_rate": 2.8545047994538796e-05, "loss": 1.4495, "step": 20900 }, { "epoch": 0.29, "learning_rate": 2.853808216887948e-05, "loss": 1.4194, "step": 21000 }, { "epoch": 0.29, "learning_rate": 2.8531116343220165e-05, "loss": 1.4316, "step": 21100 }, { "epoch": 0.3, "learning_rate": 2.8524150517560848e-05, "loss": 1.4557, "step": 21200 }, { "epoch": 0.3, "learning_rate": 2.8517184691901534e-05, "loss": 1.4427, "step": 21300 }, { "epoch": 0.3, "learning_rate": 2.8510218866242217e-05, "loss": 1.4752, "step": 21400 }, { "epoch": 0.3, "learning_rate": 2.85032530405829e-05, "loss": 1.4492, "step": 21500 }, { "epoch": 0.3, "learning_rate": 2.8496287214923583e-05, "loss": 1.4511, "step": 21600 }, { "epoch": 0.3, "learning_rate": 2.848932138926427e-05, "loss": 1.4583, "step": 21700 }, { "epoch": 0.3, "learning_rate": 2.8482355563604953e-05, "loss": 1.4913, "step": 21800 }, { "epoch": 0.31, "learning_rate": 2.847538973794564e-05, "loss": 1.4318, "step": 21900 }, { "epoch": 0.31, "learning_rate": 2.8468423912286322e-05, "loss": 1.4226, "step": 22000 }, { "epoch": 0.31, "learning_rate": 2.846145808662701e-05, "loss": 1.4472, "step": 22100 }, { "epoch": 0.31, "learning_rate": 2.845449226096769e-05, "loss": 1.4495, "step": 22200 }, { "epoch": 0.31, "learning_rate": 2.8447526435308378e-05, "loss": 1.4007, "step": 22300 }, { "epoch": 0.31, "learning_rate": 2.844056060964906e-05, "loss": 1.4201, "step": 22400 }, { "epoch": 0.31, "learning_rate": 2.8433594783989747e-05, "loss": 1.4641, "step": 22500 }, { "epoch": 0.31, "learning_rate": 2.842662895833043e-05, "loss": 1.4675, "step": 22600 }, { "epoch": 0.32, "learning_rate": 2.8419663132671117e-05, "loss": 1.4365, "step": 22700 }, { "epoch": 0.32, "learning_rate": 2.84126973070118e-05, "loss": 1.4509, "step": 22800 }, { "epoch": 0.32, "learning_rate": 2.8405731481352486e-05, "loss": 1.4628, "step": 22900 }, { "epoch": 0.32, "learning_rate": 2.8398765655693172e-05, "loss": 1.4723, "step": 23000 }, { "epoch": 0.32, "learning_rate": 2.8391799830033855e-05, "loss": 1.4234, "step": 23100 }, { "epoch": 0.32, "learning_rate": 2.838483400437454e-05, "loss": 1.4527, "step": 23200 }, { "epoch": 0.32, "learning_rate": 2.8377868178715225e-05, "loss": 1.4473, "step": 23300 }, { "epoch": 0.33, "learning_rate": 2.837090235305591e-05, "loss": 1.4594, "step": 23400 }, { "epoch": 0.33, "learning_rate": 2.8363936527396594e-05, "loss": 1.4888, "step": 23500 }, { "epoch": 0.33, "learning_rate": 2.835697070173728e-05, "loss": 1.4387, "step": 23600 }, { "epoch": 0.33, "learning_rate": 2.835000487607796e-05, "loss": 1.4355, "step": 23700 }, { "epoch": 0.33, "learning_rate": 2.8343039050418646e-05, "loss": 1.4441, "step": 23800 }, { "epoch": 0.33, "learning_rate": 2.833607322475933e-05, "loss": 1.4317, "step": 23900 }, { "epoch": 0.33, "learning_rate": 2.8329107399100016e-05, "loss": 1.4386, "step": 24000 }, { "epoch": 0.34, "learning_rate": 2.83221415734407e-05, "loss": 1.4528, "step": 24100 }, { "epoch": 0.34, "learning_rate": 2.8315175747781385e-05, "loss": 1.4295, "step": 24200 }, { "epoch": 0.34, "learning_rate": 2.8308209922122068e-05, "loss": 1.402, "step": 24300 }, { "epoch": 0.34, "learning_rate": 2.8301244096462754e-05, "loss": 1.4026, "step": 24400 }, { "epoch": 0.34, "learning_rate": 2.8294278270803437e-05, "loss": 1.4503, "step": 24500 }, { "epoch": 0.34, "learning_rate": 2.8287312445144124e-05, "loss": 1.4299, "step": 24600 }, { "epoch": 0.34, "learning_rate": 2.8280346619484807e-05, "loss": 1.4347, "step": 24700 }, { "epoch": 0.35, "learning_rate": 2.8273380793825493e-05, "loss": 1.4493, "step": 24800 }, { "epoch": 0.35, "learning_rate": 2.8266414968166176e-05, "loss": 1.4448, "step": 24900 }, { "epoch": 0.35, "learning_rate": 2.8259449142506863e-05, "loss": 1.4153, "step": 25000 }, { "epoch": 0.35, "learning_rate": 2.8252483316847546e-05, "loss": 1.4252, "step": 25100 }, { "epoch": 0.35, "learning_rate": 2.8245517491188232e-05, "loss": 1.3862, "step": 25200 }, { "epoch": 0.35, "learning_rate": 2.823855166552892e-05, "loss": 1.4359, "step": 25300 }, { "epoch": 0.35, "learning_rate": 2.82315858398696e-05, "loss": 1.4428, "step": 25400 }, { "epoch": 0.36, "learning_rate": 2.8224620014210288e-05, "loss": 1.4598, "step": 25500 }, { "epoch": 0.36, "learning_rate": 2.821765418855097e-05, "loss": 1.4415, "step": 25600 }, { "epoch": 0.36, "learning_rate": 2.8210758021148247e-05, "loss": 1.4143, "step": 25700 }, { "epoch": 0.36, "learning_rate": 2.8203792195488933e-05, "loss": 1.3649, "step": 25800 }, { "epoch": 0.36, "learning_rate": 2.819689602808621e-05, "loss": 1.4065, "step": 25900 }, { "epoch": 0.36, "learning_rate": 2.8189930202426896e-05, "loss": 1.4547, "step": 26000 }, { "epoch": 0.36, "learning_rate": 2.818296437676758e-05, "loss": 1.4199, "step": 26100 }, { "epoch": 0.37, "learning_rate": 2.8175998551108266e-05, "loss": 1.418, "step": 26200 }, { "epoch": 0.37, "learning_rate": 2.8169032725448945e-05, "loss": 1.4205, "step": 26300 }, { "epoch": 0.37, "learning_rate": 2.816206689978963e-05, "loss": 1.433, "step": 26400 }, { "epoch": 0.37, "learning_rate": 2.8155101074130315e-05, "loss": 1.413, "step": 26500 }, { "epoch": 0.37, "learning_rate": 2.8148135248471e-05, "loss": 1.3851, "step": 26600 }, { "epoch": 0.37, "learning_rate": 2.8141169422811684e-05, "loss": 1.4563, "step": 26700 }, { "epoch": 0.37, "learning_rate": 2.813420359715237e-05, "loss": 1.4352, "step": 26800 }, { "epoch": 0.37, "learning_rate": 2.8127237771493057e-05, "loss": 1.4139, "step": 26900 }, { "epoch": 0.38, "learning_rate": 2.812027194583374e-05, "loss": 1.3865, "step": 27000 }, { "epoch": 0.38, "learning_rate": 2.8113306120174426e-05, "loss": 1.4146, "step": 27100 }, { "epoch": 0.38, "learning_rate": 2.810634029451511e-05, "loss": 1.3974, "step": 27200 }, { "epoch": 0.38, "learning_rate": 2.8099374468855795e-05, "loss": 1.3841, "step": 27300 }, { "epoch": 0.38, "learning_rate": 2.809240864319648e-05, "loss": 1.4116, "step": 27400 }, { "epoch": 0.38, "learning_rate": 2.8085442817537165e-05, "loss": 1.3978, "step": 27500 }, { "epoch": 0.38, "learning_rate": 2.8078476991877848e-05, "loss": 1.4165, "step": 27600 }, { "epoch": 0.39, "learning_rate": 2.8071511166218534e-05, "loss": 1.4303, "step": 27700 }, { "epoch": 0.39, "learning_rate": 2.8064545340559217e-05, "loss": 1.4583, "step": 27800 }, { "epoch": 0.39, "learning_rate": 2.8057579514899904e-05, "loss": 1.3974, "step": 27900 }, { "epoch": 0.39, "learning_rate": 2.8050613689240587e-05, "loss": 1.4024, "step": 28000 }, { "epoch": 0.39, "learning_rate": 2.8043647863581273e-05, "loss": 1.4423, "step": 28100 }, { "epoch": 0.39, "learning_rate": 2.8036682037921956e-05, "loss": 1.4083, "step": 28200 }, { "epoch": 0.39, "learning_rate": 2.8029716212262642e-05, "loss": 1.421, "step": 28300 }, { "epoch": 0.4, "learning_rate": 2.8022750386603325e-05, "loss": 1.3915, "step": 28400 }, { "epoch": 0.4, "learning_rate": 2.8015784560944008e-05, "loss": 1.3912, "step": 28500 }, { "epoch": 0.4, "learning_rate": 2.8008888393541288e-05, "loss": 1.3958, "step": 28600 }, { "epoch": 0.4, "learning_rate": 2.800192256788197e-05, "loss": 1.4012, "step": 28700 }, { "epoch": 0.4, "learning_rate": 2.7994956742222654e-05, "loss": 1.4497, "step": 28800 }, { "epoch": 0.4, "learning_rate": 2.798799091656334e-05, "loss": 1.4186, "step": 28900 }, { "epoch": 0.4, "learning_rate": 2.7981025090904023e-05, "loss": 1.4303, "step": 29000 }, { "epoch": 0.41, "learning_rate": 2.797405926524471e-05, "loss": 1.4108, "step": 29100 }, { "epoch": 0.41, "learning_rate": 2.7967093439585393e-05, "loss": 1.3992, "step": 29200 }, { "epoch": 0.41, "learning_rate": 2.796012761392608e-05, "loss": 1.4444, "step": 29300 }, { "epoch": 0.41, "learning_rate": 2.7953161788266762e-05, "loss": 1.4092, "step": 29400 }, { "epoch": 0.41, "learning_rate": 2.794619596260745e-05, "loss": 1.417, "step": 29500 }, { "epoch": 0.41, "learning_rate": 2.793923013694813e-05, "loss": 1.4212, "step": 29600 }, { "epoch": 0.41, "learning_rate": 2.7932264311288818e-05, "loss": 1.4464, "step": 29700 }, { "epoch": 0.42, "learning_rate": 2.79252984856295e-05, "loss": 1.3923, "step": 29800 }, { "epoch": 0.42, "learning_rate": 2.7918332659970187e-05, "loss": 1.4111, "step": 29900 }, { "epoch": 0.42, "learning_rate": 2.791136683431087e-05, "loss": 1.4088, "step": 30000 }, { "epoch": 0.42, "learning_rate": 2.7904401008651557e-05, "loss": 1.4142, "step": 30100 }, { "epoch": 0.42, "learning_rate": 2.7897435182992243e-05, "loss": 1.4369, "step": 30200 }, { "epoch": 0.42, "learning_rate": 2.7890469357332926e-05, "loss": 1.414, "step": 30300 }, { "epoch": 0.42, "learning_rate": 2.7883503531673612e-05, "loss": 1.3888, "step": 30400 }, { "epoch": 0.42, "learning_rate": 2.7876537706014295e-05, "loss": 1.4087, "step": 30500 }, { "epoch": 0.43, "learning_rate": 2.7869641538611572e-05, "loss": 1.4189, "step": 30600 }, { "epoch": 0.43, "learning_rate": 2.7862675712952258e-05, "loss": 1.3948, "step": 30700 }, { "epoch": 0.43, "learning_rate": 2.785570988729294e-05, "loss": 1.4055, "step": 30800 }, { "epoch": 0.43, "learning_rate": 2.7848744061633628e-05, "loss": 1.3809, "step": 30900 }, { "epoch": 0.43, "learning_rate": 2.7841778235974314e-05, "loss": 1.454, "step": 31000 }, { "epoch": 0.43, "learning_rate": 2.7834812410314994e-05, "loss": 1.3974, "step": 31100 }, { "epoch": 0.43, "learning_rate": 2.782784658465568e-05, "loss": 1.4263, "step": 31200 }, { "epoch": 0.44, "learning_rate": 2.7820880758996363e-05, "loss": 1.3835, "step": 31300 }, { "epoch": 0.44, "learning_rate": 2.781391493333705e-05, "loss": 1.4252, "step": 31400 }, { "epoch": 0.44, "learning_rate": 2.7806949107677732e-05, "loss": 1.4255, "step": 31500 }, { "epoch": 0.44, "learning_rate": 2.779998328201842e-05, "loss": 1.3858, "step": 31600 }, { "epoch": 0.44, "learning_rate": 2.77930174563591e-05, "loss": 1.3573, "step": 31700 }, { "epoch": 0.44, "learning_rate": 2.7786051630699788e-05, "loss": 1.4498, "step": 31800 }, { "epoch": 0.44, "learning_rate": 2.777908580504047e-05, "loss": 1.3861, "step": 31900 }, { "epoch": 0.45, "learning_rate": 2.7772119979381157e-05, "loss": 1.459, "step": 32000 }, { "epoch": 0.45, "learning_rate": 2.776515415372184e-05, "loss": 1.4208, "step": 32100 }, { "epoch": 0.45, "learning_rate": 2.7758188328062527e-05, "loss": 1.4005, "step": 32200 }, { "epoch": 0.45, "learning_rate": 2.775122250240321e-05, "loss": 1.4153, "step": 32300 }, { "epoch": 0.45, "learning_rate": 2.7744256676743896e-05, "loss": 1.3866, "step": 32400 }, { "epoch": 0.45, "learning_rate": 2.773729085108458e-05, "loss": 1.3921, "step": 32500 }, { "epoch": 0.45, "learning_rate": 2.7730325025425265e-05, "loss": 1.4176, "step": 32600 }, { "epoch": 0.46, "learning_rate": 2.772335919976595e-05, "loss": 1.438, "step": 32700 }, { "epoch": 0.46, "learning_rate": 2.7716393374106635e-05, "loss": 1.4075, "step": 32800 }, { "epoch": 0.46, "learning_rate": 2.7709427548447318e-05, "loss": 1.4416, "step": 32900 }, { "epoch": 0.46, "learning_rate": 2.7702531381044598e-05, "loss": 1.3933, "step": 33000 }, { "epoch": 0.46, "learning_rate": 2.769556555538528e-05, "loss": 1.3889, "step": 33100 }, { "epoch": 0.46, "learning_rate": 2.7688599729725967e-05, "loss": 1.4002, "step": 33200 }, { "epoch": 0.46, "learning_rate": 2.768163390406665e-05, "loss": 1.4135, "step": 33300 }, { "epoch": 0.47, "learning_rate": 2.7674668078407336e-05, "loss": 1.392, "step": 33400 }, { "epoch": 0.47, "learning_rate": 2.766770225274802e-05, "loss": 1.4133, "step": 33500 }, { "epoch": 0.47, "learning_rate": 2.7660736427088702e-05, "loss": 1.397, "step": 33600 }, { "epoch": 0.47, "learning_rate": 2.7653770601429385e-05, "loss": 1.3767, "step": 33700 }, { "epoch": 0.47, "learning_rate": 2.7646804775770072e-05, "loss": 1.411, "step": 33800 }, { "epoch": 0.47, "learning_rate": 2.7639838950110755e-05, "loss": 1.3778, "step": 33900 }, { "epoch": 0.47, "learning_rate": 2.763287312445144e-05, "loss": 1.4178, "step": 34000 }, { "epoch": 0.48, "learning_rate": 2.7625907298792124e-05, "loss": 1.4095, "step": 34100 }, { "epoch": 0.48, "learning_rate": 2.761894147313281e-05, "loss": 1.3936, "step": 34200 }, { "epoch": 0.48, "learning_rate": 2.7611975647473497e-05, "loss": 1.3965, "step": 34300 }, { "epoch": 0.48, "learning_rate": 2.760500982181418e-05, "loss": 1.4105, "step": 34400 }, { "epoch": 0.48, "learning_rate": 2.7598043996154866e-05, "loss": 1.3629, "step": 34500 }, { "epoch": 0.48, "learning_rate": 2.7591217487008736e-05, "loss": 1.3862, "step": 34600 }, { "epoch": 0.48, "learning_rate": 2.758425166134942e-05, "loss": 1.4215, "step": 34700 }, { "epoch": 0.48, "learning_rate": 2.7577285835690105e-05, "loss": 1.3996, "step": 34800 }, { "epoch": 0.49, "learning_rate": 2.757032001003079e-05, "loss": 1.4127, "step": 34900 }, { "epoch": 0.49, "learning_rate": 2.7563354184371475e-05, "loss": 1.3986, "step": 35000 }, { "epoch": 0.49, "learning_rate": 2.7556388358712158e-05, "loss": 1.3808, "step": 35100 }, { "epoch": 0.49, "learning_rate": 2.7549422533052844e-05, "loss": 1.3846, "step": 35200 }, { "epoch": 0.49, "learning_rate": 2.7542456707393527e-05, "loss": 1.3943, "step": 35300 }, { "epoch": 0.49, "learning_rate": 2.7535490881734214e-05, "loss": 1.3846, "step": 35400 }, { "epoch": 0.49, "learning_rate": 2.7528525056074897e-05, "loss": 1.41, "step": 35500 }, { "epoch": 0.5, "learning_rate": 2.7521559230415583e-05, "loss": 1.3983, "step": 35600 }, { "epoch": 0.5, "learning_rate": 2.7514593404756266e-05, "loss": 1.4252, "step": 35700 }, { "epoch": 0.5, "learning_rate": 2.7507627579096952e-05, "loss": 1.3834, "step": 35800 }, { "epoch": 0.5, "learning_rate": 2.750066175343764e-05, "loss": 1.3957, "step": 35900 }, { "epoch": 0.5, "learning_rate": 2.749369592777832e-05, "loss": 1.3715, "step": 36000 }, { "epoch": 0.5, "learning_rate": 2.7486730102119008e-05, "loss": 1.3904, "step": 36100 }, { "epoch": 0.5, "learning_rate": 2.7479764276459688e-05, "loss": 1.4317, "step": 36200 }, { "epoch": 0.51, "learning_rate": 2.7472798450800374e-05, "loss": 1.3789, "step": 36300 }, { "epoch": 0.51, "learning_rate": 2.7465832625141057e-05, "loss": 1.343, "step": 36400 }, { "epoch": 0.51, "learning_rate": 2.7458866799481743e-05, "loss": 1.3381, "step": 36500 }, { "epoch": 0.51, "learning_rate": 2.7451900973822426e-05, "loss": 1.4004, "step": 36600 }, { "epoch": 0.51, "learning_rate": 2.7444935148163113e-05, "loss": 1.4007, "step": 36700 }, { "epoch": 0.51, "learning_rate": 2.7437969322503796e-05, "loss": 1.4021, "step": 36800 }, { "epoch": 0.51, "learning_rate": 2.7431003496844482e-05, "loss": 1.4194, "step": 36900 }, { "epoch": 0.52, "learning_rate": 2.7424037671185165e-05, "loss": 1.3586, "step": 37000 }, { "epoch": 0.52, "learning_rate": 2.741707184552585e-05, "loss": 1.3964, "step": 37100 }, { "epoch": 0.52, "learning_rate": 2.7410106019866534e-05, "loss": 1.4042, "step": 37200 }, { "epoch": 0.52, "learning_rate": 2.740314019420722e-05, "loss": 1.4042, "step": 37300 }, { "epoch": 0.52, "learning_rate": 2.7396174368547904e-05, "loss": 1.4241, "step": 37400 }, { "epoch": 0.52, "learning_rate": 2.7389278201145184e-05, "loss": 1.3756, "step": 37500 }, { "epoch": 0.52, "learning_rate": 2.7382312375485867e-05, "loss": 1.3848, "step": 37600 }, { "epoch": 0.53, "learning_rate": 2.7375346549826553e-05, "loss": 1.4253, "step": 37700 }, { "epoch": 0.53, "learning_rate": 2.7368380724167236e-05, "loss": 1.4135, "step": 37800 }, { "epoch": 0.53, "learning_rate": 2.7361414898507922e-05, "loss": 1.4172, "step": 37900 }, { "epoch": 0.53, "learning_rate": 2.7354449072848605e-05, "loss": 1.416, "step": 38000 }, { "epoch": 0.53, "learning_rate": 2.7347483247189292e-05, "loss": 1.3781, "step": 38100 }, { "epoch": 0.53, "learning_rate": 2.7340517421529975e-05, "loss": 1.3785, "step": 38200 }, { "epoch": 0.53, "learning_rate": 2.733355159587066e-05, "loss": 1.3614, "step": 38300 }, { "epoch": 0.53, "learning_rate": 2.7326585770211344e-05, "loss": 1.3609, "step": 38400 }, { "epoch": 0.54, "learning_rate": 2.731961994455203e-05, "loss": 1.3633, "step": 38500 }, { "epoch": 0.54, "learning_rate": 2.731265411889271e-05, "loss": 1.3579, "step": 38600 }, { "epoch": 0.54, "learning_rate": 2.7305688293233396e-05, "loss": 1.3761, "step": 38700 }, { "epoch": 0.54, "learning_rate": 2.729872246757408e-05, "loss": 1.3834, "step": 38800 }, { "epoch": 0.54, "learning_rate": 2.7291756641914766e-05, "loss": 1.3888, "step": 38900 }, { "epoch": 0.54, "learning_rate": 2.728479081625545e-05, "loss": 1.4012, "step": 39000 }, { "epoch": 0.54, "learning_rate": 2.7277824990596135e-05, "loss": 1.4121, "step": 39100 }, { "epoch": 0.55, "learning_rate": 2.727085916493682e-05, "loss": 1.3844, "step": 39200 }, { "epoch": 0.55, "learning_rate": 2.7263893339277505e-05, "loss": 1.348, "step": 39300 }, { "epoch": 0.55, "learning_rate": 2.725692751361819e-05, "loss": 1.4259, "step": 39400 }, { "epoch": 0.55, "learning_rate": 2.7249961687958874e-05, "loss": 1.3763, "step": 39500 }, { "epoch": 0.55, "learning_rate": 2.724299586229956e-05, "loss": 1.3797, "step": 39600 }, { "epoch": 0.55, "learning_rate": 2.7236030036640243e-05, "loss": 1.4056, "step": 39700 }, { "epoch": 0.55, "learning_rate": 2.722906421098093e-05, "loss": 1.3613, "step": 39800 }, { "epoch": 0.56, "learning_rate": 2.7222098385321613e-05, "loss": 1.387, "step": 39900 }, { "epoch": 0.56, "learning_rate": 2.72151325596623e-05, "loss": 1.3829, "step": 40000 }, { "epoch": 0.56, "learning_rate": 2.7208166734002982e-05, "loss": 1.4094, "step": 40100 }, { "epoch": 0.56, "learning_rate": 2.720120090834367e-05, "loss": 1.378, "step": 40200 }, { "epoch": 0.56, "learning_rate": 2.719423508268435e-05, "loss": 1.4012, "step": 40300 }, { "epoch": 0.56, "learning_rate": 2.7187269257025038e-05, "loss": 1.4169, "step": 40400 }, { "epoch": 0.56, "learning_rate": 2.718030343136572e-05, "loss": 1.3587, "step": 40500 }, { "epoch": 0.57, "learning_rate": 2.7173337605706407e-05, "loss": 1.4081, "step": 40600 }, { "epoch": 0.57, "learning_rate": 2.716637178004709e-05, "loss": 1.372, "step": 40700 }, { "epoch": 0.57, "learning_rate": 2.7159405954387773e-05, "loss": 1.3797, "step": 40800 }, { "epoch": 0.57, "learning_rate": 2.7152440128728456e-05, "loss": 1.3596, "step": 40900 }, { "epoch": 0.57, "learning_rate": 2.7145474303069142e-05, "loss": 1.3785, "step": 41000 }, { "epoch": 0.57, "learning_rate": 2.7138508477409825e-05, "loss": 1.3757, "step": 41100 }, { "epoch": 0.57, "learning_rate": 2.7131542651750512e-05, "loss": 1.3541, "step": 41200 }, { "epoch": 0.58, "learning_rate": 2.7124576826091195e-05, "loss": 1.359, "step": 41300 }, { "epoch": 0.58, "learning_rate": 2.711761100043188e-05, "loss": 1.3764, "step": 41400 }, { "epoch": 0.58, "learning_rate": 2.7110645174772564e-05, "loss": 1.397, "step": 41500 }, { "epoch": 0.58, "learning_rate": 2.710367934911325e-05, "loss": 1.378, "step": 41600 }, { "epoch": 0.58, "learning_rate": 2.7096713523453937e-05, "loss": 1.3919, "step": 41700 }, { "epoch": 0.58, "learning_rate": 2.708974769779462e-05, "loss": 1.3885, "step": 41800 }, { "epoch": 0.58, "learning_rate": 2.7082781872135306e-05, "loss": 1.3811, "step": 41900 }, { "epoch": 0.59, "learning_rate": 2.707581604647599e-05, "loss": 1.3357, "step": 42000 }, { "epoch": 0.59, "learning_rate": 2.7068850220816676e-05, "loss": 1.378, "step": 42100 }, { "epoch": 0.59, "learning_rate": 2.706188439515736e-05, "loss": 1.3892, "step": 42200 }, { "epoch": 0.59, "learning_rate": 2.7054918569498045e-05, "loss": 1.4016, "step": 42300 }, { "epoch": 0.59, "learning_rate": 2.7047952743838728e-05, "loss": 1.3872, "step": 42400 }, { "epoch": 0.59, "learning_rate": 2.7040986918179414e-05, "loss": 1.3525, "step": 42500 }, { "epoch": 0.59, "learning_rate": 2.7034021092520097e-05, "loss": 1.4103, "step": 42600 }, { "epoch": 0.59, "learning_rate": 2.7027055266860784e-05, "loss": 1.4118, "step": 42700 }, { "epoch": 0.6, "learning_rate": 2.7020089441201467e-05, "loss": 1.3327, "step": 42800 }, { "epoch": 0.6, "learning_rate": 2.7013123615542153e-05, "loss": 1.3728, "step": 42900 }, { "epoch": 0.6, "learning_rate": 2.7006157789882836e-05, "loss": 1.3841, "step": 43000 }, { "epoch": 0.6, "learning_rate": 2.699919196422352e-05, "loss": 1.3852, "step": 43100 }, { "epoch": 0.6, "learning_rate": 2.6992226138564202e-05, "loss": 1.3959, "step": 43200 }, { "epoch": 0.6, "learning_rate": 2.698526031290489e-05, "loss": 1.3464, "step": 43300 }, { "epoch": 0.6, "learning_rate": 2.697829448724557e-05, "loss": 1.3963, "step": 43400 }, { "epoch": 0.61, "learning_rate": 2.6971328661586258e-05, "loss": 1.3546, "step": 43500 }, { "epoch": 0.61, "learning_rate": 2.696436283592694e-05, "loss": 1.3473, "step": 43600 }, { "epoch": 0.61, "learning_rate": 2.6957397010267627e-05, "loss": 1.3486, "step": 43700 }, { "epoch": 0.61, "learning_rate": 2.695043118460831e-05, "loss": 1.3969, "step": 43800 }, { "epoch": 0.61, "learning_rate": 2.6943465358948997e-05, "loss": 1.3857, "step": 43900 }, { "epoch": 0.61, "learning_rate": 2.6936499533289683e-05, "loss": 1.3501, "step": 44000 }, { "epoch": 0.61, "learning_rate": 2.6929533707630366e-05, "loss": 1.3741, "step": 44100 }, { "epoch": 0.62, "learning_rate": 2.6922567881971052e-05, "loss": 1.3536, "step": 44200 }, { "epoch": 0.62, "learning_rate": 2.6915602056311735e-05, "loss": 1.3821, "step": 44300 }, { "epoch": 0.62, "learning_rate": 2.6908636230652422e-05, "loss": 1.3686, "step": 44400 }, { "epoch": 0.62, "learning_rate": 2.6901670404993105e-05, "loss": 1.3842, "step": 44500 }, { "epoch": 0.62, "learning_rate": 2.689470457933379e-05, "loss": 1.3715, "step": 44600 }, { "epoch": 0.62, "learning_rate": 2.6887738753674474e-05, "loss": 1.3883, "step": 44700 }, { "epoch": 0.62, "learning_rate": 2.688077292801516e-05, "loss": 1.3736, "step": 44800 }, { "epoch": 0.63, "learning_rate": 2.6873876760612437e-05, "loss": 1.4091, "step": 44900 }, { "epoch": 0.63, "learning_rate": 2.6866910934953123e-05, "loss": 1.3606, "step": 45000 }, { "epoch": 0.63, "learning_rate": 2.6859945109293806e-05, "loss": 1.3626, "step": 45100 }, { "epoch": 0.63, "learning_rate": 2.6852979283634493e-05, "loss": 1.3744, "step": 45200 }, { "epoch": 0.63, "learning_rate": 2.6846013457975176e-05, "loss": 1.3564, "step": 45300 }, { "epoch": 0.63, "learning_rate": 2.6839047632315862e-05, "loss": 1.3506, "step": 45400 }, { "epoch": 0.63, "learning_rate": 2.683208180665654e-05, "loss": 1.3395, "step": 45500 }, { "epoch": 0.64, "learning_rate": 2.6825115980997228e-05, "loss": 1.3471, "step": 45600 }, { "epoch": 0.64, "learning_rate": 2.681815015533791e-05, "loss": 1.3495, "step": 45700 }, { "epoch": 0.64, "learning_rate": 2.6811184329678597e-05, "loss": 1.3854, "step": 45800 }, { "epoch": 0.64, "learning_rate": 2.680421850401928e-05, "loss": 1.3284, "step": 45900 }, { "epoch": 0.64, "learning_rate": 2.6797252678359967e-05, "loss": 1.3682, "step": 46000 }, { "epoch": 0.64, "learning_rate": 2.679028685270065e-05, "loss": 1.3701, "step": 46100 }, { "epoch": 0.64, "learning_rate": 2.6783321027041336e-05, "loss": 1.3407, "step": 46200 }, { "epoch": 0.65, "learning_rate": 2.677635520138202e-05, "loss": 1.39, "step": 46300 }, { "epoch": 0.65, "learning_rate": 2.6769389375722705e-05, "loss": 1.3653, "step": 46400 }, { "epoch": 0.65, "learning_rate": 2.676242355006339e-05, "loss": 1.3297, "step": 46500 }, { "epoch": 0.65, "learning_rate": 2.6755457724404075e-05, "loss": 1.3955, "step": 46600 }, { "epoch": 0.65, "learning_rate": 2.6748491898744758e-05, "loss": 1.3503, "step": 46700 }, { "epoch": 0.65, "learning_rate": 2.6741526073085444e-05, "loss": 1.3589, "step": 46800 }, { "epoch": 0.65, "learning_rate": 2.6734560247426127e-05, "loss": 1.3556, "step": 46900 }, { "epoch": 0.65, "learning_rate": 2.6727594421766814e-05, "loss": 1.3429, "step": 47000 }, { "epoch": 0.66, "learning_rate": 2.6720628596107497e-05, "loss": 1.3697, "step": 47100 }, { "epoch": 0.66, "learning_rate": 2.6713662770448183e-05, "loss": 1.3673, "step": 47200 }, { "epoch": 0.66, "learning_rate": 2.670669694478887e-05, "loss": 1.392, "step": 47300 }, { "epoch": 0.66, "learning_rate": 2.6699731119129552e-05, "loss": 1.3497, "step": 47400 }, { "epoch": 0.66, "learning_rate": 2.669283495172683e-05, "loss": 1.3425, "step": 47500 }, { "epoch": 0.66, "learning_rate": 2.6685869126067515e-05, "loss": 1.3948, "step": 47600 }, { "epoch": 0.66, "learning_rate": 2.6678903300408198e-05, "loss": 1.3741, "step": 47700 }, { "epoch": 0.67, "learning_rate": 2.6671937474748884e-05, "loss": 1.348, "step": 47800 }, { "epoch": 0.67, "learning_rate": 2.6664971649089564e-05, "loss": 1.3932, "step": 47900 }, { "epoch": 0.67, "learning_rate": 2.665800582343025e-05, "loss": 1.3822, "step": 48000 }, { "epoch": 0.67, "learning_rate": 2.6651039997770937e-05, "loss": 1.3723, "step": 48100 }, { "epoch": 0.67, "learning_rate": 2.664407417211162e-05, "loss": 1.4158, "step": 48200 }, { "epoch": 0.67, "learning_rate": 2.6637108346452306e-05, "loss": 1.3831, "step": 48300 }, { "epoch": 0.67, "learning_rate": 2.663014252079299e-05, "loss": 1.3851, "step": 48400 }, { "epoch": 0.68, "learning_rate": 2.6623176695133676e-05, "loss": 1.3529, "step": 48500 }, { "epoch": 0.68, "learning_rate": 2.661621086947436e-05, "loss": 1.4082, "step": 48600 }, { "epoch": 0.68, "learning_rate": 2.6609245043815045e-05, "loss": 1.3375, "step": 48700 }, { "epoch": 0.68, "learning_rate": 2.6602279218155728e-05, "loss": 1.3496, "step": 48800 }, { "epoch": 0.68, "learning_rate": 2.6595313392496414e-05, "loss": 1.3226, "step": 48900 }, { "epoch": 0.68, "learning_rate": 2.6588347566837097e-05, "loss": 1.352, "step": 49000 }, { "epoch": 0.68, "learning_rate": 2.6581381741177784e-05, "loss": 1.3398, "step": 49100 }, { "epoch": 0.69, "learning_rate": 2.6574415915518467e-05, "loss": 1.3801, "step": 49200 }, { "epoch": 0.69, "learning_rate": 2.6567450089859153e-05, "loss": 1.3649, "step": 49300 }, { "epoch": 0.69, "learning_rate": 2.6560484264199836e-05, "loss": 1.3455, "step": 49400 }, { "epoch": 0.69, "learning_rate": 2.6553518438540522e-05, "loss": 1.34, "step": 49500 }, { "epoch": 0.69, "learning_rate": 2.6546552612881205e-05, "loss": 1.3499, "step": 49600 }, { "epoch": 0.69, "learning_rate": 2.6539586787221892e-05, "loss": 1.3164, "step": 49700 }, { "epoch": 0.69, "learning_rate": 2.6532620961562575e-05, "loss": 1.3259, "step": 49800 }, { "epoch": 0.7, "learning_rate": 2.652565513590326e-05, "loss": 1.3376, "step": 49900 }, { "epoch": 0.7, "learning_rate": 2.6518689310243944e-05, "loss": 1.3874, "step": 50000 }, { "epoch": 0.7, "learning_rate": 2.651172348458463e-05, "loss": 1.4079, "step": 50100 }, { "epoch": 0.7, "learning_rate": 2.650475765892531e-05, "loss": 1.4041, "step": 50200 }, { "epoch": 0.7, "learning_rate": 2.6497791833265996e-05, "loss": 1.3329, "step": 50300 }, { "epoch": 0.7, "learning_rate": 2.649082600760668e-05, "loss": 1.3223, "step": 50400 }, { "epoch": 0.7, "learning_rate": 2.6483860181947366e-05, "loss": 1.3544, "step": 50500 }, { "epoch": 0.7, "learning_rate": 2.6476894356288052e-05, "loss": 1.3594, "step": 50600 }, { "epoch": 0.71, "learning_rate": 2.6469928530628735e-05, "loss": 1.3821, "step": 50700 }, { "epoch": 0.71, "learning_rate": 2.646296270496942e-05, "loss": 1.3086, "step": 50800 }, { "epoch": 0.71, "learning_rate": 2.6455996879310105e-05, "loss": 1.3501, "step": 50900 }, { "epoch": 0.71, "learning_rate": 2.644903105365079e-05, "loss": 1.3729, "step": 51000 }, { "epoch": 0.71, "learning_rate": 2.6442065227991474e-05, "loss": 1.3819, "step": 51100 }, { "epoch": 0.71, "learning_rate": 2.643509940233216e-05, "loss": 1.3432, "step": 51200 }, { "epoch": 0.71, "learning_rate": 2.6428133576672843e-05, "loss": 1.3654, "step": 51300 }, { "epoch": 0.72, "learning_rate": 2.642116775101353e-05, "loss": 1.3374, "step": 51400 }, { "epoch": 0.72, "learning_rate": 2.6414201925354213e-05, "loss": 1.3297, "step": 51500 }, { "epoch": 0.72, "learning_rate": 2.6407305757951492e-05, "loss": 1.3748, "step": 51600 }, { "epoch": 0.72, "learning_rate": 2.6400339932292175e-05, "loss": 1.3697, "step": 51700 }, { "epoch": 0.72, "learning_rate": 2.6393374106632862e-05, "loss": 1.3465, "step": 51800 }, { "epoch": 0.72, "learning_rate": 2.6386408280973545e-05, "loss": 1.321, "step": 51900 }, { "epoch": 0.72, "learning_rate": 2.637944245531423e-05, "loss": 1.325, "step": 52000 }, { "epoch": 0.73, "learning_rate": 2.6372476629654914e-05, "loss": 1.3387, "step": 52100 }, { "epoch": 0.73, "learning_rate": 2.63655108039956e-05, "loss": 1.3562, "step": 52200 }, { "epoch": 0.73, "learning_rate": 2.6358614636592877e-05, "loss": 1.3304, "step": 52300 }, { "epoch": 0.73, "learning_rate": 2.6351648810933563e-05, "loss": 1.3403, "step": 52400 }, { "epoch": 0.73, "learning_rate": 2.634475264353084e-05, "loss": 1.3563, "step": 52500 }, { "epoch": 0.73, "learning_rate": 2.6337786817871523e-05, "loss": 1.3802, "step": 52600 }, { "epoch": 0.73, "learning_rate": 2.633082099221221e-05, "loss": 1.3703, "step": 52700 }, { "epoch": 0.74, "learning_rate": 2.6323855166552892e-05, "loss": 1.3479, "step": 52800 }, { "epoch": 0.74, "learning_rate": 2.6316889340893575e-05, "loss": 1.3605, "step": 52900 }, { "epoch": 0.74, "learning_rate": 2.630992351523426e-05, "loss": 1.3378, "step": 53000 }, { "epoch": 0.74, "learning_rate": 2.6302957689574945e-05, "loss": 1.3712, "step": 53100 }, { "epoch": 0.74, "learning_rate": 2.629599186391563e-05, "loss": 1.3279, "step": 53200 }, { "epoch": 0.74, "learning_rate": 2.6289026038256314e-05, "loss": 1.3342, "step": 53300 }, { "epoch": 0.74, "learning_rate": 2.6282060212597e-05, "loss": 1.3442, "step": 53400 }, { "epoch": 0.75, "learning_rate": 2.6275094386937683e-05, "loss": 1.3437, "step": 53500 }, { "epoch": 0.75, "learning_rate": 2.626812856127837e-05, "loss": 1.3576, "step": 53600 }, { "epoch": 0.75, "learning_rate": 2.6261162735619053e-05, "loss": 1.358, "step": 53700 }, { "epoch": 0.75, "learning_rate": 2.625419690995974e-05, "loss": 1.3469, "step": 53800 }, { "epoch": 0.75, "learning_rate": 2.6247231084300422e-05, "loss": 1.3949, "step": 53900 }, { "epoch": 0.75, "learning_rate": 2.624026525864111e-05, "loss": 1.3745, "step": 54000 }, { "epoch": 0.75, "learning_rate": 2.623329943298179e-05, "loss": 1.3844, "step": 54100 }, { "epoch": 0.76, "learning_rate": 2.6226333607322478e-05, "loss": 1.3565, "step": 54200 }, { "epoch": 0.76, "learning_rate": 2.621936778166316e-05, "loss": 1.3887, "step": 54300 }, { "epoch": 0.76, "learning_rate": 2.6212401956003847e-05, "loss": 1.3459, "step": 54400 }, { "epoch": 0.76, "learning_rate": 2.620543613034453e-05, "loss": 1.3812, "step": 54500 }, { "epoch": 0.76, "learning_rate": 2.6198470304685216e-05, "loss": 1.3449, "step": 54600 }, { "epoch": 0.76, "learning_rate": 2.61915044790259e-05, "loss": 1.3412, "step": 54700 }, { "epoch": 0.76, "learning_rate": 2.6184538653366586e-05, "loss": 1.3461, "step": 54800 }, { "epoch": 0.76, "learning_rate": 2.617757282770727e-05, "loss": 1.3561, "step": 54900 }, { "epoch": 0.77, "learning_rate": 2.6170607002047955e-05, "loss": 1.3813, "step": 55000 }, { "epoch": 0.77, "learning_rate": 2.6163641176388638e-05, "loss": 1.374, "step": 55100 }, { "epoch": 0.77, "learning_rate": 2.615667535072932e-05, "loss": 1.3128, "step": 55200 }, { "epoch": 0.77, "learning_rate": 2.6149709525070004e-05, "loss": 1.3356, "step": 55300 }, { "epoch": 0.77, "learning_rate": 2.614274369941069e-05, "loss": 1.3672, "step": 55400 }, { "epoch": 0.77, "learning_rate": 2.6135777873751377e-05, "loss": 1.3412, "step": 55500 }, { "epoch": 0.77, "learning_rate": 2.612881204809206e-05, "loss": 1.3583, "step": 55600 }, { "epoch": 0.78, "learning_rate": 2.6121846222432746e-05, "loss": 1.3645, "step": 55700 }, { "epoch": 0.78, "learning_rate": 2.611488039677343e-05, "loss": 1.3354, "step": 55800 }, { "epoch": 0.78, "learning_rate": 2.6107914571114116e-05, "loss": 1.383, "step": 55900 }, { "epoch": 0.78, "learning_rate": 2.61009487454548e-05, "loss": 1.2789, "step": 56000 }, { "epoch": 0.78, "learning_rate": 2.6093982919795485e-05, "loss": 1.3473, "step": 56100 }, { "epoch": 0.78, "learning_rate": 2.6087017094136168e-05, "loss": 1.3763, "step": 56200 }, { "epoch": 0.78, "learning_rate": 2.6080051268476854e-05, "loss": 1.3376, "step": 56300 }, { "epoch": 0.79, "learning_rate": 2.6073085442817537e-05, "loss": 1.3036, "step": 56400 }, { "epoch": 0.79, "learning_rate": 2.6066119617158224e-05, "loss": 1.3755, "step": 56500 }, { "epoch": 0.79, "learning_rate": 2.6059153791498907e-05, "loss": 1.3289, "step": 56600 }, { "epoch": 0.79, "learning_rate": 2.6052187965839593e-05, "loss": 1.357, "step": 56700 }, { "epoch": 0.79, "learning_rate": 2.6045222140180276e-05, "loss": 1.3371, "step": 56800 }, { "epoch": 0.79, "learning_rate": 2.6038256314520962e-05, "loss": 1.2948, "step": 56900 }, { "epoch": 0.79, "learning_rate": 2.6031290488861645e-05, "loss": 1.3417, "step": 57000 }, { "epoch": 0.8, "learning_rate": 2.6024324663202332e-05, "loss": 1.3512, "step": 57100 }, { "epoch": 0.8, "learning_rate": 2.6017358837543015e-05, "loss": 1.3313, "step": 57200 }, { "epoch": 0.8, "learning_rate": 2.60103930118837e-05, "loss": 1.3259, "step": 57300 }, { "epoch": 0.8, "learning_rate": 2.600342718622438e-05, "loss": 1.3269, "step": 57400 }, { "epoch": 0.8, "learning_rate": 2.5996461360565067e-05, "loss": 1.3432, "step": 57500 }, { "epoch": 0.8, "learning_rate": 2.598949553490575e-05, "loss": 1.3698, "step": 57600 }, { "epoch": 0.8, "learning_rate": 2.5982529709246437e-05, "loss": 1.3546, "step": 57700 }, { "epoch": 0.81, "learning_rate": 2.597556388358712e-05, "loss": 1.383, "step": 57800 }, { "epoch": 0.81, "learning_rate": 2.5968598057927806e-05, "loss": 1.2983, "step": 57900 }, { "epoch": 0.81, "learning_rate": 2.5961632232268492e-05, "loss": 1.3356, "step": 58000 }, { "epoch": 0.81, "learning_rate": 2.595473606486577e-05, "loss": 1.3308, "step": 58100 }, { "epoch": 0.81, "learning_rate": 2.5947770239206452e-05, "loss": 1.3321, "step": 58200 }, { "epoch": 0.81, "learning_rate": 2.5940804413547138e-05, "loss": 1.3815, "step": 58300 }, { "epoch": 0.81, "learning_rate": 2.593383858788782e-05, "loss": 1.3486, "step": 58400 }, { "epoch": 0.82, "learning_rate": 2.5926872762228507e-05, "loss": 1.3451, "step": 58500 }, { "epoch": 0.82, "learning_rate": 2.591990693656919e-05, "loss": 1.3521, "step": 58600 }, { "epoch": 0.82, "learning_rate": 2.5912941110909877e-05, "loss": 1.3315, "step": 58700 }, { "epoch": 0.82, "learning_rate": 2.5905975285250563e-05, "loss": 1.3411, "step": 58800 }, { "epoch": 0.82, "learning_rate": 2.5899009459591246e-05, "loss": 1.2954, "step": 58900 }, { "epoch": 0.82, "learning_rate": 2.5892043633931933e-05, "loss": 1.3433, "step": 59000 }, { "epoch": 0.82, "learning_rate": 2.5885077808272616e-05, "loss": 1.3422, "step": 59100 }, { "epoch": 0.82, "learning_rate": 2.5878111982613302e-05, "loss": 1.3384, "step": 59200 }, { "epoch": 0.83, "learning_rate": 2.5871146156953985e-05, "loss": 1.2941, "step": 59300 }, { "epoch": 0.83, "learning_rate": 2.586418033129467e-05, "loss": 1.3337, "step": 59400 }, { "epoch": 0.83, "learning_rate": 2.5857214505635354e-05, "loss": 1.3622, "step": 59500 }, { "epoch": 0.83, "learning_rate": 2.585024867997604e-05, "loss": 1.36, "step": 59600 }, { "epoch": 0.83, "learning_rate": 2.5843282854316724e-05, "loss": 1.336, "step": 59700 }, { "epoch": 0.83, "learning_rate": 2.583631702865741e-05, "loss": 1.3074, "step": 59800 }, { "epoch": 0.83, "learning_rate": 2.582935120299809e-05, "loss": 1.3693, "step": 59900 }, { "epoch": 0.84, "learning_rate": 2.5822385377338776e-05, "loss": 1.3477, "step": 60000 }, { "epoch": 0.84, "learning_rate": 2.581541955167946e-05, "loss": 1.3297, "step": 60100 }, { "epoch": 0.84, "learning_rate": 2.5808453726020145e-05, "loss": 1.3227, "step": 60200 }, { "epoch": 0.84, "learning_rate": 2.580148790036083e-05, "loss": 1.3525, "step": 60300 }, { "epoch": 0.84, "learning_rate": 2.5794522074701515e-05, "loss": 1.374, "step": 60400 }, { "epoch": 0.84, "learning_rate": 2.5787556249042198e-05, "loss": 1.3384, "step": 60500 }, { "epoch": 0.84, "learning_rate": 2.5780590423382884e-05, "loss": 1.3401, "step": 60600 }, { "epoch": 0.85, "learning_rate": 2.5773624597723567e-05, "loss": 1.3454, "step": 60700 }, { "epoch": 0.85, "learning_rate": 2.5766658772064253e-05, "loss": 1.3388, "step": 60800 }, { "epoch": 0.85, "learning_rate": 2.5759692946404936e-05, "loss": 1.3883, "step": 60900 }, { "epoch": 0.85, "learning_rate": 2.5752727120745623e-05, "loss": 1.3638, "step": 61000 }, { "epoch": 0.85, "learning_rate": 2.5745761295086306e-05, "loss": 1.3263, "step": 61100 }, { "epoch": 0.85, "learning_rate": 2.5738795469426992e-05, "loss": 1.3555, "step": 61200 }, { "epoch": 0.85, "learning_rate": 2.573182964376768e-05, "loss": 1.3402, "step": 61300 }, { "epoch": 0.86, "learning_rate": 2.572486381810836e-05, "loss": 1.3395, "step": 61400 }, { "epoch": 0.86, "learning_rate": 2.5717897992449048e-05, "loss": 1.3215, "step": 61500 }, { "epoch": 0.86, "learning_rate": 2.571093216678973e-05, "loss": 1.3557, "step": 61600 }, { "epoch": 0.86, "learning_rate": 2.5703966341130417e-05, "loss": 1.3658, "step": 61700 }, { "epoch": 0.86, "learning_rate": 2.56970005154711e-05, "loss": 1.3467, "step": 61800 }, { "epoch": 0.86, "learning_rate": 2.5690034689811787e-05, "loss": 1.3427, "step": 61900 }, { "epoch": 0.86, "learning_rate": 2.568306886415247e-05, "loss": 1.3249, "step": 62000 }, { "epoch": 0.87, "learning_rate": 2.5676103038493153e-05, "loss": 1.3471, "step": 62100 }, { "epoch": 0.87, "learning_rate": 2.5669206871090432e-05, "loss": 1.3272, "step": 62200 }, { "epoch": 0.87, "learning_rate": 2.5662241045431115e-05, "loss": 1.307, "step": 62300 }, { "epoch": 0.87, "learning_rate": 2.56552752197718e-05, "loss": 1.2977, "step": 62400 }, { "epoch": 0.87, "learning_rate": 2.5648309394112485e-05, "loss": 1.3312, "step": 62500 }, { "epoch": 0.87, "learning_rate": 2.5641343568453168e-05, "loss": 1.3112, "step": 62600 }, { "epoch": 0.87, "learning_rate": 2.5634377742793854e-05, "loss": 1.377, "step": 62700 }, { "epoch": 0.87, "learning_rate": 2.5627411917134537e-05, "loss": 1.3737, "step": 62800 }, { "epoch": 0.88, "learning_rate": 2.5620446091475224e-05, "loss": 1.3194, "step": 62900 }, { "epoch": 0.88, "learning_rate": 2.5613480265815907e-05, "loss": 1.3558, "step": 63000 }, { "epoch": 0.88, "learning_rate": 2.5606514440156593e-05, "loss": 1.3076, "step": 63100 }, { "epoch": 0.88, "learning_rate": 2.5599548614497276e-05, "loss": 1.3565, "step": 63200 }, { "epoch": 0.88, "learning_rate": 2.5592582788837962e-05, "loss": 1.3515, "step": 63300 }, { "epoch": 0.88, "learning_rate": 2.5585616963178645e-05, "loss": 1.3132, "step": 63400 }, { "epoch": 0.88, "learning_rate": 2.557865113751933e-05, "loss": 1.3345, "step": 63500 }, { "epoch": 0.89, "learning_rate": 2.5571685311860015e-05, "loss": 1.3776, "step": 63600 }, { "epoch": 0.89, "learning_rate": 2.55647194862007e-05, "loss": 1.3238, "step": 63700 }, { "epoch": 0.89, "learning_rate": 2.5557753660541384e-05, "loss": 1.3039, "step": 63800 }, { "epoch": 0.89, "learning_rate": 2.555078783488207e-05, "loss": 1.3506, "step": 63900 }, { "epoch": 0.89, "learning_rate": 2.5543822009222753e-05, "loss": 1.3469, "step": 64000 }, { "epoch": 0.89, "learning_rate": 2.553685618356344e-05, "loss": 1.3297, "step": 64100 }, { "epoch": 0.89, "learning_rate": 2.5529890357904123e-05, "loss": 1.3315, "step": 64200 }, { "epoch": 0.9, "learning_rate": 2.5522994190501403e-05, "loss": 1.3083, "step": 64300 }, { "epoch": 0.9, "learning_rate": 2.5516028364842086e-05, "loss": 1.3433, "step": 64400 }, { "epoch": 0.9, "learning_rate": 2.5509062539182772e-05, "loss": 1.3406, "step": 64500 }, { "epoch": 0.9, "learning_rate": 2.5502096713523455e-05, "loss": 1.3269, "step": 64600 }, { "epoch": 0.9, "learning_rate": 2.5495130887864138e-05, "loss": 1.3314, "step": 64700 }, { "epoch": 0.9, "learning_rate": 2.548816506220482e-05, "loss": 1.3872, "step": 64800 }, { "epoch": 0.9, "learning_rate": 2.5481199236545507e-05, "loss": 1.3365, "step": 64900 }, { "epoch": 0.91, "learning_rate": 2.547423341088619e-05, "loss": 1.3184, "step": 65000 }, { "epoch": 0.91, "learning_rate": 2.5467267585226877e-05, "loss": 1.2843, "step": 65100 }, { "epoch": 0.91, "learning_rate": 2.5460301759567563e-05, "loss": 1.3652, "step": 65200 }, { "epoch": 0.91, "learning_rate": 2.5453335933908246e-05, "loss": 1.3367, "step": 65300 }, { "epoch": 0.91, "learning_rate": 2.5446370108248932e-05, "loss": 1.3429, "step": 65400 }, { "epoch": 0.91, "learning_rate": 2.5439404282589615e-05, "loss": 1.3395, "step": 65500 }, { "epoch": 0.91, "learning_rate": 2.5432438456930302e-05, "loss": 1.329, "step": 65600 }, { "epoch": 0.92, "learning_rate": 2.5425542289527578e-05, "loss": 1.31, "step": 65700 }, { "epoch": 0.92, "learning_rate": 2.541857646386826e-05, "loss": 1.3535, "step": 65800 }, { "epoch": 0.92, "learning_rate": 2.5411610638208948e-05, "loss": 1.3204, "step": 65900 }, { "epoch": 0.92, "learning_rate": 2.540464481254963e-05, "loss": 1.3465, "step": 66000 }, { "epoch": 0.92, "learning_rate": 2.5397678986890317e-05, "loss": 1.3004, "step": 66100 }, { "epoch": 0.92, "learning_rate": 2.5390713161231003e-05, "loss": 1.3113, "step": 66200 }, { "epoch": 0.92, "learning_rate": 2.5383747335571686e-05, "loss": 1.3437, "step": 66300 }, { "epoch": 0.93, "learning_rate": 2.5376781509912373e-05, "loss": 1.3098, "step": 66400 }, { "epoch": 0.93, "learning_rate": 2.5369815684253056e-05, "loss": 1.3631, "step": 66500 }, { "epoch": 0.93, "learning_rate": 2.5362849858593742e-05, "loss": 1.3486, "step": 66600 }, { "epoch": 0.93, "learning_rate": 2.5355884032934425e-05, "loss": 1.3174, "step": 66700 }, { "epoch": 0.93, "learning_rate": 2.534891820727511e-05, "loss": 1.3509, "step": 66800 }, { "epoch": 0.93, "learning_rate": 2.5341952381615794e-05, "loss": 1.3222, "step": 66900 }, { "epoch": 0.93, "learning_rate": 2.533498655595648e-05, "loss": 1.3343, "step": 67000 }, { "epoch": 0.93, "learning_rate": 2.532802073029716e-05, "loss": 1.341, "step": 67100 }, { "epoch": 0.94, "learning_rate": 2.5321054904637847e-05, "loss": 1.3564, "step": 67200 }, { "epoch": 0.94, "learning_rate": 2.531408907897853e-05, "loss": 1.3297, "step": 67300 }, { "epoch": 0.94, "learning_rate": 2.5307123253319216e-05, "loss": 1.3156, "step": 67400 }, { "epoch": 0.94, "learning_rate": 2.53001574276599e-05, "loss": 1.309, "step": 67500 }, { "epoch": 0.94, "learning_rate": 2.5293191602000585e-05, "loss": 1.2972, "step": 67600 }, { "epoch": 0.94, "learning_rate": 2.528622577634127e-05, "loss": 1.2935, "step": 67700 }, { "epoch": 0.94, "learning_rate": 2.5279259950681955e-05, "loss": 1.3517, "step": 67800 }, { "epoch": 0.95, "learning_rate": 2.5272294125022638e-05, "loss": 1.3298, "step": 67900 }, { "epoch": 0.95, "learning_rate": 2.5265328299363324e-05, "loss": 1.3111, "step": 68000 }, { "epoch": 0.95, "learning_rate": 2.5258362473704007e-05, "loss": 1.2882, "step": 68100 }, { "epoch": 0.95, "learning_rate": 2.5251396648044694e-05, "loss": 1.3066, "step": 68200 }, { "epoch": 0.95, "learning_rate": 2.5244430822385377e-05, "loss": 1.3078, "step": 68300 }, { "epoch": 0.95, "learning_rate": 2.5237464996726063e-05, "loss": 1.3353, "step": 68400 }, { "epoch": 0.95, "learning_rate": 2.523049917106675e-05, "loss": 1.3302, "step": 68500 }, { "epoch": 0.96, "learning_rate": 2.5223533345407432e-05, "loss": 1.3026, "step": 68600 }, { "epoch": 0.96, "learning_rate": 2.521656751974812e-05, "loss": 1.3094, "step": 68700 }, { "epoch": 0.96, "learning_rate": 2.52096016940888e-05, "loss": 1.301, "step": 68800 }, { "epoch": 0.96, "learning_rate": 2.5202635868429488e-05, "loss": 1.3381, "step": 68900 }, { "epoch": 0.96, "learning_rate": 2.519567004277017e-05, "loss": 1.296, "step": 69000 }, { "epoch": 0.96, "learning_rate": 2.5188704217110857e-05, "loss": 1.3327, "step": 69100 }, { "epoch": 0.96, "learning_rate": 2.518173839145154e-05, "loss": 1.3504, "step": 69200 }, { "epoch": 0.97, "learning_rate": 2.5174772565792227e-05, "loss": 1.329, "step": 69300 }, { "epoch": 0.97, "learning_rate": 2.5167806740132906e-05, "loss": 1.3304, "step": 69400 }, { "epoch": 0.97, "learning_rate": 2.5160840914473593e-05, "loss": 1.3442, "step": 69500 }, { "epoch": 0.97, "learning_rate": 2.5153875088814276e-05, "loss": 1.2983, "step": 69600 }, { "epoch": 0.97, "learning_rate": 2.5146909263154962e-05, "loss": 1.3069, "step": 69700 }, { "epoch": 0.97, "learning_rate": 2.5139943437495645e-05, "loss": 1.3425, "step": 69800 }, { "epoch": 0.97, "learning_rate": 2.5133116928349515e-05, "loss": 1.2985, "step": 69900 }, { "epoch": 0.98, "learning_rate": 2.51261511026902e-05, "loss": 1.3073, "step": 70000 }, { "epoch": 0.98, "learning_rate": 2.5119254935287478e-05, "loss": 1.3266, "step": 70100 }, { "epoch": 0.98, "learning_rate": 2.5112289109628164e-05, "loss": 1.3149, "step": 70200 }, { "epoch": 0.98, "learning_rate": 2.5105323283968847e-05, "loss": 1.352, "step": 70300 }, { "epoch": 0.98, "learning_rate": 2.5098357458309534e-05, "loss": 1.3348, "step": 70400 }, { "epoch": 0.98, "learning_rate": 2.5091391632650217e-05, "loss": 1.3357, "step": 70500 }, { "epoch": 0.98, "learning_rate": 2.5084425806990903e-05, "loss": 1.3143, "step": 70600 }, { "epoch": 0.98, "learning_rate": 2.5077459981331586e-05, "loss": 1.3632, "step": 70700 }, { "epoch": 0.99, "learning_rate": 2.5070494155672272e-05, "loss": 1.3142, "step": 70800 }, { "epoch": 0.99, "learning_rate": 2.506352833001296e-05, "loss": 1.2954, "step": 70900 }, { "epoch": 0.99, "learning_rate": 2.505656250435364e-05, "loss": 1.3081, "step": 71000 }, { "epoch": 0.99, "learning_rate": 2.5049596678694328e-05, "loss": 1.2836, "step": 71100 }, { "epoch": 0.99, "learning_rate": 2.504263085303501e-05, "loss": 1.3663, "step": 71200 }, { "epoch": 0.99, "learning_rate": 2.5035665027375697e-05, "loss": 1.3225, "step": 71300 }, { "epoch": 0.99, "learning_rate": 2.502869920171638e-05, "loss": 1.2865, "step": 71400 }, { "epoch": 1.0, "learning_rate": 2.5021733376057067e-05, "loss": 1.3454, "step": 71500 }, { "epoch": 1.0, "learning_rate": 2.501476755039775e-05, "loss": 1.3092, "step": 71600 }, { "epoch": 1.0, "learning_rate": 2.5007801724738436e-05, "loss": 1.2949, "step": 71700 }, { "epoch": 1.0, "eval_gen_len": 20.0, "eval_loss": 1.208003044128418, "eval_rouge1": 11.7171, "eval_rouge2": 3.3284, "eval_rougeL": 11.3209, "eval_rougeLsum": 11.4022, "eval_runtime": 1534.4697, "eval_samples_per_second": 8.712, "eval_steps_per_second": 2.178, "step": 71779 }, { "epoch": 1.0, "learning_rate": 2.500083589907912e-05, "loss": 1.2763, "step": 71800 }, { "epoch": 1.0, "learning_rate": 2.4993870073419805e-05, "loss": 1.2162, "step": 71900 }, { "epoch": 1.0, "learning_rate": 2.498690424776049e-05, "loss": 1.1719, "step": 72000 }, { "epoch": 1.0, "learning_rate": 2.497993842210117e-05, "loss": 1.2228, "step": 72100 }, { "epoch": 1.01, "learning_rate": 2.4972972596441854e-05, "loss": 1.2252, "step": 72200 }, { "epoch": 1.01, "learning_rate": 2.496600677078254e-05, "loss": 1.1962, "step": 72300 }, { "epoch": 1.01, "learning_rate": 2.4959040945123224e-05, "loss": 1.1819, "step": 72400 }, { "epoch": 1.01, "learning_rate": 2.495207511946391e-05, "loss": 1.1982, "step": 72500 }, { "epoch": 1.01, "learning_rate": 2.4945109293804593e-05, "loss": 1.2265, "step": 72600 }, { "epoch": 1.01, "learning_rate": 2.493814346814528e-05, "loss": 1.1959, "step": 72700 }, { "epoch": 1.01, "learning_rate": 2.4931177642485963e-05, "loss": 1.176, "step": 72800 }, { "epoch": 1.02, "learning_rate": 2.492421181682665e-05, "loss": 1.2382, "step": 72900 }, { "epoch": 1.02, "learning_rate": 2.4917245991167332e-05, "loss": 1.2064, "step": 73000 }, { "epoch": 1.02, "learning_rate": 2.4910280165508018e-05, "loss": 1.2151, "step": 73100 }, { "epoch": 1.02, "learning_rate": 2.49033143398487e-05, "loss": 1.1908, "step": 73200 }, { "epoch": 1.02, "learning_rate": 2.4896348514189388e-05, "loss": 1.2045, "step": 73300 }, { "epoch": 1.02, "learning_rate": 2.4889382688530074e-05, "loss": 1.2272, "step": 73400 }, { "epoch": 1.02, "learning_rate": 2.4882416862870757e-05, "loss": 1.1965, "step": 73500 }, { "epoch": 1.03, "learning_rate": 2.4875451037211443e-05, "loss": 1.2207, "step": 73600 }, { "epoch": 1.03, "learning_rate": 2.4868485211552126e-05, "loss": 1.1842, "step": 73700 }, { "epoch": 1.03, "learning_rate": 2.4861519385892813e-05, "loss": 1.2278, "step": 73800 }, { "epoch": 1.03, "learning_rate": 2.4854553560233496e-05, "loss": 1.2294, "step": 73900 }, { "epoch": 1.03, "learning_rate": 2.4847587734574182e-05, "loss": 1.2443, "step": 74000 }, { "epoch": 1.03, "learning_rate": 2.4840621908914865e-05, "loss": 1.2025, "step": 74100 }, { "epoch": 1.03, "learning_rate": 2.483365608325555e-05, "loss": 1.2018, "step": 74200 }, { "epoch": 1.04, "learning_rate": 2.4826690257596234e-05, "loss": 1.202, "step": 74300 }, { "epoch": 1.04, "learning_rate": 2.4819724431936917e-05, "loss": 1.1752, "step": 74400 }, { "epoch": 1.04, "learning_rate": 2.48127586062776e-05, "loss": 1.2313, "step": 74500 }, { "epoch": 1.04, "learning_rate": 2.4805792780618287e-05, "loss": 1.1882, "step": 74600 }, { "epoch": 1.04, "learning_rate": 2.479882695495897e-05, "loss": 1.2348, "step": 74700 }, { "epoch": 1.04, "learning_rate": 2.4791861129299656e-05, "loss": 1.2377, "step": 74800 }, { "epoch": 1.04, "learning_rate": 2.478489530364034e-05, "loss": 1.1769, "step": 74900 }, { "epoch": 1.04, "learning_rate": 2.4777929477981026e-05, "loss": 1.2058, "step": 75000 }, { "epoch": 1.05, "learning_rate": 2.477096365232171e-05, "loss": 1.1412, "step": 75100 }, { "epoch": 1.05, "learning_rate": 2.4763997826662395e-05, "loss": 1.2086, "step": 75200 }, { "epoch": 1.05, "learning_rate": 2.4757032001003078e-05, "loss": 1.2231, "step": 75300 }, { "epoch": 1.05, "learning_rate": 2.4750066175343764e-05, "loss": 1.2127, "step": 75400 }, { "epoch": 1.05, "learning_rate": 2.4743100349684447e-05, "loss": 1.2203, "step": 75500 }, { "epoch": 1.05, "learning_rate": 2.4736134524025134e-05, "loss": 1.2357, "step": 75600 }, { "epoch": 1.05, "learning_rate": 2.4729168698365817e-05, "loss": 1.1935, "step": 75700 }, { "epoch": 1.06, "learning_rate": 2.4722202872706503e-05, "loss": 1.242, "step": 75800 }, { "epoch": 1.06, "learning_rate": 2.471523704704719e-05, "loss": 1.2526, "step": 75900 }, { "epoch": 1.06, "learning_rate": 2.4708271221387872e-05, "loss": 1.2058, "step": 76000 }, { "epoch": 1.06, "learning_rate": 2.470137505398515e-05, "loss": 1.2012, "step": 76100 }, { "epoch": 1.06, "learning_rate": 2.4694409228325835e-05, "loss": 1.2217, "step": 76200 }, { "epoch": 1.06, "learning_rate": 2.4687443402666518e-05, "loss": 1.2121, "step": 76300 }, { "epoch": 1.06, "learning_rate": 2.4680477577007205e-05, "loss": 1.2092, "step": 76400 }, { "epoch": 1.07, "learning_rate": 2.4673511751347888e-05, "loss": 1.2415, "step": 76500 }, { "epoch": 1.07, "learning_rate": 2.4666545925688574e-05, "loss": 1.219, "step": 76600 }, { "epoch": 1.07, "learning_rate": 2.465958010002926e-05, "loss": 1.2309, "step": 76700 }, { "epoch": 1.07, "learning_rate": 2.465261427436994e-05, "loss": 1.2061, "step": 76800 }, { "epoch": 1.07, "learning_rate": 2.4645648448710626e-05, "loss": 1.2408, "step": 76900 }, { "epoch": 1.07, "learning_rate": 2.463868262305131e-05, "loss": 1.1982, "step": 77000 }, { "epoch": 1.07, "learning_rate": 2.4631716797391996e-05, "loss": 1.2145, "step": 77100 }, { "epoch": 1.08, "learning_rate": 2.462475097173268e-05, "loss": 1.2393, "step": 77200 }, { "epoch": 1.08, "learning_rate": 2.4617785146073365e-05, "loss": 1.2282, "step": 77300 }, { "epoch": 1.08, "learning_rate": 2.4610819320414048e-05, "loss": 1.2315, "step": 77400 }, { "epoch": 1.08, "learning_rate": 2.4603853494754734e-05, "loss": 1.1694, "step": 77500 }, { "epoch": 1.08, "learning_rate": 2.4596887669095417e-05, "loss": 1.2348, "step": 77600 }, { "epoch": 1.08, "learning_rate": 2.4589921843436104e-05, "loss": 1.2112, "step": 77700 }, { "epoch": 1.08, "learning_rate": 2.4582956017776787e-05, "loss": 1.1807, "step": 77800 }, { "epoch": 1.09, "learning_rate": 2.4575990192117473e-05, "loss": 1.197, "step": 77900 }, { "epoch": 1.09, "learning_rate": 2.4569024366458156e-05, "loss": 1.2154, "step": 78000 }, { "epoch": 1.09, "learning_rate": 2.4562058540798843e-05, "loss": 1.2056, "step": 78100 }, { "epoch": 1.09, "learning_rate": 2.455516237339612e-05, "loss": 1.23, "step": 78200 }, { "epoch": 1.09, "learning_rate": 2.4548196547736805e-05, "loss": 1.2178, "step": 78300 }, { "epoch": 1.09, "learning_rate": 2.4541230722077488e-05, "loss": 1.1859, "step": 78400 }, { "epoch": 1.09, "learning_rate": 2.4534264896418175e-05, "loss": 1.2113, "step": 78500 }, { "epoch": 1.1, "learning_rate": 2.4527299070758858e-05, "loss": 1.1367, "step": 78600 }, { "epoch": 1.1, "learning_rate": 2.4520333245099544e-05, "loss": 1.2035, "step": 78700 }, { "epoch": 1.1, "learning_rate": 2.4513367419440227e-05, "loss": 1.1868, "step": 78800 }, { "epoch": 1.1, "learning_rate": 2.4506401593780913e-05, "loss": 1.2222, "step": 78900 }, { "epoch": 1.1, "learning_rate": 2.4499435768121596e-05, "loss": 1.1801, "step": 79000 }, { "epoch": 1.1, "learning_rate": 2.4492469942462283e-05, "loss": 1.2566, "step": 79100 }, { "epoch": 1.1, "learning_rate": 2.4485504116802962e-05, "loss": 1.2105, "step": 79200 }, { "epoch": 1.1, "learning_rate": 2.447853829114365e-05, "loss": 1.209, "step": 79300 }, { "epoch": 1.11, "learning_rate": 2.4471642123740925e-05, "loss": 1.2341, "step": 79400 }, { "epoch": 1.11, "learning_rate": 2.446467629808161e-05, "loss": 1.212, "step": 79500 }, { "epoch": 1.11, "learning_rate": 2.4457710472422295e-05, "loss": 1.2183, "step": 79600 }, { "epoch": 1.11, "learning_rate": 2.445074464676298e-05, "loss": 1.2142, "step": 79700 }, { "epoch": 1.11, "learning_rate": 2.4443778821103664e-05, "loss": 1.2149, "step": 79800 }, { "epoch": 1.11, "learning_rate": 2.443681299544435e-05, "loss": 1.2276, "step": 79900 }, { "epoch": 1.11, "learning_rate": 2.4429847169785033e-05, "loss": 1.2213, "step": 80000 }, { "epoch": 1.12, "learning_rate": 2.442288134412572e-05, "loss": 1.2164, "step": 80100 }, { "epoch": 1.12, "learning_rate": 2.4415915518466403e-05, "loss": 1.2111, "step": 80200 }, { "epoch": 1.12, "learning_rate": 2.440894969280709e-05, "loss": 1.2367, "step": 80300 }, { "epoch": 1.12, "learning_rate": 2.4401983867147772e-05, "loss": 1.1896, "step": 80400 }, { "epoch": 1.12, "learning_rate": 2.439501804148846e-05, "loss": 1.2185, "step": 80500 }, { "epoch": 1.12, "learning_rate": 2.438805221582914e-05, "loss": 1.2104, "step": 80600 }, { "epoch": 1.12, "learning_rate": 2.4381086390169828e-05, "loss": 1.1925, "step": 80700 }, { "epoch": 1.13, "learning_rate": 2.4374120564510514e-05, "loss": 1.2154, "step": 80800 }, { "epoch": 1.13, "learning_rate": 2.4367154738851197e-05, "loss": 1.189, "step": 80900 }, { "epoch": 1.13, "learning_rate": 2.4360188913191884e-05, "loss": 1.2177, "step": 81000 }, { "epoch": 1.13, "learning_rate": 2.4353223087532566e-05, "loss": 1.2071, "step": 81100 }, { "epoch": 1.13, "learning_rate": 2.4346257261873253e-05, "loss": 1.2237, "step": 81200 }, { "epoch": 1.13, "learning_rate": 2.4339291436213936e-05, "loss": 1.2119, "step": 81300 }, { "epoch": 1.13, "learning_rate": 2.4332325610554622e-05, "loss": 1.2448, "step": 81400 }, { "epoch": 1.14, "learning_rate": 2.4325359784895305e-05, "loss": 1.1896, "step": 81500 }, { "epoch": 1.14, "learning_rate": 2.431839395923599e-05, "loss": 1.1852, "step": 81600 }, { "epoch": 1.14, "learning_rate": 2.431142813357667e-05, "loss": 1.2208, "step": 81700 }, { "epoch": 1.14, "learning_rate": 2.4304462307917358e-05, "loss": 1.2055, "step": 81800 }, { "epoch": 1.14, "learning_rate": 2.429749648225804e-05, "loss": 1.2003, "step": 81900 }, { "epoch": 1.14, "learning_rate": 2.4290530656598727e-05, "loss": 1.2091, "step": 82000 }, { "epoch": 1.14, "learning_rate": 2.428356483093941e-05, "loss": 1.1914, "step": 82100 }, { "epoch": 1.15, "learning_rate": 2.4276599005280096e-05, "loss": 1.2248, "step": 82200 }, { "epoch": 1.15, "learning_rate": 2.426963317962078e-05, "loss": 1.2079, "step": 82300 }, { "epoch": 1.15, "learning_rate": 2.4262667353961466e-05, "loss": 1.2115, "step": 82400 }, { "epoch": 1.15, "learning_rate": 2.425570152830215e-05, "loss": 1.1846, "step": 82500 }, { "epoch": 1.15, "learning_rate": 2.4248735702642835e-05, "loss": 1.196, "step": 82600 }, { "epoch": 1.15, "learning_rate": 2.4241769876983518e-05, "loss": 1.2216, "step": 82700 }, { "epoch": 1.15, "learning_rate": 2.4234804051324204e-05, "loss": 1.1833, "step": 82800 }, { "epoch": 1.15, "learning_rate": 2.4227838225664887e-05, "loss": 1.2104, "step": 82900 }, { "epoch": 1.16, "learning_rate": 2.4220872400005574e-05, "loss": 1.2132, "step": 83000 }, { "epoch": 1.16, "learning_rate": 2.4213906574346257e-05, "loss": 1.2086, "step": 83100 }, { "epoch": 1.16, "learning_rate": 2.4206940748686943e-05, "loss": 1.1941, "step": 83200 }, { "epoch": 1.16, "learning_rate": 2.419997492302763e-05, "loss": 1.191, "step": 83300 }, { "epoch": 1.16, "learning_rate": 2.4193009097368313e-05, "loss": 1.1874, "step": 83400 }, { "epoch": 1.16, "learning_rate": 2.4186043271709e-05, "loss": 1.2118, "step": 83500 }, { "epoch": 1.16, "learning_rate": 2.4179077446049682e-05, "loss": 1.1804, "step": 83600 }, { "epoch": 1.17, "learning_rate": 2.4172111620390368e-05, "loss": 1.1734, "step": 83700 }, { "epoch": 1.17, "learning_rate": 2.4165215452987645e-05, "loss": 1.1996, "step": 83800 }, { "epoch": 1.17, "learning_rate": 2.4158249627328328e-05, "loss": 1.156, "step": 83900 }, { "epoch": 1.17, "learning_rate": 2.4151283801669014e-05, "loss": 1.2506, "step": 84000 }, { "epoch": 1.17, "learning_rate": 2.4144317976009697e-05, "loss": 1.2064, "step": 84100 }, { "epoch": 1.17, "learning_rate": 2.413735215035038e-05, "loss": 1.1956, "step": 84200 }, { "epoch": 1.17, "learning_rate": 2.4130386324691066e-05, "loss": 1.2134, "step": 84300 }, { "epoch": 1.18, "learning_rate": 2.412342049903175e-05, "loss": 1.1889, "step": 84400 }, { "epoch": 1.18, "learning_rate": 2.4116454673372436e-05, "loss": 1.2413, "step": 84500 }, { "epoch": 1.18, "learning_rate": 2.410948884771312e-05, "loss": 1.1893, "step": 84600 }, { "epoch": 1.18, "learning_rate": 2.4102523022053805e-05, "loss": 1.2346, "step": 84700 }, { "epoch": 1.18, "learning_rate": 2.4095557196394488e-05, "loss": 1.2005, "step": 84800 }, { "epoch": 1.18, "learning_rate": 2.4088591370735175e-05, "loss": 1.2187, "step": 84900 }, { "epoch": 1.18, "learning_rate": 2.4081625545075858e-05, "loss": 1.2475, "step": 85000 }, { "epoch": 1.19, "learning_rate": 2.4074659719416544e-05, "loss": 1.1655, "step": 85100 }, { "epoch": 1.19, "learning_rate": 2.4067693893757227e-05, "loss": 1.2359, "step": 85200 }, { "epoch": 1.19, "learning_rate": 2.4060728068097913e-05, "loss": 1.2474, "step": 85300 }, { "epoch": 1.19, "learning_rate": 2.4053762242438596e-05, "loss": 1.2113, "step": 85400 }, { "epoch": 1.19, "learning_rate": 2.4046796416779283e-05, "loss": 1.2017, "step": 85500 }, { "epoch": 1.19, "learning_rate": 2.4039830591119966e-05, "loss": 1.1962, "step": 85600 }, { "epoch": 1.19, "learning_rate": 2.4032864765460652e-05, "loss": 1.2242, "step": 85700 }, { "epoch": 1.2, "learning_rate": 2.4025898939801335e-05, "loss": 1.1803, "step": 85800 }, { "epoch": 1.2, "learning_rate": 2.4019002772398615e-05, "loss": 1.2171, "step": 85900 }, { "epoch": 1.2, "learning_rate": 2.4012036946739298e-05, "loss": 1.2158, "step": 86000 }, { "epoch": 1.2, "learning_rate": 2.4005071121079984e-05, "loss": 1.2223, "step": 86100 }, { "epoch": 1.2, "learning_rate": 2.3998105295420667e-05, "loss": 1.234, "step": 86200 }, { "epoch": 1.2, "learning_rate": 2.3991139469761354e-05, "loss": 1.1993, "step": 86300 }, { "epoch": 1.2, "learning_rate": 2.3984173644102037e-05, "loss": 1.2003, "step": 86400 }, { "epoch": 1.21, "learning_rate": 2.397720781844272e-05, "loss": 1.2102, "step": 86500 }, { "epoch": 1.21, "learning_rate": 2.3970241992783402e-05, "loss": 1.1924, "step": 86600 }, { "epoch": 1.21, "learning_rate": 2.396327616712409e-05, "loss": 1.2074, "step": 86700 }, { "epoch": 1.21, "learning_rate": 2.3956310341464772e-05, "loss": 1.2076, "step": 86800 }, { "epoch": 1.21, "learning_rate": 2.3949344515805458e-05, "loss": 1.1719, "step": 86900 }, { "epoch": 1.21, "learning_rate": 2.394237869014614e-05, "loss": 1.171, "step": 87000 }, { "epoch": 1.21, "learning_rate": 2.3935412864486828e-05, "loss": 1.2379, "step": 87100 }, { "epoch": 1.21, "learning_rate": 2.3928447038827514e-05, "loss": 1.1785, "step": 87200 }, { "epoch": 1.22, "learning_rate": 2.3921481213168197e-05, "loss": 1.2119, "step": 87300 }, { "epoch": 1.22, "learning_rate": 2.3914515387508883e-05, "loss": 1.1969, "step": 87400 }, { "epoch": 1.22, "learning_rate": 2.3907549561849566e-05, "loss": 1.1893, "step": 87500 }, { "epoch": 1.22, "learning_rate": 2.3900583736190253e-05, "loss": 1.2106, "step": 87600 }, { "epoch": 1.22, "learning_rate": 2.3893617910530936e-05, "loss": 1.2031, "step": 87700 }, { "epoch": 1.22, "learning_rate": 2.3886652084871622e-05, "loss": 1.2224, "step": 87800 }, { "epoch": 1.22, "learning_rate": 2.3879686259212305e-05, "loss": 1.1619, "step": 87900 }, { "epoch": 1.23, "learning_rate": 2.387279009180958e-05, "loss": 1.2235, "step": 88000 }, { "epoch": 1.23, "learning_rate": 2.3865824266150268e-05, "loss": 1.2187, "step": 88100 }, { "epoch": 1.23, "learning_rate": 2.3858858440490954e-05, "loss": 1.1788, "step": 88200 }, { "epoch": 1.23, "learning_rate": 2.385196227308823e-05, "loss": 1.1814, "step": 88300 }, { "epoch": 1.23, "learning_rate": 2.3844996447428914e-05, "loss": 1.1988, "step": 88400 }, { "epoch": 1.23, "learning_rate": 2.38380306217696e-05, "loss": 1.1783, "step": 88500 }, { "epoch": 1.23, "learning_rate": 2.3831064796110283e-05, "loss": 1.1934, "step": 88600 }, { "epoch": 1.24, "learning_rate": 2.382409897045097e-05, "loss": 1.2138, "step": 88700 }, { "epoch": 1.24, "learning_rate": 2.3817133144791652e-05, "loss": 1.2141, "step": 88800 }, { "epoch": 1.24, "learning_rate": 2.381016731913234e-05, "loss": 1.262, "step": 88900 }, { "epoch": 1.24, "learning_rate": 2.3803201493473025e-05, "loss": 1.2314, "step": 89000 }, { "epoch": 1.24, "learning_rate": 2.3796235667813705e-05, "loss": 1.1988, "step": 89100 }, { "epoch": 1.24, "learning_rate": 2.378926984215439e-05, "loss": 1.2241, "step": 89200 }, { "epoch": 1.24, "learning_rate": 2.3782304016495074e-05, "loss": 1.2096, "step": 89300 }, { "epoch": 1.25, "learning_rate": 2.377533819083576e-05, "loss": 1.2187, "step": 89400 }, { "epoch": 1.25, "learning_rate": 2.3768372365176443e-05, "loss": 1.1916, "step": 89500 }, { "epoch": 1.25, "learning_rate": 2.376140653951713e-05, "loss": 1.1877, "step": 89600 }, { "epoch": 1.25, "learning_rate": 2.3754440713857813e-05, "loss": 1.2055, "step": 89700 }, { "epoch": 1.25, "learning_rate": 2.37474748881985e-05, "loss": 1.2144, "step": 89800 }, { "epoch": 1.25, "learning_rate": 2.3740509062539182e-05, "loss": 1.2193, "step": 89900 }, { "epoch": 1.25, "learning_rate": 2.373354323687987e-05, "loss": 1.1965, "step": 90000 }, { "epoch": 1.26, "learning_rate": 2.372657741122055e-05, "loss": 1.2008, "step": 90100 }, { "epoch": 1.26, "learning_rate": 2.3719611585561238e-05, "loss": 1.2098, "step": 90200 }, { "epoch": 1.26, "learning_rate": 2.371264575990192e-05, "loss": 1.1948, "step": 90300 }, { "epoch": 1.26, "learning_rate": 2.3705679934242607e-05, "loss": 1.1907, "step": 90400 }, { "epoch": 1.26, "learning_rate": 2.369871410858329e-05, "loss": 1.2195, "step": 90500 }, { "epoch": 1.26, "learning_rate": 2.3691748282923977e-05, "loss": 1.195, "step": 90600 }, { "epoch": 1.26, "learning_rate": 2.368478245726466e-05, "loss": 1.2201, "step": 90700 }, { "epoch": 1.26, "learning_rate": 2.3677816631605346e-05, "loss": 1.1833, "step": 90800 }, { "epoch": 1.27, "learning_rate": 2.367085080594603e-05, "loss": 1.2109, "step": 90900 }, { "epoch": 1.27, "learning_rate": 2.3663884980286715e-05, "loss": 1.2184, "step": 91000 }, { "epoch": 1.27, "learning_rate": 2.36569191546274e-05, "loss": 1.2485, "step": 91100 }, { "epoch": 1.27, "learning_rate": 2.3649953328968085e-05, "loss": 1.2013, "step": 91200 }, { "epoch": 1.27, "learning_rate": 2.3642987503308768e-05, "loss": 1.2579, "step": 91300 }, { "epoch": 1.27, "learning_rate": 2.363602167764945e-05, "loss": 1.2107, "step": 91400 }, { "epoch": 1.27, "learning_rate": 2.3629055851990137e-05, "loss": 1.1716, "step": 91500 }, { "epoch": 1.28, "learning_rate": 2.362209002633082e-05, "loss": 1.2439, "step": 91600 }, { "epoch": 1.28, "learning_rate": 2.3615124200671507e-05, "loss": 1.1907, "step": 91700 }, { "epoch": 1.28, "learning_rate": 2.360815837501219e-05, "loss": 1.2171, "step": 91800 }, { "epoch": 1.28, "learning_rate": 2.3601192549352876e-05, "loss": 1.1806, "step": 91900 }, { "epoch": 1.28, "learning_rate": 2.3594296381950152e-05, "loss": 1.1918, "step": 92000 }, { "epoch": 1.28, "learning_rate": 2.358733055629084e-05, "loss": 1.2124, "step": 92100 }, { "epoch": 1.28, "learning_rate": 2.358036473063152e-05, "loss": 1.2193, "step": 92200 }, { "epoch": 1.29, "learning_rate": 2.3573398904972208e-05, "loss": 1.1929, "step": 92300 }, { "epoch": 1.29, "learning_rate": 2.356643307931289e-05, "loss": 1.2022, "step": 92400 }, { "epoch": 1.29, "learning_rate": 2.3559467253653577e-05, "loss": 1.2052, "step": 92500 }, { "epoch": 1.29, "learning_rate": 2.355250142799426e-05, "loss": 1.1878, "step": 92600 }, { "epoch": 1.29, "learning_rate": 2.3545535602334947e-05, "loss": 1.1895, "step": 92700 }, { "epoch": 1.29, "learning_rate": 2.353856977667563e-05, "loss": 1.2043, "step": 92800 }, { "epoch": 1.29, "learning_rate": 2.3531603951016316e-05, "loss": 1.1816, "step": 92900 }, { "epoch": 1.3, "learning_rate": 2.3524638125357e-05, "loss": 1.234, "step": 93000 }, { "epoch": 1.3, "learning_rate": 2.3517672299697686e-05, "loss": 1.2092, "step": 93100 }, { "epoch": 1.3, "learning_rate": 2.351070647403837e-05, "loss": 1.2086, "step": 93200 }, { "epoch": 1.3, "learning_rate": 2.3503740648379055e-05, "loss": 1.1953, "step": 93300 }, { "epoch": 1.3, "learning_rate": 2.3496774822719738e-05, "loss": 1.2012, "step": 93400 }, { "epoch": 1.3, "learning_rate": 2.3489808997060424e-05, "loss": 1.2169, "step": 93500 }, { "epoch": 1.3, "learning_rate": 2.3482843171401107e-05, "loss": 1.2498, "step": 93600 }, { "epoch": 1.31, "learning_rate": 2.3475877345741794e-05, "loss": 1.2184, "step": 93700 }, { "epoch": 1.31, "learning_rate": 2.3468911520082473e-05, "loss": 1.1944, "step": 93800 }, { "epoch": 1.31, "learning_rate": 2.346194569442316e-05, "loss": 1.2077, "step": 93900 }, { "epoch": 1.31, "learning_rate": 2.3454979868763843e-05, "loss": 1.1873, "step": 94000 }, { "epoch": 1.31, "learning_rate": 2.344801404310453e-05, "loss": 1.206, "step": 94100 }, { "epoch": 1.31, "learning_rate": 2.3441048217445212e-05, "loss": 1.1979, "step": 94200 }, { "epoch": 1.31, "learning_rate": 2.34340823917859e-05, "loss": 1.2447, "step": 94300 }, { "epoch": 1.32, "learning_rate": 2.342711656612658e-05, "loss": 1.1973, "step": 94400 }, { "epoch": 1.32, "learning_rate": 2.3420150740467268e-05, "loss": 1.2511, "step": 94500 }, { "epoch": 1.32, "learning_rate": 2.3413184914807954e-05, "loss": 1.2381, "step": 94600 }, { "epoch": 1.32, "learning_rate": 2.3406219089148637e-05, "loss": 1.2367, "step": 94700 }, { "epoch": 1.32, "learning_rate": 2.3399253263489323e-05, "loss": 1.2305, "step": 94800 }, { "epoch": 1.32, "learning_rate": 2.3392287437830006e-05, "loss": 1.2151, "step": 94900 }, { "epoch": 1.32, "learning_rate": 2.3385321612170693e-05, "loss": 1.2041, "step": 95000 }, { "epoch": 1.32, "learning_rate": 2.3378355786511376e-05, "loss": 1.2184, "step": 95100 }, { "epoch": 1.33, "learning_rate": 2.3371389960852062e-05, "loss": 1.2311, "step": 95200 }, { "epoch": 1.33, "learning_rate": 2.3364424135192745e-05, "loss": 1.186, "step": 95300 }, { "epoch": 1.33, "learning_rate": 2.335745830953343e-05, "loss": 1.1747, "step": 95400 }, { "epoch": 1.33, "learning_rate": 2.3350492483874115e-05, "loss": 1.2108, "step": 95500 }, { "epoch": 1.33, "learning_rate": 2.33435266582148e-05, "loss": 1.197, "step": 95600 }, { "epoch": 1.33, "learning_rate": 2.3336560832555484e-05, "loss": 1.2411, "step": 95700 }, { "epoch": 1.33, "learning_rate": 2.332959500689617e-05, "loss": 1.1862, "step": 95800 }, { "epoch": 1.34, "learning_rate": 2.3322629181236853e-05, "loss": 1.2287, "step": 95900 }, { "epoch": 1.34, "learning_rate": 2.3315663355577536e-05, "loss": 1.1929, "step": 96000 }, { "epoch": 1.34, "learning_rate": 2.3308767188174816e-05, "loss": 1.1839, "step": 96100 }, { "epoch": 1.34, "learning_rate": 2.33018013625155e-05, "loss": 1.2291, "step": 96200 }, { "epoch": 1.34, "learning_rate": 2.3294835536856182e-05, "loss": 1.2131, "step": 96300 }, { "epoch": 1.34, "learning_rate": 2.328786971119687e-05, "loss": 1.2304, "step": 96400 }, { "epoch": 1.34, "learning_rate": 2.328090388553755e-05, "loss": 1.1881, "step": 96500 }, { "epoch": 1.35, "learning_rate": 2.3273938059878238e-05, "loss": 1.2102, "step": 96600 }, { "epoch": 1.35, "learning_rate": 2.326697223421892e-05, "loss": 1.1755, "step": 96700 }, { "epoch": 1.35, "learning_rate": 2.3260006408559607e-05, "loss": 1.2057, "step": 96800 }, { "epoch": 1.35, "learning_rate": 2.325304058290029e-05, "loss": 1.2049, "step": 96900 }, { "epoch": 1.35, "learning_rate": 2.3246074757240977e-05, "loss": 1.2509, "step": 97000 }, { "epoch": 1.35, "learning_rate": 2.323910893158166e-05, "loss": 1.1966, "step": 97100 }, { "epoch": 1.35, "learning_rate": 2.3232143105922346e-05, "loss": 1.1735, "step": 97200 }, { "epoch": 1.36, "learning_rate": 2.322517728026303e-05, "loss": 1.1989, "step": 97300 }, { "epoch": 1.36, "learning_rate": 2.321828111286031e-05, "loss": 1.2423, "step": 97400 }, { "epoch": 1.36, "learning_rate": 2.321131528720099e-05, "loss": 1.2197, "step": 97500 }, { "epoch": 1.36, "learning_rate": 2.3204349461541678e-05, "loss": 1.2015, "step": 97600 }, { "epoch": 1.36, "learning_rate": 2.319738363588236e-05, "loss": 1.2008, "step": 97700 }, { "epoch": 1.36, "learning_rate": 2.3190417810223047e-05, "loss": 1.1893, "step": 97800 }, { "epoch": 1.36, "learning_rate": 2.318345198456373e-05, "loss": 1.2326, "step": 97900 }, { "epoch": 1.37, "learning_rate": 2.3176486158904417e-05, "loss": 1.2319, "step": 98000 }, { "epoch": 1.37, "learning_rate": 2.31695203332451e-05, "loss": 1.2047, "step": 98100 }, { "epoch": 1.37, "learning_rate": 2.3162554507585786e-05, "loss": 1.21, "step": 98200 }, { "epoch": 1.37, "learning_rate": 2.315558868192647e-05, "loss": 1.1928, "step": 98300 }, { "epoch": 1.37, "learning_rate": 2.3148622856267156e-05, "loss": 1.2059, "step": 98400 }, { "epoch": 1.37, "learning_rate": 2.314165703060784e-05, "loss": 1.2117, "step": 98500 }, { "epoch": 1.37, "learning_rate": 2.313469120494852e-05, "loss": 1.2217, "step": 98600 }, { "epoch": 1.38, "learning_rate": 2.3127725379289208e-05, "loss": 1.191, "step": 98700 }, { "epoch": 1.38, "learning_rate": 2.312075955362989e-05, "loss": 1.2245, "step": 98800 }, { "epoch": 1.38, "learning_rate": 2.3113793727970577e-05, "loss": 1.2086, "step": 98900 }, { "epoch": 1.38, "learning_rate": 2.310682790231126e-05, "loss": 1.1783, "step": 99000 }, { "epoch": 1.38, "learning_rate": 2.3099862076651947e-05, "loss": 1.2024, "step": 99100 }, { "epoch": 1.38, "learning_rate": 2.309289625099263e-05, "loss": 1.1831, "step": 99200 }, { "epoch": 1.38, "learning_rate": 2.3085930425333316e-05, "loss": 1.2045, "step": 99300 }, { "epoch": 1.38, "learning_rate": 2.3078964599674e-05, "loss": 1.1953, "step": 99400 }, { "epoch": 1.39, "learning_rate": 2.3071998774014685e-05, "loss": 1.1615, "step": 99500 }, { "epoch": 1.39, "learning_rate": 2.306503294835537e-05, "loss": 1.2064, "step": 99600 }, { "epoch": 1.39, "learning_rate": 2.3058067122696055e-05, "loss": 1.1868, "step": 99700 }, { "epoch": 1.39, "learning_rate": 2.305117095529333e-05, "loss": 1.1918, "step": 99800 }, { "epoch": 1.39, "learning_rate": 2.3044205129634018e-05, "loss": 1.2122, "step": 99900 }, { "epoch": 1.39, "learning_rate": 2.30372393039747e-05, "loss": 1.1998, "step": 100000 }, { "epoch": 1.39, "learning_rate": 2.3030273478315387e-05, "loss": 1.2122, "step": 100100 }, { "epoch": 1.4, "learning_rate": 2.302330765265607e-05, "loss": 1.1924, "step": 100200 }, { "epoch": 1.4, "learning_rate": 2.3016341826996756e-05, "loss": 1.2294, "step": 100300 }, { "epoch": 1.4, "learning_rate": 2.300937600133744e-05, "loss": 1.198, "step": 100400 }, { "epoch": 1.4, "learning_rate": 2.3002410175678126e-05, "loss": 1.2307, "step": 100500 }, { "epoch": 1.4, "learning_rate": 2.299544435001881e-05, "loss": 1.1966, "step": 100600 }, { "epoch": 1.4, "learning_rate": 2.2988478524359495e-05, "loss": 1.2221, "step": 100700 }, { "epoch": 1.4, "learning_rate": 2.2981512698700178e-05, "loss": 1.2524, "step": 100800 }, { "epoch": 1.41, "learning_rate": 2.2974546873040864e-05, "loss": 1.2349, "step": 100900 }, { "epoch": 1.41, "learning_rate": 2.2967581047381544e-05, "loss": 1.185, "step": 101000 }, { "epoch": 1.41, "learning_rate": 2.296061522172223e-05, "loss": 1.1917, "step": 101100 }, { "epoch": 1.41, "learning_rate": 2.2953649396062913e-05, "loss": 1.1746, "step": 101200 }, { "epoch": 1.41, "learning_rate": 2.29466835704036e-05, "loss": 1.1778, "step": 101300 }, { "epoch": 1.41, "learning_rate": 2.2939717744744283e-05, "loss": 1.1927, "step": 101400 }, { "epoch": 1.41, "learning_rate": 2.293275191908497e-05, "loss": 1.2341, "step": 101500 }, { "epoch": 1.42, "learning_rate": 2.2925786093425652e-05, "loss": 1.209, "step": 101600 }, { "epoch": 1.42, "learning_rate": 2.291882026776634e-05, "loss": 1.2499, "step": 101700 }, { "epoch": 1.42, "learning_rate": 2.291185444210702e-05, "loss": 1.189, "step": 101800 }, { "epoch": 1.42, "learning_rate": 2.2904888616447708e-05, "loss": 1.1962, "step": 101900 }, { "epoch": 1.42, "learning_rate": 2.2897922790788394e-05, "loss": 1.2561, "step": 102000 }, { "epoch": 1.42, "learning_rate": 2.2890956965129077e-05, "loss": 1.2059, "step": 102100 }, { "epoch": 1.42, "learning_rate": 2.2883991139469764e-05, "loss": 1.1972, "step": 102200 }, { "epoch": 1.43, "learning_rate": 2.2877025313810447e-05, "loss": 1.1873, "step": 102300 }, { "epoch": 1.43, "learning_rate": 2.2870059488151133e-05, "loss": 1.2591, "step": 102400 }, { "epoch": 1.43, "learning_rate": 2.2863093662491816e-05, "loss": 1.2266, "step": 102500 }, { "epoch": 1.43, "learning_rate": 2.2856127836832502e-05, "loss": 1.2194, "step": 102600 }, { "epoch": 1.43, "learning_rate": 2.2849162011173185e-05, "loss": 1.2086, "step": 102700 }, { "epoch": 1.43, "learning_rate": 2.284219618551387e-05, "loss": 1.2317, "step": 102800 }, { "epoch": 1.43, "learning_rate": 2.2835230359854555e-05, "loss": 1.2056, "step": 102900 }, { "epoch": 1.43, "learning_rate": 2.282826453419524e-05, "loss": 1.2364, "step": 103000 }, { "epoch": 1.44, "learning_rate": 2.2821298708535924e-05, "loss": 1.1771, "step": 103100 }, { "epoch": 1.44, "learning_rate": 2.281433288287661e-05, "loss": 1.1979, "step": 103200 }, { "epoch": 1.44, "learning_rate": 2.280736705721729e-05, "loss": 1.2382, "step": 103300 }, { "epoch": 1.44, "learning_rate": 2.2800401231557976e-05, "loss": 1.1685, "step": 103400 }, { "epoch": 1.44, "learning_rate": 2.279343540589866e-05, "loss": 1.2082, "step": 103500 }, { "epoch": 1.44, "learning_rate": 2.2786469580239346e-05, "loss": 1.2266, "step": 103600 }, { "epoch": 1.44, "learning_rate": 2.277950375458003e-05, "loss": 1.2132, "step": 103700 }, { "epoch": 1.45, "learning_rate": 2.2772537928920715e-05, "loss": 1.1876, "step": 103800 }, { "epoch": 1.45, "learning_rate": 2.2765572103261398e-05, "loss": 1.222, "step": 103900 }, { "epoch": 1.45, "learning_rate": 2.2758606277602084e-05, "loss": 1.2023, "step": 104000 }, { "epoch": 1.45, "learning_rate": 2.2751640451942767e-05, "loss": 1.1668, "step": 104100 }, { "epoch": 1.45, "learning_rate": 2.2744674626283454e-05, "loss": 1.194, "step": 104200 }, { "epoch": 1.45, "learning_rate": 2.2737708800624137e-05, "loss": 1.2058, "step": 104300 }, { "epoch": 1.45, "learning_rate": 2.2730742974964823e-05, "loss": 1.185, "step": 104400 }, { "epoch": 1.46, "learning_rate": 2.27238468075621e-05, "loss": 1.1681, "step": 104500 }, { "epoch": 1.46, "learning_rate": 2.2716880981902786e-05, "loss": 1.2321, "step": 104600 }, { "epoch": 1.46, "learning_rate": 2.270991515624347e-05, "loss": 1.2104, "step": 104700 }, { "epoch": 1.46, "learning_rate": 2.2702949330584155e-05, "loss": 1.1615, "step": 104800 }, { "epoch": 1.46, "learning_rate": 2.269598350492484e-05, "loss": 1.2305, "step": 104900 }, { "epoch": 1.46, "learning_rate": 2.2689017679265525e-05, "loss": 1.2212, "step": 105000 }, { "epoch": 1.46, "learning_rate": 2.2682051853606208e-05, "loss": 1.1814, "step": 105100 }, { "epoch": 1.47, "learning_rate": 2.2675086027946894e-05, "loss": 1.2132, "step": 105200 }, { "epoch": 1.47, "learning_rate": 2.266812020228758e-05, "loss": 1.1871, "step": 105300 }, { "epoch": 1.47, "learning_rate": 2.2661224034884857e-05, "loss": 1.2022, "step": 105400 }, { "epoch": 1.47, "learning_rate": 2.265425820922554e-05, "loss": 1.2129, "step": 105500 }, { "epoch": 1.47, "learning_rate": 2.2647292383566226e-05, "loss": 1.2058, "step": 105600 }, { "epoch": 1.47, "learning_rate": 2.264032655790691e-05, "loss": 1.2106, "step": 105700 }, { "epoch": 1.47, "learning_rate": 2.2633360732247596e-05, "loss": 1.1971, "step": 105800 }, { "epoch": 1.48, "learning_rate": 2.2626394906588275e-05, "loss": 1.1881, "step": 105900 }, { "epoch": 1.48, "learning_rate": 2.261942908092896e-05, "loss": 1.1922, "step": 106000 }, { "epoch": 1.48, "learning_rate": 2.2612463255269648e-05, "loss": 1.2067, "step": 106100 }, { "epoch": 1.48, "learning_rate": 2.260549742961033e-05, "loss": 1.2249, "step": 106200 }, { "epoch": 1.48, "learning_rate": 2.2598531603951017e-05, "loss": 1.1895, "step": 106300 }, { "epoch": 1.48, "learning_rate": 2.25915657782917e-05, "loss": 1.1837, "step": 106400 }, { "epoch": 1.48, "learning_rate": 2.2584599952632387e-05, "loss": 1.2043, "step": 106500 }, { "epoch": 1.49, "learning_rate": 2.257763412697307e-05, "loss": 1.2104, "step": 106600 }, { "epoch": 1.49, "learning_rate": 2.2570668301313756e-05, "loss": 1.2394, "step": 106700 }, { "epoch": 1.49, "learning_rate": 2.256370247565444e-05, "loss": 1.1965, "step": 106800 }, { "epoch": 1.49, "learning_rate": 2.2556736649995125e-05, "loss": 1.2418, "step": 106900 }, { "epoch": 1.49, "learning_rate": 2.254977082433581e-05, "loss": 1.2125, "step": 107000 }, { "epoch": 1.49, "learning_rate": 2.2542804998676495e-05, "loss": 1.2428, "step": 107100 }, { "epoch": 1.49, "learning_rate": 2.2535839173017178e-05, "loss": 1.2245, "step": 107200 }, { "epoch": 1.49, "learning_rate": 2.2528873347357864e-05, "loss": 1.1913, "step": 107300 }, { "epoch": 1.5, "learning_rate": 2.2521907521698547e-05, "loss": 1.1843, "step": 107400 }, { "epoch": 1.5, "learning_rate": 2.2514941696039234e-05, "loss": 1.2339, "step": 107500 }, { "epoch": 1.5, "learning_rate": 2.2507975870379917e-05, "loss": 1.1878, "step": 107600 }, { "epoch": 1.5, "learning_rate": 2.2501010044720603e-05, "loss": 1.2216, "step": 107700 }, { "epoch": 1.5, "learning_rate": 2.2494044219061286e-05, "loss": 1.1915, "step": 107800 }, { "epoch": 1.5, "learning_rate": 2.2487078393401972e-05, "loss": 1.1568, "step": 107900 }, { "epoch": 1.5, "learning_rate": 2.2480112567742655e-05, "loss": 1.2237, "step": 108000 }, { "epoch": 1.51, "learning_rate": 2.2473146742083338e-05, "loss": 1.1775, "step": 108100 }, { "epoch": 1.51, "learning_rate": 2.246618091642402e-05, "loss": 1.2169, "step": 108200 }, { "epoch": 1.51, "learning_rate": 2.2459215090764708e-05, "loss": 1.1817, "step": 108300 }, { "epoch": 1.51, "learning_rate": 2.2452249265105394e-05, "loss": 1.2129, "step": 108400 }, { "epoch": 1.51, "learning_rate": 2.2445283439446077e-05, "loss": 1.2335, "step": 108500 }, { "epoch": 1.51, "learning_rate": 2.2438317613786763e-05, "loss": 1.2294, "step": 108600 }, { "epoch": 1.51, "learning_rate": 2.2431351788127446e-05, "loss": 1.1848, "step": 108700 }, { "epoch": 1.52, "learning_rate": 2.2424385962468133e-05, "loss": 1.1672, "step": 108800 }, { "epoch": 1.52, "learning_rate": 2.2417420136808816e-05, "loss": 1.194, "step": 108900 }, { "epoch": 1.52, "learning_rate": 2.2410454311149502e-05, "loss": 1.2135, "step": 109000 }, { "epoch": 1.52, "learning_rate": 2.2403488485490185e-05, "loss": 1.2033, "step": 109100 }, { "epoch": 1.52, "learning_rate": 2.239659231808746e-05, "loss": 1.2111, "step": 109200 }, { "epoch": 1.52, "learning_rate": 2.2389626492428148e-05, "loss": 1.216, "step": 109300 }, { "epoch": 1.52, "learning_rate": 2.2382660666768834e-05, "loss": 1.2526, "step": 109400 }, { "epoch": 1.53, "learning_rate": 2.2375694841109517e-05, "loss": 1.172, "step": 109500 }, { "epoch": 1.53, "learning_rate": 2.2368729015450204e-05, "loss": 1.1855, "step": 109600 }, { "epoch": 1.53, "learning_rate": 2.2361763189790887e-05, "loss": 1.2075, "step": 109700 }, { "epoch": 1.53, "learning_rate": 2.2354797364131573e-05, "loss": 1.1811, "step": 109800 }, { "epoch": 1.53, "learning_rate": 2.2347831538472256e-05, "loss": 1.1902, "step": 109900 }, { "epoch": 1.53, "learning_rate": 2.2340865712812942e-05, "loss": 1.2013, "step": 110000 }, { "epoch": 1.53, "learning_rate": 2.2333899887153625e-05, "loss": 1.1856, "step": 110100 }, { "epoch": 1.54, "learning_rate": 2.2326934061494312e-05, "loss": 1.1709, "step": 110200 }, { "epoch": 1.54, "learning_rate": 2.2319968235834995e-05, "loss": 1.1757, "step": 110300 }, { "epoch": 1.54, "learning_rate": 2.231300241017568e-05, "loss": 1.1821, "step": 110400 }, { "epoch": 1.54, "learning_rate": 2.230603658451636e-05, "loss": 1.2171, "step": 110500 }, { "epoch": 1.54, "learning_rate": 2.2299070758857047e-05, "loss": 1.1986, "step": 110600 }, { "epoch": 1.54, "learning_rate": 2.229210493319773e-05, "loss": 1.206, "step": 110700 }, { "epoch": 1.54, "learning_rate": 2.2285139107538416e-05, "loss": 1.2128, "step": 110800 }, { "epoch": 1.55, "learning_rate": 2.22781732818791e-05, "loss": 1.1754, "step": 110900 }, { "epoch": 1.55, "learning_rate": 2.2271207456219786e-05, "loss": 1.172, "step": 111000 }, { "epoch": 1.55, "learning_rate": 2.226424163056047e-05, "loss": 1.1641, "step": 111100 }, { "epoch": 1.55, "learning_rate": 2.2257275804901155e-05, "loss": 1.2322, "step": 111200 }, { "epoch": 1.55, "learning_rate": 2.2250309979241838e-05, "loss": 1.215, "step": 111300 }, { "epoch": 1.55, "learning_rate": 2.2243344153582525e-05, "loss": 1.2181, "step": 111400 }, { "epoch": 1.55, "learning_rate": 2.2236378327923208e-05, "loss": 1.2111, "step": 111500 }, { "epoch": 1.55, "learning_rate": 2.2229412502263894e-05, "loss": 1.2139, "step": 111600 }, { "epoch": 1.56, "learning_rate": 2.222244667660458e-05, "loss": 1.2265, "step": 111700 }, { "epoch": 1.56, "learning_rate": 2.2215480850945263e-05, "loss": 1.1916, "step": 111800 }, { "epoch": 1.56, "learning_rate": 2.220851502528595e-05, "loss": 1.2285, "step": 111900 }, { "epoch": 1.56, "learning_rate": 2.2201549199626633e-05, "loss": 1.2045, "step": 112000 }, { "epoch": 1.56, "learning_rate": 2.219458337396732e-05, "loss": 1.1994, "step": 112100 }, { "epoch": 1.56, "learning_rate": 2.2187617548308002e-05, "loss": 1.2142, "step": 112200 }, { "epoch": 1.56, "learning_rate": 2.218065172264869e-05, "loss": 1.1919, "step": 112300 }, { "epoch": 1.57, "learning_rate": 2.217368589698937e-05, "loss": 1.2523, "step": 112400 }, { "epoch": 1.57, "learning_rate": 2.2166720071330058e-05, "loss": 1.2212, "step": 112500 }, { "epoch": 1.57, "learning_rate": 2.215975424567074e-05, "loss": 1.2299, "step": 112600 }, { "epoch": 1.57, "learning_rate": 2.2152788420011427e-05, "loss": 1.2114, "step": 112700 }, { "epoch": 1.57, "learning_rate": 2.2145822594352107e-05, "loss": 1.2329, "step": 112800 }, { "epoch": 1.57, "learning_rate": 2.2138856768692793e-05, "loss": 1.2369, "step": 112900 }, { "epoch": 1.57, "learning_rate": 2.2131890943033476e-05, "loss": 1.2144, "step": 113000 }, { "epoch": 1.58, "learning_rate": 2.2124925117374162e-05, "loss": 1.2427, "step": 113100 }, { "epoch": 1.58, "learning_rate": 2.2117959291714845e-05, "loss": 1.2138, "step": 113200 }, { "epoch": 1.58, "learning_rate": 2.2110993466055532e-05, "loss": 1.1789, "step": 113300 }, { "epoch": 1.58, "learning_rate": 2.2104027640396215e-05, "loss": 1.2021, "step": 113400 }, { "epoch": 1.58, "learning_rate": 2.20970618147369e-05, "loss": 1.1692, "step": 113500 }, { "epoch": 1.58, "learning_rate": 2.2090165647334178e-05, "loss": 1.1913, "step": 113600 }, { "epoch": 1.58, "learning_rate": 2.2083199821674864e-05, "loss": 1.2328, "step": 113700 }, { "epoch": 1.59, "learning_rate": 2.2076233996015547e-05, "loss": 1.2098, "step": 113800 }, { "epoch": 1.59, "learning_rate": 2.2069268170356233e-05, "loss": 1.2152, "step": 113900 }, { "epoch": 1.59, "learning_rate": 2.2062302344696916e-05, "loss": 1.2237, "step": 114000 }, { "epoch": 1.59, "learning_rate": 2.2055336519037603e-05, "loss": 1.1828, "step": 114100 }, { "epoch": 1.59, "learning_rate": 2.2048370693378286e-05, "loss": 1.1819, "step": 114200 }, { "epoch": 1.59, "learning_rate": 2.2041474525975566e-05, "loss": 1.2185, "step": 114300 }, { "epoch": 1.59, "learning_rate": 2.203450870031625e-05, "loss": 1.1798, "step": 114400 }, { "epoch": 1.6, "learning_rate": 2.2027542874656935e-05, "loss": 1.2069, "step": 114500 }, { "epoch": 1.6, "learning_rate": 2.2020577048997618e-05, "loss": 1.1889, "step": 114600 }, { "epoch": 1.6, "learning_rate": 2.2013611223338304e-05, "loss": 1.2132, "step": 114700 }, { "epoch": 1.6, "learning_rate": 2.2006645397678987e-05, "loss": 1.2078, "step": 114800 }, { "epoch": 1.6, "learning_rate": 2.1999679572019674e-05, "loss": 1.1855, "step": 114900 }, { "epoch": 1.6, "learning_rate": 2.1992713746360357e-05, "loss": 1.181, "step": 115000 }, { "epoch": 1.6, "learning_rate": 2.1985747920701043e-05, "loss": 1.1746, "step": 115100 }, { "epoch": 1.6, "learning_rate": 2.1978782095041726e-05, "loss": 1.1918, "step": 115200 }, { "epoch": 1.61, "learning_rate": 2.1971816269382412e-05, "loss": 1.1932, "step": 115300 }, { "epoch": 1.61, "learning_rate": 2.1964850443723092e-05, "loss": 1.2226, "step": 115400 }, { "epoch": 1.61, "learning_rate": 2.195788461806378e-05, "loss": 1.2463, "step": 115500 }, { "epoch": 1.61, "learning_rate": 2.195091879240446e-05, "loss": 1.1943, "step": 115600 }, { "epoch": 1.61, "learning_rate": 2.1943952966745148e-05, "loss": 1.1542, "step": 115700 }, { "epoch": 1.61, "learning_rate": 2.1936987141085834e-05, "loss": 1.2038, "step": 115800 }, { "epoch": 1.61, "learning_rate": 2.1930021315426517e-05, "loss": 1.1774, "step": 115900 }, { "epoch": 1.62, "learning_rate": 2.1923055489767203e-05, "loss": 1.2212, "step": 116000 }, { "epoch": 1.62, "learning_rate": 2.1916089664107886e-05, "loss": 1.1795, "step": 116100 }, { "epoch": 1.62, "learning_rate": 2.1909123838448573e-05, "loss": 1.2041, "step": 116200 }, { "epoch": 1.62, "learning_rate": 2.1902158012789256e-05, "loss": 1.1721, "step": 116300 }, { "epoch": 1.62, "learning_rate": 2.1895192187129942e-05, "loss": 1.2156, "step": 116400 }, { "epoch": 1.62, "learning_rate": 2.1888226361470625e-05, "loss": 1.1982, "step": 116500 }, { "epoch": 1.62, "learning_rate": 2.188126053581131e-05, "loss": 1.2099, "step": 116600 }, { "epoch": 1.63, "learning_rate": 2.1874294710151995e-05, "loss": 1.1936, "step": 116700 }, { "epoch": 1.63, "learning_rate": 2.1867398542749274e-05, "loss": 1.1853, "step": 116800 }, { "epoch": 1.63, "learning_rate": 2.1860432717089957e-05, "loss": 1.2059, "step": 116900 }, { "epoch": 1.63, "learning_rate": 2.1853466891430644e-05, "loss": 1.1683, "step": 117000 }, { "epoch": 1.63, "learning_rate": 2.1846501065771327e-05, "loss": 1.1649, "step": 117100 }, { "epoch": 1.63, "learning_rate": 2.1839535240112013e-05, "loss": 1.1969, "step": 117200 }, { "epoch": 1.63, "learning_rate": 2.1832569414452696e-05, "loss": 1.1924, "step": 117300 }, { "epoch": 1.64, "learning_rate": 2.1825603588793382e-05, "loss": 1.2366, "step": 117400 }, { "epoch": 1.64, "learning_rate": 2.1818637763134065e-05, "loss": 1.1937, "step": 117500 }, { "epoch": 1.64, "learning_rate": 2.1811671937474752e-05, "loss": 1.1794, "step": 117600 }, { "epoch": 1.64, "learning_rate": 2.1804706111815435e-05, "loss": 1.2114, "step": 117700 }, { "epoch": 1.64, "learning_rate": 2.1797740286156118e-05, "loss": 1.2059, "step": 117800 }, { "epoch": 1.64, "learning_rate": 2.17907744604968e-05, "loss": 1.1833, "step": 117900 }, { "epoch": 1.64, "learning_rate": 2.1783808634837487e-05, "loss": 1.1492, "step": 118000 }, { "epoch": 1.65, "learning_rate": 2.177684280917817e-05, "loss": 1.1716, "step": 118100 }, { "epoch": 1.65, "learning_rate": 2.1769876983518857e-05, "loss": 1.156, "step": 118200 }, { "epoch": 1.65, "learning_rate": 2.176291115785954e-05, "loss": 1.1807, "step": 118300 }, { "epoch": 1.65, "learning_rate": 2.1755945332200226e-05, "loss": 1.1942, "step": 118400 }, { "epoch": 1.65, "learning_rate": 2.174897950654091e-05, "loss": 1.2004, "step": 118500 }, { "epoch": 1.65, "learning_rate": 2.1742013680881595e-05, "loss": 1.227, "step": 118600 }, { "epoch": 1.65, "learning_rate": 2.1735047855222278e-05, "loss": 1.2213, "step": 118700 }, { "epoch": 1.66, "learning_rate": 2.1728082029562965e-05, "loss": 1.2016, "step": 118800 }, { "epoch": 1.66, "learning_rate": 2.1721116203903648e-05, "loss": 1.2137, "step": 118900 }, { "epoch": 1.66, "learning_rate": 2.1714150378244334e-05, "loss": 1.1818, "step": 119000 }, { "epoch": 1.66, "learning_rate": 2.170718455258502e-05, "loss": 1.182, "step": 119100 }, { "epoch": 1.66, "learning_rate": 2.1700218726925703e-05, "loss": 1.22, "step": 119200 }, { "epoch": 1.66, "learning_rate": 2.169325290126639e-05, "loss": 1.1563, "step": 119300 }, { "epoch": 1.66, "learning_rate": 2.1686287075607073e-05, "loss": 1.2058, "step": 119400 }, { "epoch": 1.66, "learning_rate": 2.167932124994776e-05, "loss": 1.2657, "step": 119500 }, { "epoch": 1.67, "learning_rate": 2.1672355424288442e-05, "loss": 1.182, "step": 119600 }, { "epoch": 1.67, "learning_rate": 2.166538959862913e-05, "loss": 1.1985, "step": 119700 }, { "epoch": 1.67, "learning_rate": 2.165842377296981e-05, "loss": 1.1724, "step": 119800 }, { "epoch": 1.67, "learning_rate": 2.1651457947310498e-05, "loss": 1.1759, "step": 119900 }, { "epoch": 1.67, "learning_rate": 2.164449212165118e-05, "loss": 1.208, "step": 120000 }, { "epoch": 1.67, "learning_rate": 2.1637526295991864e-05, "loss": 1.2373, "step": 120100 }, { "epoch": 1.67, "learning_rate": 2.1630560470332547e-05, "loss": 1.1772, "step": 120200 }, { "epoch": 1.68, "learning_rate": 2.1623594644673233e-05, "loss": 1.1855, "step": 120300 }, { "epoch": 1.68, "learning_rate": 2.1616628819013916e-05, "loss": 1.2035, "step": 120400 }, { "epoch": 1.68, "learning_rate": 2.1609662993354603e-05, "loss": 1.1757, "step": 120500 }, { "epoch": 1.68, "learning_rate": 2.1602697167695286e-05, "loss": 1.2168, "step": 120600 }, { "epoch": 1.68, "learning_rate": 2.1595731342035972e-05, "loss": 1.1868, "step": 120700 }, { "epoch": 1.68, "learning_rate": 2.1588765516376655e-05, "loss": 1.1752, "step": 120800 }, { "epoch": 1.68, "learning_rate": 2.158179969071734e-05, "loss": 1.1586, "step": 120900 }, { "epoch": 1.69, "learning_rate": 2.1574903523314618e-05, "loss": 1.1997, "step": 121000 }, { "epoch": 1.69, "learning_rate": 2.1567937697655304e-05, "loss": 1.1929, "step": 121100 }, { "epoch": 1.69, "learning_rate": 2.1560971871995987e-05, "loss": 1.213, "step": 121200 }, { "epoch": 1.69, "learning_rate": 2.1554006046336673e-05, "loss": 1.2179, "step": 121300 }, { "epoch": 1.69, "learning_rate": 2.1547040220677356e-05, "loss": 1.1815, "step": 121400 }, { "epoch": 1.69, "learning_rate": 2.1540074395018043e-05, "loss": 1.2464, "step": 121500 }, { "epoch": 1.69, "learning_rate": 2.1533108569358726e-05, "loss": 1.2206, "step": 121600 }, { "epoch": 1.7, "learning_rate": 2.1526142743699412e-05, "loss": 1.2226, "step": 121700 }, { "epoch": 1.7, "learning_rate": 2.1519176918040095e-05, "loss": 1.2398, "step": 121800 }, { "epoch": 1.7, "learning_rate": 2.151221109238078e-05, "loss": 1.1899, "step": 121900 }, { "epoch": 1.7, "learning_rate": 2.1505245266721465e-05, "loss": 1.1707, "step": 122000 }, { "epoch": 1.7, "learning_rate": 2.149827944106215e-05, "loss": 1.206, "step": 122100 }, { "epoch": 1.7, "learning_rate": 2.1491313615402834e-05, "loss": 1.1963, "step": 122200 }, { "epoch": 1.7, "learning_rate": 2.148434778974352e-05, "loss": 1.1995, "step": 122300 }, { "epoch": 1.71, "learning_rate": 2.1477451622340797e-05, "loss": 1.1909, "step": 122400 }, { "epoch": 1.71, "learning_rate": 2.1470485796681483e-05, "loss": 1.1914, "step": 122500 }, { "epoch": 1.71, "learning_rate": 2.1463519971022166e-05, "loss": 1.2262, "step": 122600 }, { "epoch": 1.71, "learning_rate": 2.145655414536285e-05, "loss": 1.1817, "step": 122700 }, { "epoch": 1.71, "learning_rate": 2.1449588319703532e-05, "loss": 1.1918, "step": 122800 }, { "epoch": 1.71, "learning_rate": 2.144262249404422e-05, "loss": 1.1855, "step": 122900 }, { "epoch": 1.71, "learning_rate": 2.14356566683849e-05, "loss": 1.202, "step": 123000 }, { "epoch": 1.71, "learning_rate": 2.1428690842725588e-05, "loss": 1.2127, "step": 123100 }, { "epoch": 1.72, "learning_rate": 2.1421725017066274e-05, "loss": 1.1939, "step": 123200 }, { "epoch": 1.72, "learning_rate": 2.1414759191406957e-05, "loss": 1.1893, "step": 123300 }, { "epoch": 1.72, "learning_rate": 2.1407793365747644e-05, "loss": 1.1822, "step": 123400 }, { "epoch": 1.72, "learning_rate": 2.1400827540088327e-05, "loss": 1.203, "step": 123500 }, { "epoch": 1.72, "learning_rate": 2.1393861714429013e-05, "loss": 1.2094, "step": 123600 }, { "epoch": 1.72, "learning_rate": 2.1386895888769696e-05, "loss": 1.1569, "step": 123700 }, { "epoch": 1.72, "learning_rate": 2.1379930063110382e-05, "loss": 1.1824, "step": 123800 }, { "epoch": 1.73, "learning_rate": 2.1372964237451065e-05, "loss": 1.2077, "step": 123900 }, { "epoch": 1.73, "learning_rate": 2.136599841179175e-05, "loss": 1.241, "step": 124000 }, { "epoch": 1.73, "learning_rate": 2.1359032586132435e-05, "loss": 1.2163, "step": 124100 }, { "epoch": 1.73, "learning_rate": 2.135206676047312e-05, "loss": 1.1717, "step": 124200 }, { "epoch": 1.73, "learning_rate": 2.1345100934813804e-05, "loss": 1.1579, "step": 124300 }, { "epoch": 1.73, "learning_rate": 2.133813510915449e-05, "loss": 1.1677, "step": 124400 }, { "epoch": 1.73, "learning_rate": 2.1331169283495173e-05, "loss": 1.17, "step": 124500 }, { "epoch": 1.74, "learning_rate": 2.132420345783586e-05, "loss": 1.2317, "step": 124600 }, { "epoch": 1.74, "learning_rate": 2.1317237632176543e-05, "loss": 1.2105, "step": 124700 }, { "epoch": 1.74, "learning_rate": 2.131027180651723e-05, "loss": 1.2142, "step": 124800 }, { "epoch": 1.74, "learning_rate": 2.130330598085791e-05, "loss": 1.2362, "step": 124900 }, { "epoch": 1.74, "learning_rate": 2.1296340155198595e-05, "loss": 1.2074, "step": 125000 }, { "epoch": 1.74, "learning_rate": 2.1289374329539278e-05, "loss": 1.2149, "step": 125100 }, { "epoch": 1.74, "learning_rate": 2.1282408503879965e-05, "loss": 1.1955, "step": 125200 }, { "epoch": 1.75, "learning_rate": 2.1275442678220647e-05, "loss": 1.1841, "step": 125300 }, { "epoch": 1.75, "learning_rate": 2.1268476852561334e-05, "loss": 1.1668, "step": 125400 }, { "epoch": 1.75, "learning_rate": 2.1261511026902017e-05, "loss": 1.2053, "step": 125500 }, { "epoch": 1.75, "learning_rate": 2.1254545201242703e-05, "loss": 1.2002, "step": 125600 }, { "epoch": 1.75, "learning_rate": 2.124764903383998e-05, "loss": 1.1777, "step": 125700 }, { "epoch": 1.75, "learning_rate": 2.1240683208180666e-05, "loss": 1.207, "step": 125800 }, { "epoch": 1.75, "learning_rate": 2.123371738252135e-05, "loss": 1.1941, "step": 125900 }, { "epoch": 1.76, "learning_rate": 2.1226751556862035e-05, "loss": 1.2201, "step": 126000 }, { "epoch": 1.76, "learning_rate": 2.121978573120272e-05, "loss": 1.1776, "step": 126100 }, { "epoch": 1.76, "learning_rate": 2.1212819905543405e-05, "loss": 1.2108, "step": 126200 }, { "epoch": 1.76, "learning_rate": 2.1205854079884088e-05, "loss": 1.203, "step": 126300 }, { "epoch": 1.76, "learning_rate": 2.1198888254224774e-05, "loss": 1.1849, "step": 126400 }, { "epoch": 1.76, "learning_rate": 2.119192242856546e-05, "loss": 1.2132, "step": 126500 }, { "epoch": 1.76, "learning_rate": 2.1184956602906144e-05, "loss": 1.1879, "step": 126600 }, { "epoch": 1.77, "learning_rate": 2.117799077724683e-05, "loss": 1.2181, "step": 126700 }, { "epoch": 1.77, "learning_rate": 2.1171024951587513e-05, "loss": 1.2248, "step": 126800 }, { "epoch": 1.77, "learning_rate": 2.11640591259282e-05, "loss": 1.184, "step": 126900 }, { "epoch": 1.77, "learning_rate": 2.1157093300268882e-05, "loss": 1.1824, "step": 127000 }, { "epoch": 1.77, "learning_rate": 2.115012747460957e-05, "loss": 1.207, "step": 127100 }, { "epoch": 1.77, "learning_rate": 2.114316164895025e-05, "loss": 1.1663, "step": 127200 }, { "epoch": 1.77, "learning_rate": 2.1136195823290935e-05, "loss": 1.1765, "step": 127300 }, { "epoch": 1.77, "learning_rate": 2.1129229997631618e-05, "loss": 1.2084, "step": 127400 }, { "epoch": 1.78, "learning_rate": 2.1122264171972304e-05, "loss": 1.1856, "step": 127500 }, { "epoch": 1.78, "learning_rate": 2.1115298346312987e-05, "loss": 1.2146, "step": 127600 }, { "epoch": 1.78, "learning_rate": 2.1108332520653673e-05, "loss": 1.2269, "step": 127700 }, { "epoch": 1.78, "learning_rate": 2.110143635325095e-05, "loss": 1.1899, "step": 127800 }, { "epoch": 1.78, "learning_rate": 2.1094470527591636e-05, "loss": 1.212, "step": 127900 }, { "epoch": 1.78, "learning_rate": 2.108750470193232e-05, "loss": 1.178, "step": 128000 }, { "epoch": 1.78, "learning_rate": 2.1080538876273006e-05, "loss": 1.2044, "step": 128100 }, { "epoch": 1.79, "learning_rate": 2.107357305061369e-05, "loss": 1.1994, "step": 128200 }, { "epoch": 1.79, "learning_rate": 2.1066607224954375e-05, "loss": 1.16, "step": 128300 }, { "epoch": 1.79, "learning_rate": 2.1059641399295058e-05, "loss": 1.2212, "step": 128400 }, { "epoch": 1.79, "learning_rate": 2.1052675573635744e-05, "loss": 1.1828, "step": 128500 }, { "epoch": 1.79, "learning_rate": 2.1045709747976427e-05, "loss": 1.185, "step": 128600 }, { "epoch": 1.79, "learning_rate": 2.1038743922317114e-05, "loss": 1.2089, "step": 128700 }, { "epoch": 1.79, "learning_rate": 2.1031778096657797e-05, "loss": 1.2048, "step": 128800 }, { "epoch": 1.8, "learning_rate": 2.1024812270998483e-05, "loss": 1.2108, "step": 128900 }, { "epoch": 1.8, "learning_rate": 2.1017846445339166e-05, "loss": 1.1907, "step": 129000 }, { "epoch": 1.8, "learning_rate": 2.1010880619679852e-05, "loss": 1.1854, "step": 129100 }, { "epoch": 1.8, "learning_rate": 2.1003914794020535e-05, "loss": 1.1757, "step": 129200 }, { "epoch": 1.8, "learning_rate": 2.0996948968361222e-05, "loss": 1.1574, "step": 129300 }, { "epoch": 1.8, "learning_rate": 2.0989983142701905e-05, "loss": 1.2082, "step": 129400 }, { "epoch": 1.8, "learning_rate": 2.098301731704259e-05, "loss": 1.1877, "step": 129500 }, { "epoch": 1.81, "learning_rate": 2.0976051491383274e-05, "loss": 1.1789, "step": 129600 }, { "epoch": 1.81, "learning_rate": 2.0969085665723957e-05, "loss": 1.1804, "step": 129700 }, { "epoch": 1.81, "learning_rate": 2.0962119840064643e-05, "loss": 1.212, "step": 129800 }, { "epoch": 1.81, "learning_rate": 2.0955154014405326e-05, "loss": 1.2151, "step": 129900 }, { "epoch": 1.81, "learning_rate": 2.0948188188746013e-05, "loss": 1.1798, "step": 130000 }, { "epoch": 1.81, "learning_rate": 2.0941222363086696e-05, "loss": 1.1934, "step": 130100 }, { "epoch": 1.81, "learning_rate": 2.0934256537427382e-05, "loss": 1.1635, "step": 130200 }, { "epoch": 1.82, "learning_rate": 2.0927290711768065e-05, "loss": 1.2152, "step": 130300 }, { "epoch": 1.82, "learning_rate": 2.092032488610875e-05, "loss": 1.1814, "step": 130400 }, { "epoch": 1.82, "learning_rate": 2.0913359060449435e-05, "loss": 1.2153, "step": 130500 }, { "epoch": 1.82, "learning_rate": 2.090639323479012e-05, "loss": 1.1722, "step": 130600 }, { "epoch": 1.82, "learning_rate": 2.0899427409130804e-05, "loss": 1.1957, "step": 130700 }, { "epoch": 1.82, "learning_rate": 2.089246158347149e-05, "loss": 1.1688, "step": 130800 }, { "epoch": 1.82, "learning_rate": 2.0885495757812173e-05, "loss": 1.2204, "step": 130900 }, { "epoch": 1.83, "learning_rate": 2.087852993215286e-05, "loss": 1.1939, "step": 131000 }, { "epoch": 1.83, "learning_rate": 2.0871564106493543e-05, "loss": 1.1904, "step": 131100 }, { "epoch": 1.83, "learning_rate": 2.0864667939090822e-05, "loss": 1.187, "step": 131200 }, { "epoch": 1.83, "learning_rate": 2.0857702113431505e-05, "loss": 1.1394, "step": 131300 }, { "epoch": 1.83, "learning_rate": 2.0850736287772192e-05, "loss": 1.1678, "step": 131400 }, { "epoch": 1.83, "learning_rate": 2.0843770462112875e-05, "loss": 1.2065, "step": 131500 }, { "epoch": 1.83, "learning_rate": 2.083680463645356e-05, "loss": 1.2385, "step": 131600 }, { "epoch": 1.83, "learning_rate": 2.0829838810794244e-05, "loss": 1.1825, "step": 131700 }, { "epoch": 1.84, "learning_rate": 2.082287298513493e-05, "loss": 1.2232, "step": 131800 }, { "epoch": 1.84, "learning_rate": 2.0815907159475614e-05, "loss": 1.2067, "step": 131900 }, { "epoch": 1.84, "learning_rate": 2.08089413338163e-05, "loss": 1.2003, "step": 132000 }, { "epoch": 1.84, "learning_rate": 2.0801975508156983e-05, "loss": 1.1924, "step": 132100 }, { "epoch": 1.84, "learning_rate": 2.0795009682497666e-05, "loss": 1.1814, "step": 132200 }, { "epoch": 1.84, "learning_rate": 2.078804385683835e-05, "loss": 1.2183, "step": 132300 }, { "epoch": 1.84, "learning_rate": 2.0781078031179035e-05, "loss": 1.1836, "step": 132400 }, { "epoch": 1.85, "learning_rate": 2.0774112205519718e-05, "loss": 1.2281, "step": 132500 }, { "epoch": 1.85, "learning_rate": 2.0767146379860405e-05, "loss": 1.2025, "step": 132600 }, { "epoch": 1.85, "learning_rate": 2.0760180554201088e-05, "loss": 1.2013, "step": 132700 }, { "epoch": 1.85, "learning_rate": 2.0753214728541774e-05, "loss": 1.2316, "step": 132800 }, { "epoch": 1.85, "learning_rate": 2.074624890288246e-05, "loss": 1.2001, "step": 132900 }, { "epoch": 1.85, "learning_rate": 2.0739283077223143e-05, "loss": 1.2178, "step": 133000 }, { "epoch": 1.85, "learning_rate": 2.073231725156383e-05, "loss": 1.1835, "step": 133100 }, { "epoch": 1.86, "learning_rate": 2.0725351425904513e-05, "loss": 1.2126, "step": 133200 }, { "epoch": 1.86, "learning_rate": 2.07183856002452e-05, "loss": 1.1815, "step": 133300 }, { "epoch": 1.86, "learning_rate": 2.0711419774585882e-05, "loss": 1.1677, "step": 133400 }, { "epoch": 1.86, "learning_rate": 2.070445394892657e-05, "loss": 1.1902, "step": 133500 }, { "epoch": 1.86, "learning_rate": 2.069748812326725e-05, "loss": 1.1912, "step": 133600 }, { "epoch": 1.86, "learning_rate": 2.0690522297607938e-05, "loss": 1.2141, "step": 133700 }, { "epoch": 1.86, "learning_rate": 2.068355647194862e-05, "loss": 1.229, "step": 133800 }, { "epoch": 1.87, "learning_rate": 2.0676590646289307e-05, "loss": 1.1643, "step": 133900 }, { "epoch": 1.87, "learning_rate": 2.066962482062999e-05, "loss": 1.2217, "step": 134000 }, { "epoch": 1.87, "learning_rate": 2.0662658994970677e-05, "loss": 1.164, "step": 134100 }, { "epoch": 1.87, "learning_rate": 2.065569316931136e-05, "loss": 1.2169, "step": 134200 }, { "epoch": 1.87, "learning_rate": 2.0648727343652046e-05, "loss": 1.2057, "step": 134300 }, { "epoch": 1.87, "learning_rate": 2.0641761517992726e-05, "loss": 1.1871, "step": 134400 }, { "epoch": 1.87, "learning_rate": 2.063486535059001e-05, "loss": 1.2043, "step": 134500 }, { "epoch": 1.88, "learning_rate": 2.062789952493069e-05, "loss": 1.2072, "step": 134600 }, { "epoch": 1.88, "learning_rate": 2.0620933699271375e-05, "loss": 1.2292, "step": 134700 }, { "epoch": 1.88, "learning_rate": 2.0613967873612058e-05, "loss": 1.2426, "step": 134800 }, { "epoch": 1.88, "learning_rate": 2.0607002047952744e-05, "loss": 1.1745, "step": 134900 }, { "epoch": 1.88, "learning_rate": 2.0600036222293427e-05, "loss": 1.2162, "step": 135000 }, { "epoch": 1.88, "learning_rate": 2.0593070396634113e-05, "loss": 1.1857, "step": 135100 }, { "epoch": 1.88, "learning_rate": 2.0586104570974796e-05, "loss": 1.2112, "step": 135200 }, { "epoch": 1.88, "learning_rate": 2.0579138745315483e-05, "loss": 1.1718, "step": 135300 }, { "epoch": 1.89, "learning_rate": 2.0572172919656166e-05, "loss": 1.189, "step": 135400 }, { "epoch": 1.89, "learning_rate": 2.0565207093996852e-05, "loss": 1.1931, "step": 135500 }, { "epoch": 1.89, "learning_rate": 2.0558241268337535e-05, "loss": 1.1882, "step": 135600 }, { "epoch": 1.89, "learning_rate": 2.055127544267822e-05, "loss": 1.1711, "step": 135700 }, { "epoch": 1.89, "learning_rate": 2.0544309617018905e-05, "loss": 1.1907, "step": 135800 }, { "epoch": 1.89, "learning_rate": 2.053734379135959e-05, "loss": 1.2113, "step": 135900 }, { "epoch": 1.89, "learning_rate": 2.0530377965700274e-05, "loss": 1.2093, "step": 136000 }, { "epoch": 1.9, "learning_rate": 2.052341214004096e-05, "loss": 1.1927, "step": 136100 }, { "epoch": 1.9, "learning_rate": 2.0516446314381647e-05, "loss": 1.2145, "step": 136200 }, { "epoch": 1.9, "learning_rate": 2.050948048872233e-05, "loss": 1.1858, "step": 136300 }, { "epoch": 1.9, "learning_rate": 2.0502514663063016e-05, "loss": 1.1903, "step": 136400 }, { "epoch": 1.9, "learning_rate": 2.04955488374037e-05, "loss": 1.1671, "step": 136500 }, { "epoch": 1.9, "learning_rate": 2.0488583011744385e-05, "loss": 1.1913, "step": 136600 }, { "epoch": 1.9, "learning_rate": 2.048161718608507e-05, "loss": 1.2293, "step": 136700 }, { "epoch": 1.91, "learning_rate": 2.047465136042575e-05, "loss": 1.1974, "step": 136800 }, { "epoch": 1.91, "learning_rate": 2.0467685534766434e-05, "loss": 1.2024, "step": 136900 }, { "epoch": 1.91, "learning_rate": 2.046071970910712e-05, "loss": 1.2, "step": 137000 }, { "epoch": 1.91, "learning_rate": 2.0453753883447804e-05, "loss": 1.1975, "step": 137100 }, { "epoch": 1.91, "learning_rate": 2.044678805778849e-05, "loss": 1.2117, "step": 137200 }, { "epoch": 1.91, "learning_rate": 2.0439822232129173e-05, "loss": 1.1786, "step": 137300 }, { "epoch": 1.91, "learning_rate": 2.043285640646986e-05, "loss": 1.193, "step": 137400 }, { "epoch": 1.92, "learning_rate": 2.0425890580810542e-05, "loss": 1.2061, "step": 137500 }, { "epoch": 1.92, "learning_rate": 2.041892475515123e-05, "loss": 1.1696, "step": 137600 }, { "epoch": 1.92, "learning_rate": 2.0411958929491912e-05, "loss": 1.1869, "step": 137700 }, { "epoch": 1.92, "learning_rate": 2.0404993103832598e-05, "loss": 1.2024, "step": 137800 }, { "epoch": 1.92, "learning_rate": 2.039802727817328e-05, "loss": 1.1441, "step": 137900 }, { "epoch": 1.92, "learning_rate": 2.0391061452513968e-05, "loss": 1.1947, "step": 138000 }, { "epoch": 1.92, "learning_rate": 2.038409562685465e-05, "loss": 1.1952, "step": 138100 }, { "epoch": 1.93, "learning_rate": 2.0377129801195337e-05, "loss": 1.2063, "step": 138200 }, { "epoch": 1.93, "learning_rate": 2.037016397553602e-05, "loss": 1.1833, "step": 138300 }, { "epoch": 1.93, "learning_rate": 2.0363198149876706e-05, "loss": 1.199, "step": 138400 }, { "epoch": 1.93, "learning_rate": 2.035623232421739e-05, "loss": 1.176, "step": 138500 }, { "epoch": 1.93, "learning_rate": 2.0349266498558076e-05, "loss": 1.1947, "step": 138600 }, { "epoch": 1.93, "learning_rate": 2.0342300672898762e-05, "loss": 1.1985, "step": 138700 }, { "epoch": 1.93, "learning_rate": 2.0335334847239445e-05, "loss": 1.19, "step": 138800 }, { "epoch": 1.94, "learning_rate": 2.032836902158013e-05, "loss": 1.1761, "step": 138900 }, { "epoch": 1.94, "learning_rate": 2.0321472854177408e-05, "loss": 1.1637, "step": 139000 }, { "epoch": 1.94, "learning_rate": 2.031450702851809e-05, "loss": 1.1896, "step": 139100 }, { "epoch": 1.94, "learning_rate": 2.0307541202858777e-05, "loss": 1.1838, "step": 139200 }, { "epoch": 1.94, "learning_rate": 2.0300575377199457e-05, "loss": 1.2248, "step": 139300 }, { "epoch": 1.94, "learning_rate": 2.0293609551540143e-05, "loss": 1.2136, "step": 139400 }, { "epoch": 1.94, "learning_rate": 2.028664372588083e-05, "loss": 1.1767, "step": 139500 }, { "epoch": 1.94, "learning_rate": 2.0279677900221513e-05, "loss": 1.2188, "step": 139600 }, { "epoch": 1.95, "learning_rate": 2.02727120745622e-05, "loss": 1.1943, "step": 139700 }, { "epoch": 1.95, "learning_rate": 2.0265746248902882e-05, "loss": 1.1964, "step": 139800 }, { "epoch": 1.95, "learning_rate": 2.0258780423243568e-05, "loss": 1.1937, "step": 139900 }, { "epoch": 1.95, "learning_rate": 2.025181459758425e-05, "loss": 1.1766, "step": 140000 }, { "epoch": 1.95, "learning_rate": 2.0244848771924938e-05, "loss": 1.2146, "step": 140100 }, { "epoch": 1.95, "learning_rate": 2.023788294626562e-05, "loss": 1.2048, "step": 140200 }, { "epoch": 1.95, "learning_rate": 2.0230917120606307e-05, "loss": 1.1679, "step": 140300 }, { "epoch": 1.96, "learning_rate": 2.022395129494699e-05, "loss": 1.2035, "step": 140400 }, { "epoch": 1.96, "learning_rate": 2.0216985469287676e-05, "loss": 1.1797, "step": 140500 }, { "epoch": 1.96, "learning_rate": 2.021001964362836e-05, "loss": 1.1798, "step": 140600 }, { "epoch": 1.96, "learning_rate": 2.0203053817969046e-05, "loss": 1.1807, "step": 140700 }, { "epoch": 1.96, "learning_rate": 2.0196157650566322e-05, "loss": 1.2107, "step": 140800 }, { "epoch": 1.96, "learning_rate": 2.018919182490701e-05, "loss": 1.1618, "step": 140900 }, { "epoch": 1.96, "learning_rate": 2.018222599924769e-05, "loss": 1.18, "step": 141000 }, { "epoch": 1.97, "learning_rate": 2.0175260173588378e-05, "loss": 1.257, "step": 141100 }, { "epoch": 1.97, "learning_rate": 2.016829434792906e-05, "loss": 1.1782, "step": 141200 }, { "epoch": 1.97, "learning_rate": 2.0161328522269747e-05, "loss": 1.2087, "step": 141300 }, { "epoch": 1.97, "learning_rate": 2.015436269661043e-05, "loss": 1.1758, "step": 141400 }, { "epoch": 1.97, "learning_rate": 2.0147396870951117e-05, "loss": 1.1777, "step": 141500 }, { "epoch": 1.97, "learning_rate": 2.01404310452918e-05, "loss": 1.1925, "step": 141600 }, { "epoch": 1.97, "learning_rate": 2.0133465219632483e-05, "loss": 1.1688, "step": 141700 }, { "epoch": 1.98, "learning_rate": 2.0126499393973166e-05, "loss": 1.1683, "step": 141800 }, { "epoch": 1.98, "learning_rate": 2.0119533568313852e-05, "loss": 1.156, "step": 141900 }, { "epoch": 1.98, "learning_rate": 2.0112567742654535e-05, "loss": 1.1716, "step": 142000 }, { "epoch": 1.98, "learning_rate": 2.010560191699522e-05, "loss": 1.1522, "step": 142100 }, { "epoch": 1.98, "learning_rate": 2.0098636091335904e-05, "loss": 1.184, "step": 142200 }, { "epoch": 1.98, "learning_rate": 2.009167026567659e-05, "loss": 1.1877, "step": 142300 }, { "epoch": 1.98, "learning_rate": 2.0084704440017274e-05, "loss": 1.1699, "step": 142400 }, { "epoch": 1.99, "learning_rate": 2.007773861435796e-05, "loss": 1.1616, "step": 142500 }, { "epoch": 1.99, "learning_rate": 2.0070772788698643e-05, "loss": 1.1984, "step": 142600 }, { "epoch": 1.99, "learning_rate": 2.006380696303933e-05, "loss": 1.1819, "step": 142700 }, { "epoch": 1.99, "learning_rate": 2.0056841137380016e-05, "loss": 1.2229, "step": 142800 }, { "epoch": 1.99, "learning_rate": 2.00498753117207e-05, "loss": 1.1845, "step": 142900 }, { "epoch": 1.99, "learning_rate": 2.0042909486061385e-05, "loss": 1.1822, "step": 143000 }, { "epoch": 1.99, "learning_rate": 2.003601331865866e-05, "loss": 1.2037, "step": 143100 }, { "epoch": 2.0, "learning_rate": 2.0029047492999345e-05, "loss": 1.1949, "step": 143200 }, { "epoch": 2.0, "learning_rate": 2.0022151325596624e-05, "loss": 1.1887, "step": 143300 }, { "epoch": 2.0, "learning_rate": 2.00152551581939e-05, "loss": 1.1854, "step": 143400 }, { "epoch": 2.0, "learning_rate": 2.0008289332534587e-05, "loss": 1.191, "step": 143500 }, { "epoch": 2.0, "eval_gen_len": 20.0, "eval_loss": 1.1614574193954468, "eval_rouge1": 11.8484, "eval_rouge2": 3.363, "eval_rougeL": 11.4175, "eval_rougeLsum": 11.5037, "eval_runtime": 1532.2202, "eval_samples_per_second": 8.725, "eval_steps_per_second": 2.181, "step": 143558 }, { "epoch": 2.0, "learning_rate": 2.000132350687527e-05, "loss": 1.1515, "step": 143600 }, { "epoch": 2.0, "learning_rate": 1.9994357681215957e-05, "loss": 1.0722, "step": 143700 }, { "epoch": 2.0, "learning_rate": 1.998739185555664e-05, "loss": 1.0699, "step": 143800 }, { "epoch": 2.0, "learning_rate": 1.9980426029897326e-05, "loss": 1.0917, "step": 143900 }, { "epoch": 2.01, "learning_rate": 1.997346020423801e-05, "loss": 1.0699, "step": 144000 }, { "epoch": 2.01, "learning_rate": 1.9966494378578695e-05, "loss": 1.0534, "step": 144100 }, { "epoch": 2.01, "learning_rate": 1.995952855291938e-05, "loss": 1.0903, "step": 144200 }, { "epoch": 2.01, "learning_rate": 1.9952562727260065e-05, "loss": 1.0532, "step": 144300 }, { "epoch": 2.01, "learning_rate": 1.9945596901600748e-05, "loss": 1.0783, "step": 144400 }, { "epoch": 2.01, "learning_rate": 1.993863107594143e-05, "loss": 1.0558, "step": 144500 }, { "epoch": 2.01, "learning_rate": 1.9931665250282114e-05, "loss": 1.0627, "step": 144600 }, { "epoch": 2.02, "learning_rate": 1.99246994246228e-05, "loss": 1.1152, "step": 144700 }, { "epoch": 2.02, "learning_rate": 1.9917733598963483e-05, "loss": 1.0542, "step": 144800 }, { "epoch": 2.02, "learning_rate": 1.991076777330417e-05, "loss": 1.0506, "step": 144900 }, { "epoch": 2.02, "learning_rate": 1.9903801947644852e-05, "loss": 1.0801, "step": 145000 }, { "epoch": 2.02, "learning_rate": 1.989683612198554e-05, "loss": 1.0663, "step": 145100 }, { "epoch": 2.02, "learning_rate": 1.9889870296326225e-05, "loss": 1.1189, "step": 145200 }, { "epoch": 2.02, "learning_rate": 1.9882904470666908e-05, "loss": 1.0802, "step": 145300 }, { "epoch": 2.03, "learning_rate": 1.9875938645007595e-05, "loss": 1.0668, "step": 145400 }, { "epoch": 2.03, "learning_rate": 1.9868972819348278e-05, "loss": 1.0909, "step": 145500 }, { "epoch": 2.03, "learning_rate": 1.9862006993688964e-05, "loss": 1.0837, "step": 145600 }, { "epoch": 2.03, "learning_rate": 1.9855041168029647e-05, "loss": 1.1061, "step": 145700 }, { "epoch": 2.03, "learning_rate": 1.9848075342370333e-05, "loss": 1.0728, "step": 145800 }, { "epoch": 2.03, "learning_rate": 1.9841109516711016e-05, "loss": 1.0877, "step": 145900 }, { "epoch": 2.03, "learning_rate": 1.9834143691051703e-05, "loss": 1.0797, "step": 146000 }, { "epoch": 2.04, "learning_rate": 1.9827177865392386e-05, "loss": 1.0839, "step": 146100 }, { "epoch": 2.04, "learning_rate": 1.9820212039733072e-05, "loss": 1.0552, "step": 146200 }, { "epoch": 2.04, "learning_rate": 1.9813246214073755e-05, "loss": 1.1046, "step": 146300 }, { "epoch": 2.04, "learning_rate": 1.980628038841444e-05, "loss": 1.066, "step": 146400 }, { "epoch": 2.04, "learning_rate": 1.9799314562755124e-05, "loss": 1.0863, "step": 146500 }, { "epoch": 2.04, "learning_rate": 1.979234873709581e-05, "loss": 1.0575, "step": 146600 }, { "epoch": 2.04, "learning_rate": 1.978538291143649e-05, "loss": 1.0753, "step": 146700 }, { "epoch": 2.05, "learning_rate": 1.9778417085777177e-05, "loss": 1.0724, "step": 146800 }, { "epoch": 2.05, "learning_rate": 1.977145126011786e-05, "loss": 1.0711, "step": 146900 }, { "epoch": 2.05, "learning_rate": 1.9764485434458546e-05, "loss": 1.0756, "step": 147000 }, { "epoch": 2.05, "learning_rate": 1.975751960879923e-05, "loss": 1.0942, "step": 147100 }, { "epoch": 2.05, "learning_rate": 1.9750553783139915e-05, "loss": 1.0993, "step": 147200 }, { "epoch": 2.05, "learning_rate": 1.97435879574806e-05, "loss": 1.0571, "step": 147300 }, { "epoch": 2.05, "learning_rate": 1.9736622131821285e-05, "loss": 1.0705, "step": 147400 }, { "epoch": 2.05, "learning_rate": 1.9729656306161968e-05, "loss": 1.0765, "step": 147500 }, { "epoch": 2.06, "learning_rate": 1.9722690480502654e-05, "loss": 1.0937, "step": 147600 }, { "epoch": 2.06, "learning_rate": 1.971572465484334e-05, "loss": 1.0741, "step": 147700 }, { "epoch": 2.06, "learning_rate": 1.9708758829184024e-05, "loss": 1.08, "step": 147800 }, { "epoch": 2.06, "learning_rate": 1.970179300352471e-05, "loss": 1.0438, "step": 147900 }, { "epoch": 2.06, "learning_rate": 1.9694827177865393e-05, "loss": 1.086, "step": 148000 }, { "epoch": 2.06, "learning_rate": 1.968786135220608e-05, "loss": 1.0679, "step": 148100 }, { "epoch": 2.06, "learning_rate": 1.9680895526546762e-05, "loss": 1.0575, "step": 148200 }, { "epoch": 2.07, "learning_rate": 1.967392970088745e-05, "loss": 1.0817, "step": 148300 }, { "epoch": 2.07, "learning_rate": 1.966696387522813e-05, "loss": 1.0588, "step": 148400 }, { "epoch": 2.07, "learning_rate": 1.9659998049568818e-05, "loss": 1.0579, "step": 148500 }, { "epoch": 2.07, "learning_rate": 1.96530322239095e-05, "loss": 1.1229, "step": 148600 }, { "epoch": 2.07, "learning_rate": 1.9646066398250187e-05, "loss": 1.0805, "step": 148700 }, { "epoch": 2.07, "learning_rate": 1.963910057259087e-05, "loss": 1.0663, "step": 148800 }, { "epoch": 2.07, "learning_rate": 1.9632134746931553e-05, "loss": 1.0779, "step": 148900 }, { "epoch": 2.08, "learning_rate": 1.9625168921272236e-05, "loss": 1.0883, "step": 149000 }, { "epoch": 2.08, "learning_rate": 1.9618203095612923e-05, "loss": 1.0812, "step": 149100 }, { "epoch": 2.08, "learning_rate": 1.9611237269953606e-05, "loss": 1.0774, "step": 149200 }, { "epoch": 2.08, "learning_rate": 1.9604271444294292e-05, "loss": 1.0757, "step": 149300 }, { "epoch": 2.08, "learning_rate": 1.9597305618634975e-05, "loss": 1.0626, "step": 149400 }, { "epoch": 2.08, "learning_rate": 1.959033979297566e-05, "loss": 1.0566, "step": 149500 }, { "epoch": 2.08, "learning_rate": 1.9583373967316344e-05, "loss": 1.0388, "step": 149600 }, { "epoch": 2.09, "learning_rate": 1.957640814165703e-05, "loss": 1.0847, "step": 149700 }, { "epoch": 2.09, "learning_rate": 1.9569442315997714e-05, "loss": 1.0748, "step": 149800 }, { "epoch": 2.09, "learning_rate": 1.95624764903384e-05, "loss": 1.0418, "step": 149900 }, { "epoch": 2.09, "learning_rate": 1.9555580322935677e-05, "loss": 1.0878, "step": 150000 }, { "epoch": 2.09, "learning_rate": 1.9548614497276363e-05, "loss": 1.0756, "step": 150100 }, { "epoch": 2.09, "learning_rate": 1.9541648671617046e-05, "loss": 1.11, "step": 150200 }, { "epoch": 2.09, "learning_rate": 1.9534682845957732e-05, "loss": 1.0875, "step": 150300 }, { "epoch": 2.1, "learning_rate": 1.9527717020298415e-05, "loss": 1.1018, "step": 150400 }, { "epoch": 2.1, "learning_rate": 1.9520751194639102e-05, "loss": 1.0643, "step": 150500 }, { "epoch": 2.1, "learning_rate": 1.9513855027236378e-05, "loss": 1.0752, "step": 150600 }, { "epoch": 2.1, "learning_rate": 1.9506889201577065e-05, "loss": 1.0931, "step": 150700 }, { "epoch": 2.1, "learning_rate": 1.9499923375917748e-05, "loss": 1.0546, "step": 150800 }, { "epoch": 2.1, "learning_rate": 1.9492957550258434e-05, "loss": 1.0705, "step": 150900 }, { "epoch": 2.1, "learning_rate": 1.9485991724599117e-05, "loss": 1.1041, "step": 151000 }, { "epoch": 2.11, "learning_rate": 1.9479025898939803e-05, "loss": 1.052, "step": 151100 }, { "epoch": 2.11, "learning_rate": 1.9472060073280486e-05, "loss": 1.0799, "step": 151200 }, { "epoch": 2.11, "learning_rate": 1.9465094247621173e-05, "loss": 1.0159, "step": 151300 }, { "epoch": 2.11, "learning_rate": 1.9458128421961856e-05, "loss": 1.0645, "step": 151400 }, { "epoch": 2.11, "learning_rate": 1.945116259630254e-05, "loss": 1.0853, "step": 151500 }, { "epoch": 2.11, "learning_rate": 1.9444196770643225e-05, "loss": 1.0862, "step": 151600 }, { "epoch": 2.11, "learning_rate": 1.9437230944983908e-05, "loss": 1.0347, "step": 151700 }, { "epoch": 2.11, "learning_rate": 1.9430265119324594e-05, "loss": 1.0947, "step": 151800 }, { "epoch": 2.12, "learning_rate": 1.9423299293665277e-05, "loss": 1.0781, "step": 151900 }, { "epoch": 2.12, "learning_rate": 1.9416333468005964e-05, "loss": 1.0664, "step": 152000 }, { "epoch": 2.12, "learning_rate": 1.9409367642346647e-05, "loss": 1.0936, "step": 152100 }, { "epoch": 2.12, "learning_rate": 1.9402401816687333e-05, "loss": 1.0828, "step": 152200 }, { "epoch": 2.12, "learning_rate": 1.9395435991028016e-05, "loss": 1.0902, "step": 152300 }, { "epoch": 2.12, "learning_rate": 1.9388470165368702e-05, "loss": 1.0435, "step": 152400 }, { "epoch": 2.12, "learning_rate": 1.9381504339709385e-05, "loss": 1.07, "step": 152500 }, { "epoch": 2.13, "learning_rate": 1.9374538514050072e-05, "loss": 1.0898, "step": 152600 }, { "epoch": 2.13, "learning_rate": 1.9367572688390755e-05, "loss": 1.0498, "step": 152700 }, { "epoch": 2.13, "learning_rate": 1.936060686273144e-05, "loss": 1.0636, "step": 152800 }, { "epoch": 2.13, "learning_rate": 1.9353641037072124e-05, "loss": 1.0898, "step": 152900 }, { "epoch": 2.13, "learning_rate": 1.934667521141281e-05, "loss": 1.0735, "step": 153000 }, { "epoch": 2.13, "learning_rate": 1.9339709385753494e-05, "loss": 1.0621, "step": 153100 }, { "epoch": 2.13, "learning_rate": 1.933274356009418e-05, "loss": 1.0774, "step": 153200 }, { "epoch": 2.14, "learning_rate": 1.9325777734434863e-05, "loss": 1.0564, "step": 153300 }, { "epoch": 2.14, "learning_rate": 1.931881190877555e-05, "loss": 1.1064, "step": 153400 }, { "epoch": 2.14, "learning_rate": 1.9311846083116232e-05, "loss": 1.0568, "step": 153500 }, { "epoch": 2.14, "learning_rate": 1.930488025745692e-05, "loss": 1.0978, "step": 153600 }, { "epoch": 2.14, "learning_rate": 1.92979144317976e-05, "loss": 1.0948, "step": 153700 }, { "epoch": 2.14, "learning_rate": 1.9290948606138285e-05, "loss": 1.0576, "step": 153800 }, { "epoch": 2.14, "learning_rate": 1.9283982780478968e-05, "loss": 1.0557, "step": 153900 }, { "epoch": 2.15, "learning_rate": 1.9277016954819654e-05, "loss": 1.0683, "step": 154000 }, { "epoch": 2.15, "learning_rate": 1.927005112916034e-05, "loss": 1.1081, "step": 154100 }, { "epoch": 2.15, "learning_rate": 1.9263085303501023e-05, "loss": 1.0581, "step": 154200 }, { "epoch": 2.15, "learning_rate": 1.925611947784171e-05, "loss": 1.0605, "step": 154300 }, { "epoch": 2.15, "learning_rate": 1.9249153652182393e-05, "loss": 1.0912, "step": 154400 }, { "epoch": 2.15, "learning_rate": 1.924218782652308e-05, "loss": 1.0877, "step": 154500 }, { "epoch": 2.15, "learning_rate": 1.9235291659120356e-05, "loss": 1.0878, "step": 154600 }, { "epoch": 2.16, "learning_rate": 1.922832583346104e-05, "loss": 1.1009, "step": 154700 }, { "epoch": 2.16, "learning_rate": 1.9221360007801725e-05, "loss": 1.0887, "step": 154800 }, { "epoch": 2.16, "learning_rate": 1.921439418214241e-05, "loss": 1.0782, "step": 154900 }, { "epoch": 2.16, "learning_rate": 1.9207428356483094e-05, "loss": 1.0584, "step": 155000 }, { "epoch": 2.16, "learning_rate": 1.920046253082378e-05, "loss": 1.0652, "step": 155100 }, { "epoch": 2.16, "learning_rate": 1.9193496705164464e-05, "loss": 1.0993, "step": 155200 }, { "epoch": 2.16, "learning_rate": 1.918653087950515e-05, "loss": 1.1073, "step": 155300 }, { "epoch": 2.16, "learning_rate": 1.9179565053845833e-05, "loss": 1.0592, "step": 155400 }, { "epoch": 2.17, "learning_rate": 1.917259922818652e-05, "loss": 1.0819, "step": 155500 }, { "epoch": 2.17, "learning_rate": 1.9165633402527202e-05, "loss": 1.0764, "step": 155600 }, { "epoch": 2.17, "learning_rate": 1.915866757686789e-05, "loss": 1.103, "step": 155700 }, { "epoch": 2.17, "learning_rate": 1.9151701751208572e-05, "loss": 1.0686, "step": 155800 }, { "epoch": 2.17, "learning_rate": 1.9144735925549258e-05, "loss": 1.106, "step": 155900 }, { "epoch": 2.17, "learning_rate": 1.913777009988994e-05, "loss": 1.0904, "step": 156000 }, { "epoch": 2.17, "learning_rate": 1.9130804274230627e-05, "loss": 1.0756, "step": 156100 }, { "epoch": 2.18, "learning_rate": 1.9123838448571307e-05, "loss": 1.0832, "step": 156200 }, { "epoch": 2.18, "learning_rate": 1.9116872622911993e-05, "loss": 1.0617, "step": 156300 }, { "epoch": 2.18, "learning_rate": 1.9109906797252676e-05, "loss": 1.0715, "step": 156400 }, { "epoch": 2.18, "learning_rate": 1.9102940971593363e-05, "loss": 1.0961, "step": 156500 }, { "epoch": 2.18, "learning_rate": 1.9095975145934046e-05, "loss": 1.0632, "step": 156600 }, { "epoch": 2.18, "learning_rate": 1.9089009320274732e-05, "loss": 1.0478, "step": 156700 }, { "epoch": 2.18, "learning_rate": 1.9082043494615415e-05, "loss": 1.0861, "step": 156800 }, { "epoch": 2.19, "learning_rate": 1.90750776689561e-05, "loss": 1.0971, "step": 156900 }, { "epoch": 2.19, "learning_rate": 1.9068181501553378e-05, "loss": 1.0603, "step": 157000 }, { "epoch": 2.19, "learning_rate": 1.9061215675894064e-05, "loss": 1.0885, "step": 157100 }, { "epoch": 2.19, "learning_rate": 1.9054249850234747e-05, "loss": 1.0893, "step": 157200 }, { "epoch": 2.19, "learning_rate": 1.9047284024575434e-05, "loss": 1.085, "step": 157300 }, { "epoch": 2.19, "learning_rate": 1.9040318198916117e-05, "loss": 1.0892, "step": 157400 }, { "epoch": 2.19, "learning_rate": 1.9033352373256803e-05, "loss": 1.0486, "step": 157500 }, { "epoch": 2.2, "learning_rate": 1.9026386547597486e-05, "loss": 1.1074, "step": 157600 }, { "epoch": 2.2, "learning_rate": 1.9019420721938172e-05, "loss": 1.0732, "step": 157700 }, { "epoch": 2.2, "learning_rate": 1.9012454896278855e-05, "loss": 1.0925, "step": 157800 }, { "epoch": 2.2, "learning_rate": 1.9005489070619542e-05, "loss": 1.0564, "step": 157900 }, { "epoch": 2.2, "learning_rate": 1.8998523244960225e-05, "loss": 1.0608, "step": 158000 }, { "epoch": 2.2, "learning_rate": 1.899155741930091e-05, "loss": 1.0669, "step": 158100 }, { "epoch": 2.2, "learning_rate": 1.8984591593641598e-05, "loss": 1.066, "step": 158200 }, { "epoch": 2.21, "learning_rate": 1.897762576798228e-05, "loss": 1.08, "step": 158300 }, { "epoch": 2.21, "learning_rate": 1.8970659942322967e-05, "loss": 1.0728, "step": 158400 }, { "epoch": 2.21, "learning_rate": 1.896369411666365e-05, "loss": 1.0898, "step": 158500 }, { "epoch": 2.21, "learning_rate": 1.8956728291004333e-05, "loss": 1.0327, "step": 158600 }, { "epoch": 2.21, "learning_rate": 1.8949762465345016e-05, "loss": 1.1012, "step": 158700 }, { "epoch": 2.21, "learning_rate": 1.8942796639685702e-05, "loss": 1.1121, "step": 158800 }, { "epoch": 2.21, "learning_rate": 1.8935830814026385e-05, "loss": 1.0478, "step": 158900 }, { "epoch": 2.22, "learning_rate": 1.892886498836707e-05, "loss": 1.0447, "step": 159000 }, { "epoch": 2.22, "learning_rate": 1.8921899162707755e-05, "loss": 1.0683, "step": 159100 }, { "epoch": 2.22, "learning_rate": 1.891493333704844e-05, "loss": 1.0817, "step": 159200 }, { "epoch": 2.22, "learning_rate": 1.8907967511389124e-05, "loss": 1.0836, "step": 159300 }, { "epoch": 2.22, "learning_rate": 1.8901071343986404e-05, "loss": 1.1046, "step": 159400 }, { "epoch": 2.22, "learning_rate": 1.8894105518327087e-05, "loss": 1.1115, "step": 159500 }, { "epoch": 2.22, "learning_rate": 1.8887139692667773e-05, "loss": 1.1063, "step": 159600 }, { "epoch": 2.22, "learning_rate": 1.8880173867008456e-05, "loss": 1.1138, "step": 159700 }, { "epoch": 2.23, "learning_rate": 1.8873208041349143e-05, "loss": 1.1093, "step": 159800 }, { "epoch": 2.23, "learning_rate": 1.8866242215689826e-05, "loss": 1.0654, "step": 159900 }, { "epoch": 2.23, "learning_rate": 1.8859276390030512e-05, "loss": 1.0395, "step": 160000 }, { "epoch": 2.23, "learning_rate": 1.8852310564371195e-05, "loss": 1.0777, "step": 160100 }, { "epoch": 2.23, "learning_rate": 1.884534473871188e-05, "loss": 1.0917, "step": 160200 }, { "epoch": 2.23, "learning_rate": 1.8838378913052564e-05, "loss": 1.096, "step": 160300 }, { "epoch": 2.23, "learning_rate": 1.883141308739325e-05, "loss": 1.0607, "step": 160400 }, { "epoch": 2.24, "learning_rate": 1.8824447261733934e-05, "loss": 1.0552, "step": 160500 }, { "epoch": 2.24, "learning_rate": 1.881748143607462e-05, "loss": 1.0705, "step": 160600 }, { "epoch": 2.24, "learning_rate": 1.8810515610415303e-05, "loss": 1.0704, "step": 160700 }, { "epoch": 2.24, "learning_rate": 1.880354978475599e-05, "loss": 1.0819, "step": 160800 }, { "epoch": 2.24, "learning_rate": 1.8796583959096672e-05, "loss": 1.0863, "step": 160900 }, { "epoch": 2.24, "learning_rate": 1.878961813343736e-05, "loss": 1.0816, "step": 161000 }, { "epoch": 2.24, "learning_rate": 1.878265230777804e-05, "loss": 1.0417, "step": 161100 }, { "epoch": 2.25, "learning_rate": 1.8775686482118725e-05, "loss": 1.08, "step": 161200 }, { "epoch": 2.25, "learning_rate": 1.8768720656459408e-05, "loss": 1.0572, "step": 161300 }, { "epoch": 2.25, "learning_rate": 1.8761754830800094e-05, "loss": 1.0864, "step": 161400 }, { "epoch": 2.25, "learning_rate": 1.875478900514078e-05, "loss": 1.0694, "step": 161500 }, { "epoch": 2.25, "learning_rate": 1.8747892837738057e-05, "loss": 1.0714, "step": 161600 }, { "epoch": 2.25, "learning_rate": 1.8740996670335333e-05, "loss": 1.0979, "step": 161700 }, { "epoch": 2.25, "learning_rate": 1.873403084467602e-05, "loss": 1.0908, "step": 161800 }, { "epoch": 2.26, "learning_rate": 1.8727065019016703e-05, "loss": 1.0693, "step": 161900 }, { "epoch": 2.26, "learning_rate": 1.872009919335739e-05, "loss": 1.0842, "step": 162000 }, { "epoch": 2.26, "learning_rate": 1.8713133367698072e-05, "loss": 1.1071, "step": 162100 }, { "epoch": 2.26, "learning_rate": 1.870616754203876e-05, "loss": 1.0485, "step": 162200 }, { "epoch": 2.26, "learning_rate": 1.869920171637944e-05, "loss": 1.0638, "step": 162300 }, { "epoch": 2.26, "learning_rate": 1.8692235890720128e-05, "loss": 1.0964, "step": 162400 }, { "epoch": 2.26, "learning_rate": 1.868527006506081e-05, "loss": 1.0537, "step": 162500 }, { "epoch": 2.27, "learning_rate": 1.8678304239401497e-05, "loss": 1.0453, "step": 162600 }, { "epoch": 2.27, "learning_rate": 1.867133841374218e-05, "loss": 1.0647, "step": 162700 }, { "epoch": 2.27, "learning_rate": 1.8664372588082867e-05, "loss": 1.1006, "step": 162800 }, { "epoch": 2.27, "learning_rate": 1.865740676242355e-05, "loss": 1.0989, "step": 162900 }, { "epoch": 2.27, "learning_rate": 1.8650440936764236e-05, "loss": 1.0766, "step": 163000 }, { "epoch": 2.27, "learning_rate": 1.8643475111104922e-05, "loss": 1.0662, "step": 163100 }, { "epoch": 2.27, "learning_rate": 1.8636509285445605e-05, "loss": 1.1073, "step": 163200 }, { "epoch": 2.28, "learning_rate": 1.862954345978629e-05, "loss": 1.0682, "step": 163300 }, { "epoch": 2.28, "learning_rate": 1.8622577634126975e-05, "loss": 1.0908, "step": 163400 }, { "epoch": 2.28, "learning_rate": 1.861561180846766e-05, "loss": 1.0464, "step": 163500 }, { "epoch": 2.28, "learning_rate": 1.8608645982808344e-05, "loss": 1.0401, "step": 163600 }, { "epoch": 2.28, "learning_rate": 1.8601680157149027e-05, "loss": 1.1031, "step": 163700 }, { "epoch": 2.28, "learning_rate": 1.859471433148971e-05, "loss": 1.0626, "step": 163800 }, { "epoch": 2.28, "learning_rate": 1.8587748505830396e-05, "loss": 1.1199, "step": 163900 }, { "epoch": 2.28, "learning_rate": 1.858078268017108e-05, "loss": 1.0946, "step": 164000 }, { "epoch": 2.29, "learning_rate": 1.8573816854511766e-05, "loss": 1.0756, "step": 164100 }, { "epoch": 2.29, "learning_rate": 1.856685102885245e-05, "loss": 1.0578, "step": 164200 }, { "epoch": 2.29, "learning_rate": 1.8559885203193135e-05, "loss": 1.0652, "step": 164300 }, { "epoch": 2.29, "learning_rate": 1.8552919377533818e-05, "loss": 1.0721, "step": 164400 }, { "epoch": 2.29, "learning_rate": 1.8546092868387688e-05, "loss": 1.0962, "step": 164500 }, { "epoch": 2.29, "learning_rate": 1.8539127042728374e-05, "loss": 1.0605, "step": 164600 }, { "epoch": 2.29, "learning_rate": 1.853216121706906e-05, "loss": 1.0535, "step": 164700 }, { "epoch": 2.3, "learning_rate": 1.8525195391409744e-05, "loss": 1.0563, "step": 164800 }, { "epoch": 2.3, "learning_rate": 1.851822956575043e-05, "loss": 1.1039, "step": 164900 }, { "epoch": 2.3, "learning_rate": 1.8511263740091113e-05, "loss": 1.0619, "step": 165000 }, { "epoch": 2.3, "learning_rate": 1.85042979144318e-05, "loss": 1.0629, "step": 165100 }, { "epoch": 2.3, "learning_rate": 1.8497332088772482e-05, "loss": 1.0738, "step": 165200 }, { "epoch": 2.3, "learning_rate": 1.849036626311317e-05, "loss": 1.0515, "step": 165300 }, { "epoch": 2.3, "learning_rate": 1.8483400437453852e-05, "loss": 1.0813, "step": 165400 }, { "epoch": 2.31, "learning_rate": 1.8476434611794538e-05, "loss": 1.0457, "step": 165500 }, { "epoch": 2.31, "learning_rate": 1.846946878613522e-05, "loss": 1.0754, "step": 165600 }, { "epoch": 2.31, "learning_rate": 1.8462502960475908e-05, "loss": 1.1107, "step": 165700 }, { "epoch": 2.31, "learning_rate": 1.845553713481659e-05, "loss": 1.0671, "step": 165800 }, { "epoch": 2.31, "learning_rate": 1.8448571309157277e-05, "loss": 1.1161, "step": 165900 }, { "epoch": 2.31, "learning_rate": 1.844160548349796e-05, "loss": 1.0534, "step": 166000 }, { "epoch": 2.31, "learning_rate": 1.8434639657838646e-05, "loss": 1.0737, "step": 166100 }, { "epoch": 2.32, "learning_rate": 1.842767383217933e-05, "loss": 1.0896, "step": 166200 }, { "epoch": 2.32, "learning_rate": 1.8420708006520012e-05, "loss": 1.0861, "step": 166300 }, { "epoch": 2.32, "learning_rate": 1.8413742180860695e-05, "loss": 1.0763, "step": 166400 }, { "epoch": 2.32, "learning_rate": 1.840677635520138e-05, "loss": 1.0291, "step": 166500 }, { "epoch": 2.32, "learning_rate": 1.8399810529542065e-05, "loss": 1.0815, "step": 166600 }, { "epoch": 2.32, "learning_rate": 1.839284470388275e-05, "loss": 1.0771, "step": 166700 }, { "epoch": 2.32, "learning_rate": 1.8385878878223434e-05, "loss": 1.0989, "step": 166800 }, { "epoch": 2.33, "learning_rate": 1.837891305256412e-05, "loss": 1.0677, "step": 166900 }, { "epoch": 2.33, "learning_rate": 1.8371947226904803e-05, "loss": 1.0529, "step": 167000 }, { "epoch": 2.33, "learning_rate": 1.836498140124549e-05, "loss": 1.0796, "step": 167100 }, { "epoch": 2.33, "learning_rate": 1.8358015575586176e-05, "loss": 1.0991, "step": 167200 }, { "epoch": 2.33, "learning_rate": 1.835104974992686e-05, "loss": 1.0839, "step": 167300 }, { "epoch": 2.33, "learning_rate": 1.8344083924267545e-05, "loss": 1.1401, "step": 167400 }, { "epoch": 2.33, "learning_rate": 1.833711809860823e-05, "loss": 1.0324, "step": 167500 }, { "epoch": 2.33, "learning_rate": 1.8330152272948915e-05, "loss": 1.0832, "step": 167600 }, { "epoch": 2.34, "learning_rate": 1.8323186447289598e-05, "loss": 1.0761, "step": 167700 }, { "epoch": 2.34, "learning_rate": 1.8316220621630284e-05, "loss": 1.0775, "step": 167800 }, { "epoch": 2.34, "learning_rate": 1.8309254795970967e-05, "loss": 1.1074, "step": 167900 }, { "epoch": 2.34, "learning_rate": 1.8302288970311654e-05, "loss": 1.0927, "step": 168000 }, { "epoch": 2.34, "learning_rate": 1.8295323144652337e-05, "loss": 1.0714, "step": 168100 }, { "epoch": 2.34, "learning_rate": 1.8288357318993023e-05, "loss": 1.0387, "step": 168200 }, { "epoch": 2.34, "learning_rate": 1.8281391493333706e-05, "loss": 1.096, "step": 168300 }, { "epoch": 2.35, "learning_rate": 1.8274425667674392e-05, "loss": 1.0691, "step": 168400 }, { "epoch": 2.35, "learning_rate": 1.8267459842015072e-05, "loss": 1.0612, "step": 168500 }, { "epoch": 2.35, "learning_rate": 1.8260494016355758e-05, "loss": 1.0746, "step": 168600 }, { "epoch": 2.35, "learning_rate": 1.825352819069644e-05, "loss": 1.0709, "step": 168700 }, { "epoch": 2.35, "learning_rate": 1.8246562365037128e-05, "loss": 1.0541, "step": 168800 }, { "epoch": 2.35, "learning_rate": 1.823959653937781e-05, "loss": 1.0693, "step": 168900 }, { "epoch": 2.35, "learning_rate": 1.8232630713718497e-05, "loss": 1.0334, "step": 169000 }, { "epoch": 2.36, "learning_rate": 1.8225734546315773e-05, "loss": 1.0684, "step": 169100 }, { "epoch": 2.36, "learning_rate": 1.821876872065646e-05, "loss": 1.1119, "step": 169200 }, { "epoch": 2.36, "learning_rate": 1.8211802894997143e-05, "loss": 1.0625, "step": 169300 }, { "epoch": 2.36, "learning_rate": 1.820483706933783e-05, "loss": 1.078, "step": 169400 }, { "epoch": 2.36, "learning_rate": 1.8197871243678512e-05, "loss": 1.0941, "step": 169500 }, { "epoch": 2.36, "learning_rate": 1.81909054180192e-05, "loss": 1.1107, "step": 169600 }, { "epoch": 2.36, "learning_rate": 1.818393959235988e-05, "loss": 1.0978, "step": 169700 }, { "epoch": 2.37, "learning_rate": 1.8176973766700568e-05, "loss": 1.0845, "step": 169800 }, { "epoch": 2.37, "learning_rate": 1.817000794104125e-05, "loss": 1.058, "step": 169900 }, { "epoch": 2.37, "learning_rate": 1.8163042115381937e-05, "loss": 1.0849, "step": 170000 }, { "epoch": 2.37, "learning_rate": 1.815607628972262e-05, "loss": 1.1009, "step": 170100 }, { "epoch": 2.37, "learning_rate": 1.8149110464063307e-05, "loss": 1.1185, "step": 170200 }, { "epoch": 2.37, "learning_rate": 1.814214463840399e-05, "loss": 1.0886, "step": 170300 }, { "epoch": 2.37, "learning_rate": 1.8135178812744676e-05, "loss": 1.0958, "step": 170400 }, { "epoch": 2.38, "learning_rate": 1.8128212987085362e-05, "loss": 1.0732, "step": 170500 }, { "epoch": 2.38, "learning_rate": 1.8121247161426045e-05, "loss": 1.0757, "step": 170600 }, { "epoch": 2.38, "learning_rate": 1.8114281335766732e-05, "loss": 1.0901, "step": 170700 }, { "epoch": 2.38, "learning_rate": 1.8107315510107415e-05, "loss": 1.1048, "step": 170800 }, { "epoch": 2.38, "learning_rate": 1.8100349684448098e-05, "loss": 1.0625, "step": 170900 }, { "epoch": 2.38, "learning_rate": 1.809338385878878e-05, "loss": 1.0933, "step": 171000 }, { "epoch": 2.38, "learning_rate": 1.8086418033129467e-05, "loss": 1.0984, "step": 171100 }, { "epoch": 2.39, "learning_rate": 1.807945220747015e-05, "loss": 1.0805, "step": 171200 }, { "epoch": 2.39, "learning_rate": 1.8072486381810836e-05, "loss": 1.0746, "step": 171300 }, { "epoch": 2.39, "learning_rate": 1.806552055615152e-05, "loss": 1.0634, "step": 171400 }, { "epoch": 2.39, "learning_rate": 1.8058554730492206e-05, "loss": 1.074, "step": 171500 }, { "epoch": 2.39, "learning_rate": 1.805158890483289e-05, "loss": 1.0932, "step": 171600 }, { "epoch": 2.39, "learning_rate": 1.8044623079173575e-05, "loss": 1.0733, "step": 171700 }, { "epoch": 2.39, "learning_rate": 1.8037657253514258e-05, "loss": 1.0979, "step": 171800 }, { "epoch": 2.39, "learning_rate": 1.8030691427854945e-05, "loss": 1.1366, "step": 171900 }, { "epoch": 2.4, "learning_rate": 1.8023725602195628e-05, "loss": 1.0854, "step": 172000 }, { "epoch": 2.4, "learning_rate": 1.8016759776536314e-05, "loss": 1.0839, "step": 172100 }, { "epoch": 2.4, "learning_rate": 1.8009793950876997e-05, "loss": 1.0771, "step": 172200 }, { "epoch": 2.4, "learning_rate": 1.8002828125217683e-05, "loss": 1.0912, "step": 172300 }, { "epoch": 2.4, "learning_rate": 1.7995862299558366e-05, "loss": 1.0832, "step": 172400 }, { "epoch": 2.4, "learning_rate": 1.7988896473899053e-05, "loss": 1.0872, "step": 172500 }, { "epoch": 2.4, "learning_rate": 1.7981930648239736e-05, "loss": 1.1261, "step": 172600 }, { "epoch": 2.41, "learning_rate": 1.7974964822580422e-05, "loss": 1.0622, "step": 172700 }, { "epoch": 2.41, "learning_rate": 1.7967998996921105e-05, "loss": 1.0816, "step": 172800 }, { "epoch": 2.41, "learning_rate": 1.796103317126179e-05, "loss": 1.0919, "step": 172900 }, { "epoch": 2.41, "learning_rate": 1.7954067345602478e-05, "loss": 1.0751, "step": 173000 }, { "epoch": 2.41, "learning_rate": 1.794710151994316e-05, "loss": 1.0811, "step": 173100 }, { "epoch": 2.41, "learning_rate": 1.7940135694283844e-05, "loss": 1.1039, "step": 173200 }, { "epoch": 2.41, "learning_rate": 1.7933169868624527e-05, "loss": 1.079, "step": 173300 }, { "epoch": 2.42, "learning_rate": 1.7926204042965213e-05, "loss": 1.074, "step": 173400 }, { "epoch": 2.42, "learning_rate": 1.7919238217305896e-05, "loss": 1.1371, "step": 173500 }, { "epoch": 2.42, "learning_rate": 1.7912272391646583e-05, "loss": 1.0723, "step": 173600 }, { "epoch": 2.42, "learning_rate": 1.7905306565987266e-05, "loss": 1.1076, "step": 173700 }, { "epoch": 2.42, "learning_rate": 1.7898340740327952e-05, "loss": 1.0711, "step": 173800 }, { "epoch": 2.42, "learning_rate": 1.7891374914668635e-05, "loss": 1.0799, "step": 173900 }, { "epoch": 2.42, "learning_rate": 1.788440908900932e-05, "loss": 1.0692, "step": 174000 }, { "epoch": 2.43, "learning_rate": 1.7877443263350004e-05, "loss": 1.0965, "step": 174100 }, { "epoch": 2.43, "learning_rate": 1.7870547095947284e-05, "loss": 1.061, "step": 174200 }, { "epoch": 2.43, "learning_rate": 1.7863581270287967e-05, "loss": 1.0913, "step": 174300 }, { "epoch": 2.43, "learning_rate": 1.7856615444628653e-05, "loss": 1.0876, "step": 174400 }, { "epoch": 2.43, "learning_rate": 1.7849649618969336e-05, "loss": 1.0911, "step": 174500 }, { "epoch": 2.43, "learning_rate": 1.7842683793310023e-05, "loss": 1.0707, "step": 174600 }, { "epoch": 2.43, "learning_rate": 1.7835717967650706e-05, "loss": 1.0763, "step": 174700 }, { "epoch": 2.44, "learning_rate": 1.7828752141991392e-05, "loss": 1.1131, "step": 174800 }, { "epoch": 2.44, "learning_rate": 1.7821786316332075e-05, "loss": 1.0556, "step": 174900 }, { "epoch": 2.44, "learning_rate": 1.781482049067276e-05, "loss": 1.0817, "step": 175000 }, { "epoch": 2.44, "learning_rate": 1.7807854665013445e-05, "loss": 1.052, "step": 175100 }, { "epoch": 2.44, "learning_rate": 1.780088883935413e-05, "loss": 1.0742, "step": 175200 }, { "epoch": 2.44, "learning_rate": 1.7793923013694814e-05, "loss": 1.1105, "step": 175300 }, { "epoch": 2.44, "learning_rate": 1.77869571880355e-05, "loss": 1.0677, "step": 175400 }, { "epoch": 2.45, "learning_rate": 1.7779991362376183e-05, "loss": 1.0662, "step": 175500 }, { "epoch": 2.45, "learning_rate": 1.7773025536716866e-05, "loss": 1.0562, "step": 175600 }, { "epoch": 2.45, "learning_rate": 1.776605971105755e-05, "loss": 1.0825, "step": 175700 }, { "epoch": 2.45, "learning_rate": 1.7759093885398236e-05, "loss": 1.0602, "step": 175800 }, { "epoch": 2.45, "learning_rate": 1.775212805973892e-05, "loss": 1.0643, "step": 175900 }, { "epoch": 2.45, "learning_rate": 1.7745162234079605e-05, "loss": 1.099, "step": 176000 }, { "epoch": 2.45, "learning_rate": 1.773819640842029e-05, "loss": 1.0983, "step": 176100 }, { "epoch": 2.45, "learning_rate": 1.7731230582760974e-05, "loss": 1.0967, "step": 176200 }, { "epoch": 2.46, "learning_rate": 1.772426475710166e-05, "loss": 1.0932, "step": 176300 }, { "epoch": 2.46, "learning_rate": 1.7717298931442344e-05, "loss": 1.1169, "step": 176400 }, { "epoch": 2.46, "learning_rate": 1.771033310578303e-05, "loss": 1.0628, "step": 176500 }, { "epoch": 2.46, "learning_rate": 1.7703367280123713e-05, "loss": 1.0565, "step": 176600 }, { "epoch": 2.46, "learning_rate": 1.76964014544644e-05, "loss": 1.0957, "step": 176700 }, { "epoch": 2.46, "learning_rate": 1.7689435628805082e-05, "loss": 1.102, "step": 176800 }, { "epoch": 2.46, "learning_rate": 1.768246980314577e-05, "loss": 1.0591, "step": 176900 }, { "epoch": 2.47, "learning_rate": 1.7675503977486452e-05, "loss": 1.0839, "step": 177000 }, { "epoch": 2.47, "learning_rate": 1.7668538151827138e-05, "loss": 1.0878, "step": 177100 }, { "epoch": 2.47, "learning_rate": 1.766157232616782e-05, "loss": 1.126, "step": 177200 }, { "epoch": 2.47, "learning_rate": 1.7654606500508508e-05, "loss": 1.0994, "step": 177300 }, { "epoch": 2.47, "learning_rate": 1.764764067484919e-05, "loss": 1.0516, "step": 177400 }, { "epoch": 2.47, "learning_rate": 1.7640674849189877e-05, "loss": 1.0712, "step": 177500 }, { "epoch": 2.47, "learning_rate": 1.763370902353056e-05, "loss": 1.0929, "step": 177600 }, { "epoch": 2.48, "learning_rate": 1.7626743197871246e-05, "loss": 1.0808, "step": 177700 }, { "epoch": 2.48, "learning_rate": 1.7619777372211926e-05, "loss": 1.0607, "step": 177800 }, { "epoch": 2.48, "learning_rate": 1.7612811546552612e-05, "loss": 1.1114, "step": 177900 }, { "epoch": 2.48, "learning_rate": 1.7605845720893295e-05, "loss": 1.0941, "step": 178000 }, { "epoch": 2.48, "learning_rate": 1.759887989523398e-05, "loss": 1.0774, "step": 178100 }, { "epoch": 2.48, "learning_rate": 1.7591914069574665e-05, "loss": 1.0861, "step": 178200 }, { "epoch": 2.48, "learning_rate": 1.758494824391535e-05, "loss": 1.0952, "step": 178300 }, { "epoch": 2.49, "learning_rate": 1.7577982418256034e-05, "loss": 1.081, "step": 178400 }, { "epoch": 2.49, "learning_rate": 1.7571086250853314e-05, "loss": 1.0808, "step": 178500 }, { "epoch": 2.49, "learning_rate": 1.7564120425193997e-05, "loss": 1.069, "step": 178600 }, { "epoch": 2.49, "learning_rate": 1.7557154599534683e-05, "loss": 1.0716, "step": 178700 }, { "epoch": 2.49, "learning_rate": 1.7550188773875366e-05, "loss": 1.0677, "step": 178800 }, { "epoch": 2.49, "learning_rate": 1.7543222948216053e-05, "loss": 1.0636, "step": 178900 }, { "epoch": 2.49, "learning_rate": 1.753632678081333e-05, "loss": 1.074, "step": 179000 }, { "epoch": 2.5, "learning_rate": 1.7529360955154015e-05, "loss": 1.0737, "step": 179100 }, { "epoch": 2.5, "learning_rate": 1.75223951294947e-05, "loss": 1.0935, "step": 179200 }, { "epoch": 2.5, "learning_rate": 1.7515429303835385e-05, "loss": 1.0565, "step": 179300 }, { "epoch": 2.5, "learning_rate": 1.7508463478176068e-05, "loss": 1.0837, "step": 179400 }, { "epoch": 2.5, "learning_rate": 1.7501497652516754e-05, "loss": 1.0872, "step": 179500 }, { "epoch": 2.5, "learning_rate": 1.7494531826857437e-05, "loss": 1.0564, "step": 179600 }, { "epoch": 2.5, "learning_rate": 1.7487566001198123e-05, "loss": 1.0401, "step": 179700 }, { "epoch": 2.5, "learning_rate": 1.7480600175538806e-05, "loss": 1.0782, "step": 179800 }, { "epoch": 2.51, "learning_rate": 1.7473634349879493e-05, "loss": 1.0679, "step": 179900 }, { "epoch": 2.51, "learning_rate": 1.7466668524220176e-05, "loss": 1.07, "step": 180000 }, { "epoch": 2.51, "learning_rate": 1.7459702698560862e-05, "loss": 1.0541, "step": 180100 }, { "epoch": 2.51, "learning_rate": 1.745273687290155e-05, "loss": 1.0779, "step": 180200 }, { "epoch": 2.51, "learning_rate": 1.744577104724223e-05, "loss": 1.0653, "step": 180300 }, { "epoch": 2.51, "learning_rate": 1.7438805221582915e-05, "loss": 1.0546, "step": 180400 }, { "epoch": 2.51, "learning_rate": 1.7431839395923598e-05, "loss": 1.1075, "step": 180500 }, { "epoch": 2.52, "learning_rate": 1.7424873570264284e-05, "loss": 1.0602, "step": 180600 }, { "epoch": 2.52, "learning_rate": 1.7417907744604967e-05, "loss": 1.0899, "step": 180700 }, { "epoch": 2.52, "learning_rate": 1.7410941918945653e-05, "loss": 1.0677, "step": 180800 }, { "epoch": 2.52, "learning_rate": 1.7403976093286336e-05, "loss": 1.1195, "step": 180900 }, { "epoch": 2.52, "learning_rate": 1.7397010267627023e-05, "loss": 1.0993, "step": 181000 }, { "epoch": 2.52, "learning_rate": 1.7390044441967706e-05, "loss": 1.0639, "step": 181100 }, { "epoch": 2.52, "learning_rate": 1.7383078616308392e-05, "loss": 1.0561, "step": 181200 }, { "epoch": 2.53, "learning_rate": 1.7376112790649075e-05, "loss": 1.0863, "step": 181300 }, { "epoch": 2.53, "learning_rate": 1.736914696498976e-05, "loss": 1.1163, "step": 181400 }, { "epoch": 2.53, "learning_rate": 1.7362181139330444e-05, "loss": 1.1151, "step": 181500 }, { "epoch": 2.53, "learning_rate": 1.735521531367113e-05, "loss": 1.093, "step": 181600 }, { "epoch": 2.53, "learning_rate": 1.7348249488011814e-05, "loss": 1.0777, "step": 181700 }, { "epoch": 2.53, "learning_rate": 1.73412836623525e-05, "loss": 1.0636, "step": 181800 }, { "epoch": 2.53, "learning_rate": 1.7334317836693183e-05, "loss": 1.0995, "step": 181900 }, { "epoch": 2.54, "learning_rate": 1.732735201103387e-05, "loss": 1.1191, "step": 182000 }, { "epoch": 2.54, "learning_rate": 1.7320386185374552e-05, "loss": 1.0695, "step": 182100 }, { "epoch": 2.54, "learning_rate": 1.731342035971524e-05, "loss": 1.078, "step": 182200 }, { "epoch": 2.54, "learning_rate": 1.7306524192312515e-05, "loss": 1.0821, "step": 182300 }, { "epoch": 2.54, "learning_rate": 1.72995583666532e-05, "loss": 1.103, "step": 182400 }, { "epoch": 2.54, "learning_rate": 1.7292592540993885e-05, "loss": 1.0857, "step": 182500 }, { "epoch": 2.54, "learning_rate": 1.728562671533457e-05, "loss": 1.0897, "step": 182600 }, { "epoch": 2.55, "learning_rate": 1.7278660889675254e-05, "loss": 1.082, "step": 182700 }, { "epoch": 2.55, "learning_rate": 1.727169506401594e-05, "loss": 1.0776, "step": 182800 }, { "epoch": 2.55, "learning_rate": 1.726472923835662e-05, "loss": 1.0197, "step": 182900 }, { "epoch": 2.55, "learning_rate": 1.7257763412697306e-05, "loss": 1.0843, "step": 183000 }, { "epoch": 2.55, "learning_rate": 1.725079758703799e-05, "loss": 1.0958, "step": 183100 }, { "epoch": 2.55, "learning_rate": 1.7243831761378676e-05, "loss": 1.1242, "step": 183200 }, { "epoch": 2.55, "learning_rate": 1.723686593571936e-05, "loss": 1.0766, "step": 183300 }, { "epoch": 2.56, "learning_rate": 1.7229900110060045e-05, "loss": 1.0642, "step": 183400 }, { "epoch": 2.56, "learning_rate": 1.722293428440073e-05, "loss": 1.0896, "step": 183500 }, { "epoch": 2.56, "learning_rate": 1.7215968458741414e-05, "loss": 1.0786, "step": 183600 }, { "epoch": 2.56, "learning_rate": 1.72090026330821e-05, "loss": 1.1146, "step": 183700 }, { "epoch": 2.56, "learning_rate": 1.7202036807422784e-05, "loss": 1.1031, "step": 183800 }, { "epoch": 2.56, "learning_rate": 1.719507098176347e-05, "loss": 1.0745, "step": 183900 }, { "epoch": 2.56, "learning_rate": 1.7188105156104153e-05, "loss": 1.0727, "step": 184000 }, { "epoch": 2.56, "learning_rate": 1.718113933044484e-05, "loss": 1.0804, "step": 184100 }, { "epoch": 2.57, "learning_rate": 1.7174173504785523e-05, "loss": 1.0792, "step": 184200 }, { "epoch": 2.57, "learning_rate": 1.716720767912621e-05, "loss": 1.0806, "step": 184300 }, { "epoch": 2.57, "learning_rate": 1.7160241853466892e-05, "loss": 1.0611, "step": 184400 }, { "epoch": 2.57, "learning_rate": 1.7153276027807578e-05, "loss": 1.0855, "step": 184500 }, { "epoch": 2.57, "learning_rate": 1.7146379860404855e-05, "loss": 1.0496, "step": 184600 }, { "epoch": 2.57, "learning_rate": 1.713941403474554e-05, "loss": 1.0807, "step": 184700 }, { "epoch": 2.57, "learning_rate": 1.7132448209086224e-05, "loss": 1.0464, "step": 184800 }, { "epoch": 2.58, "learning_rate": 1.712548238342691e-05, "loss": 1.0566, "step": 184900 }, { "epoch": 2.58, "learning_rate": 1.7118516557767593e-05, "loss": 1.07, "step": 185000 }, { "epoch": 2.58, "learning_rate": 1.711155073210828e-05, "loss": 1.0464, "step": 185100 }, { "epoch": 2.58, "learning_rate": 1.7104584906448963e-05, "loss": 1.0723, "step": 185200 }, { "epoch": 2.58, "learning_rate": 1.7097619080789646e-05, "loss": 1.0604, "step": 185300 }, { "epoch": 2.58, "learning_rate": 1.709065325513033e-05, "loss": 1.0921, "step": 185400 }, { "epoch": 2.58, "learning_rate": 1.7083687429471015e-05, "loss": 1.0819, "step": 185500 }, { "epoch": 2.59, "learning_rate": 1.7076721603811698e-05, "loss": 1.0824, "step": 185600 }, { "epoch": 2.59, "learning_rate": 1.7069755778152385e-05, "loss": 1.0815, "step": 185700 }, { "epoch": 2.59, "learning_rate": 1.7062789952493068e-05, "loss": 1.0748, "step": 185800 }, { "epoch": 2.59, "learning_rate": 1.7055824126833754e-05, "loss": 1.0746, "step": 185900 }, { "epoch": 2.59, "learning_rate": 1.7048858301174437e-05, "loss": 1.1041, "step": 186000 }, { "epoch": 2.59, "learning_rate": 1.7041892475515123e-05, "loss": 1.056, "step": 186100 }, { "epoch": 2.59, "learning_rate": 1.7034926649855806e-05, "loss": 1.0595, "step": 186200 }, { "epoch": 2.6, "learning_rate": 1.7027960824196493e-05, "loss": 1.1017, "step": 186300 }, { "epoch": 2.6, "learning_rate": 1.7020994998537176e-05, "loss": 1.0718, "step": 186400 }, { "epoch": 2.6, "learning_rate": 1.7014029172877862e-05, "loss": 1.0939, "step": 186500 }, { "epoch": 2.6, "learning_rate": 1.7007063347218545e-05, "loss": 1.0789, "step": 186600 }, { "epoch": 2.6, "learning_rate": 1.700009752155923e-05, "loss": 1.0848, "step": 186700 }, { "epoch": 2.6, "learning_rate": 1.6993201354156508e-05, "loss": 1.1197, "step": 186800 }, { "epoch": 2.6, "learning_rate": 1.6986235528497194e-05, "loss": 1.0867, "step": 186900 }, { "epoch": 2.61, "learning_rate": 1.6979269702837877e-05, "loss": 1.1139, "step": 187000 }, { "epoch": 2.61, "learning_rate": 1.6972303877178564e-05, "loss": 1.1051, "step": 187100 }, { "epoch": 2.61, "learning_rate": 1.6965338051519247e-05, "loss": 1.1226, "step": 187200 }, { "epoch": 2.61, "learning_rate": 1.6958372225859933e-05, "loss": 1.0646, "step": 187300 }, { "epoch": 2.61, "learning_rate": 1.6951406400200616e-05, "loss": 1.079, "step": 187400 }, { "epoch": 2.61, "learning_rate": 1.6944440574541302e-05, "loss": 1.1046, "step": 187500 }, { "epoch": 2.61, "learning_rate": 1.693747474888199e-05, "loss": 1.0883, "step": 187600 }, { "epoch": 2.61, "learning_rate": 1.6930508923222668e-05, "loss": 1.0878, "step": 187700 }, { "epoch": 2.62, "learning_rate": 1.6923543097563355e-05, "loss": 1.045, "step": 187800 }, { "epoch": 2.62, "learning_rate": 1.6916577271904038e-05, "loss": 1.1132, "step": 187900 }, { "epoch": 2.62, "learning_rate": 1.6909611446244724e-05, "loss": 1.0981, "step": 188000 }, { "epoch": 2.62, "learning_rate": 1.6902645620585407e-05, "loss": 1.0895, "step": 188100 }, { "epoch": 2.62, "learning_rate": 1.6895679794926093e-05, "loss": 1.0495, "step": 188200 }, { "epoch": 2.62, "learning_rate": 1.6888713969266776e-05, "loss": 1.0456, "step": 188300 }, { "epoch": 2.62, "learning_rate": 1.6881748143607463e-05, "loss": 1.1049, "step": 188400 }, { "epoch": 2.63, "learning_rate": 1.6874782317948146e-05, "loss": 1.0673, "step": 188500 }, { "epoch": 2.63, "learning_rate": 1.6867816492288832e-05, "loss": 1.0426, "step": 188600 }, { "epoch": 2.63, "learning_rate": 1.6860850666629515e-05, "loss": 1.1219, "step": 188700 }, { "epoch": 2.63, "learning_rate": 1.68538848409702e-05, "loss": 1.1085, "step": 188800 }, { "epoch": 2.63, "learning_rate": 1.6846919015310884e-05, "loss": 1.1014, "step": 188900 }, { "epoch": 2.63, "learning_rate": 1.683995318965157e-05, "loss": 1.1089, "step": 189000 }, { "epoch": 2.63, "learning_rate": 1.6832987363992254e-05, "loss": 1.0614, "step": 189100 }, { "epoch": 2.64, "learning_rate": 1.682602153833294e-05, "loss": 1.0551, "step": 189200 }, { "epoch": 2.64, "learning_rate": 1.6819055712673623e-05, "loss": 1.0755, "step": 189300 }, { "epoch": 2.64, "learning_rate": 1.681208988701431e-05, "loss": 1.062, "step": 189400 }, { "epoch": 2.64, "learning_rate": 1.6805124061354993e-05, "loss": 1.0667, "step": 189500 }, { "epoch": 2.64, "learning_rate": 1.679815823569568e-05, "loss": 1.0606, "step": 189600 }, { "epoch": 2.64, "learning_rate": 1.6791192410036362e-05, "loss": 1.0815, "step": 189700 }, { "epoch": 2.64, "learning_rate": 1.6784226584377048e-05, "loss": 1.0908, "step": 189800 }, { "epoch": 2.65, "learning_rate": 1.6777260758717728e-05, "loss": 1.0862, "step": 189900 }, { "epoch": 2.65, "learning_rate": 1.6770294933058414e-05, "loss": 1.0912, "step": 190000 }, { "epoch": 2.65, "learning_rate": 1.676339876565569e-05, "loss": 1.1108, "step": 190100 }, { "epoch": 2.65, "learning_rate": 1.6756432939996377e-05, "loss": 1.0989, "step": 190200 }, { "epoch": 2.65, "learning_rate": 1.674946711433706e-05, "loss": 1.0668, "step": 190300 }, { "epoch": 2.65, "learning_rate": 1.6742501288677746e-05, "loss": 1.1185, "step": 190400 }, { "epoch": 2.65, "learning_rate": 1.673553546301843e-05, "loss": 1.0967, "step": 190500 }, { "epoch": 2.66, "learning_rate": 1.6728569637359116e-05, "loss": 1.0713, "step": 190600 }, { "epoch": 2.66, "learning_rate": 1.67216038116998e-05, "loss": 1.072, "step": 190700 }, { "epoch": 2.66, "learning_rate": 1.6714637986040485e-05, "loss": 1.0556, "step": 190800 }, { "epoch": 2.66, "learning_rate": 1.670767216038117e-05, "loss": 1.1068, "step": 190900 }, { "epoch": 2.66, "learning_rate": 1.6700706334721855e-05, "loss": 1.0761, "step": 191000 }, { "epoch": 2.66, "learning_rate": 1.669374050906254e-05, "loss": 1.08, "step": 191100 }, { "epoch": 2.66, "learning_rate": 1.6686774683403224e-05, "loss": 1.0995, "step": 191200 }, { "epoch": 2.67, "learning_rate": 1.667980885774391e-05, "loss": 1.1142, "step": 191300 }, { "epoch": 2.67, "learning_rate": 1.6672843032084593e-05, "loss": 1.0918, "step": 191400 }, { "epoch": 2.67, "learning_rate": 1.666587720642528e-05, "loss": 1.0756, "step": 191500 }, { "epoch": 2.67, "learning_rate": 1.6658911380765963e-05, "loss": 1.1105, "step": 191600 }, { "epoch": 2.67, "learning_rate": 1.665194555510665e-05, "loss": 1.0649, "step": 191700 }, { "epoch": 2.67, "learning_rate": 1.6644979729447332e-05, "loss": 1.0424, "step": 191800 }, { "epoch": 2.67, "learning_rate": 1.663801390378802e-05, "loss": 1.0836, "step": 191900 }, { "epoch": 2.67, "learning_rate": 1.66310480781287e-05, "loss": 1.0363, "step": 192000 }, { "epoch": 2.68, "learning_rate": 1.6624082252469388e-05, "loss": 1.085, "step": 192100 }, { "epoch": 2.68, "learning_rate": 1.661711642681007e-05, "loss": 1.1017, "step": 192200 }, { "epoch": 2.68, "learning_rate": 1.6610150601150757e-05, "loss": 1.0552, "step": 192300 }, { "epoch": 2.68, "learning_rate": 1.6603184775491437e-05, "loss": 1.0586, "step": 192400 }, { "epoch": 2.68, "learning_rate": 1.6596218949832123e-05, "loss": 1.0569, "step": 192500 }, { "epoch": 2.68, "learning_rate": 1.6589253124172806e-05, "loss": 1.0751, "step": 192600 }, { "epoch": 2.68, "learning_rate": 1.6582287298513492e-05, "loss": 1.0993, "step": 192700 }, { "epoch": 2.69, "learning_rate": 1.6575321472854175e-05, "loss": 1.0761, "step": 192800 }, { "epoch": 2.69, "learning_rate": 1.6568355647194862e-05, "loss": 1.0457, "step": 192900 }, { "epoch": 2.69, "learning_rate": 1.6561389821535545e-05, "loss": 1.0316, "step": 193000 }, { "epoch": 2.69, "learning_rate": 1.655442399587623e-05, "loss": 1.0627, "step": 193100 }, { "epoch": 2.69, "learning_rate": 1.6547458170216914e-05, "loss": 1.0716, "step": 193200 }, { "epoch": 2.69, "learning_rate": 1.65404923445576e-05, "loss": 1.1109, "step": 193300 }, { "epoch": 2.69, "learning_rate": 1.6533526518898287e-05, "loss": 1.1173, "step": 193400 }, { "epoch": 2.7, "learning_rate": 1.652656069323897e-05, "loss": 1.0714, "step": 193500 }, { "epoch": 2.7, "learning_rate": 1.6519594867579656e-05, "loss": 1.0919, "step": 193600 }, { "epoch": 2.7, "learning_rate": 1.651262904192034e-05, "loss": 1.0783, "step": 193700 }, { "epoch": 2.7, "learning_rate": 1.6505663216261026e-05, "loss": 1.1043, "step": 193800 }, { "epoch": 2.7, "learning_rate": 1.649869739060171e-05, "loss": 1.0848, "step": 193900 }, { "epoch": 2.7, "learning_rate": 1.6491731564942395e-05, "loss": 1.1154, "step": 194000 }, { "epoch": 2.7, "learning_rate": 1.6484765739283078e-05, "loss": 1.077, "step": 194100 }, { "epoch": 2.71, "learning_rate": 1.6477799913623764e-05, "loss": 1.126, "step": 194200 }, { "epoch": 2.71, "learning_rate": 1.647090374622104e-05, "loss": 1.1002, "step": 194300 }, { "epoch": 2.71, "learning_rate": 1.6463937920561727e-05, "loss": 1.075, "step": 194400 }, { "epoch": 2.71, "learning_rate": 1.645697209490241e-05, "loss": 1.0668, "step": 194500 }, { "epoch": 2.71, "learning_rate": 1.6450075927499687e-05, "loss": 1.0743, "step": 194600 }, { "epoch": 2.71, "learning_rate": 1.6443110101840373e-05, "loss": 1.0968, "step": 194700 }, { "epoch": 2.71, "learning_rate": 1.6436144276181056e-05, "loss": 1.0653, "step": 194800 }, { "epoch": 2.72, "learning_rate": 1.6429178450521742e-05, "loss": 1.0899, "step": 194900 }, { "epoch": 2.72, "learning_rate": 1.6422212624862425e-05, "loss": 1.0995, "step": 195000 }, { "epoch": 2.72, "learning_rate": 1.641524679920311e-05, "loss": 1.0823, "step": 195100 }, { "epoch": 2.72, "learning_rate": 1.6408280973543795e-05, "loss": 1.0645, "step": 195200 }, { "epoch": 2.72, "learning_rate": 1.6401315147884478e-05, "loss": 1.0751, "step": 195300 }, { "epoch": 2.72, "learning_rate": 1.6394349322225164e-05, "loss": 1.0748, "step": 195400 }, { "epoch": 2.72, "learning_rate": 1.6387383496565847e-05, "loss": 1.1133, "step": 195500 }, { "epoch": 2.73, "learning_rate": 1.6380417670906533e-05, "loss": 1.086, "step": 195600 }, { "epoch": 2.73, "learning_rate": 1.6373451845247216e-05, "loss": 1.0626, "step": 195700 }, { "epoch": 2.73, "learning_rate": 1.6366486019587903e-05, "loss": 1.0763, "step": 195800 }, { "epoch": 2.73, "learning_rate": 1.6359520193928586e-05, "loss": 1.0454, "step": 195900 }, { "epoch": 2.73, "learning_rate": 1.6352554368269272e-05, "loss": 1.1118, "step": 196000 }, { "epoch": 2.73, "learning_rate": 1.6345588542609955e-05, "loss": 1.0515, "step": 196100 }, { "epoch": 2.73, "learning_rate": 1.633862271695064e-05, "loss": 1.0801, "step": 196200 }, { "epoch": 2.73, "learning_rate": 1.6331656891291325e-05, "loss": 1.0742, "step": 196300 }, { "epoch": 2.74, "learning_rate": 1.632469106563201e-05, "loss": 1.0821, "step": 196400 }, { "epoch": 2.74, "learning_rate": 1.6317725239972694e-05, "loss": 1.1027, "step": 196500 }, { "epoch": 2.74, "learning_rate": 1.631075941431338e-05, "loss": 1.0542, "step": 196600 }, { "epoch": 2.74, "learning_rate": 1.6303793588654063e-05, "loss": 1.092, "step": 196700 }, { "epoch": 2.74, "learning_rate": 1.629682776299475e-05, "loss": 1.1127, "step": 196800 }, { "epoch": 2.74, "learning_rate": 1.6289861937335433e-05, "loss": 1.0915, "step": 196900 }, { "epoch": 2.74, "learning_rate": 1.628289611167612e-05, "loss": 1.0801, "step": 197000 }, { "epoch": 2.75, "learning_rate": 1.6275930286016802e-05, "loss": 1.0764, "step": 197100 }, { "epoch": 2.75, "learning_rate": 1.6268964460357485e-05, "loss": 1.073, "step": 197200 }, { "epoch": 2.75, "learning_rate": 1.626199863469817e-05, "loss": 1.1099, "step": 197300 }, { "epoch": 2.75, "learning_rate": 1.6255032809038854e-05, "loss": 1.087, "step": 197400 }, { "epoch": 2.75, "learning_rate": 1.624806698337954e-05, "loss": 1.0516, "step": 197500 }, { "epoch": 2.75, "learning_rate": 1.6241101157720224e-05, "loss": 1.0765, "step": 197600 }, { "epoch": 2.75, "learning_rate": 1.62342049903175e-05, "loss": 1.0956, "step": 197700 }, { "epoch": 2.76, "learning_rate": 1.6227239164658187e-05, "loss": 1.0769, "step": 197800 }, { "epoch": 2.76, "learning_rate": 1.622027333899887e-05, "loss": 1.0753, "step": 197900 }, { "epoch": 2.76, "learning_rate": 1.6213307513339556e-05, "loss": 1.0535, "step": 198000 }, { "epoch": 2.76, "learning_rate": 1.6206341687680242e-05, "loss": 1.0612, "step": 198100 }, { "epoch": 2.76, "learning_rate": 1.6199375862020925e-05, "loss": 1.0586, "step": 198200 }, { "epoch": 2.76, "learning_rate": 1.619241003636161e-05, "loss": 1.0834, "step": 198300 }, { "epoch": 2.76, "learning_rate": 1.6185444210702295e-05, "loss": 1.1046, "step": 198400 }, { "epoch": 2.77, "learning_rate": 1.617847838504298e-05, "loss": 1.0751, "step": 198500 }, { "epoch": 2.77, "learning_rate": 1.6171512559383664e-05, "loss": 1.0657, "step": 198600 }, { "epoch": 2.77, "learning_rate": 1.616454673372435e-05, "loss": 1.0797, "step": 198700 }, { "epoch": 2.77, "learning_rate": 1.6157580908065033e-05, "loss": 1.0562, "step": 198800 }, { "epoch": 2.77, "learning_rate": 1.615061508240572e-05, "loss": 1.0602, "step": 198900 }, { "epoch": 2.77, "learning_rate": 1.6143649256746403e-05, "loss": 1.0542, "step": 199000 }, { "epoch": 2.77, "learning_rate": 1.6136753089343683e-05, "loss": 1.0838, "step": 199100 }, { "epoch": 2.78, "learning_rate": 1.6129787263684366e-05, "loss": 1.0975, "step": 199200 }, { "epoch": 2.78, "learning_rate": 1.6122821438025052e-05, "loss": 1.0557, "step": 199300 }, { "epoch": 2.78, "learning_rate": 1.6115855612365735e-05, "loss": 1.0981, "step": 199400 }, { "epoch": 2.78, "learning_rate": 1.610888978670642e-05, "loss": 1.1035, "step": 199500 }, { "epoch": 2.78, "learning_rate": 1.6101923961047104e-05, "loss": 1.0754, "step": 199600 }, { "epoch": 2.78, "learning_rate": 1.609495813538779e-05, "loss": 1.0956, "step": 199700 }, { "epoch": 2.78, "learning_rate": 1.608799230972847e-05, "loss": 1.0734, "step": 199800 }, { "epoch": 2.78, "learning_rate": 1.6081026484069157e-05, "loss": 1.0515, "step": 199900 }, { "epoch": 2.79, "learning_rate": 1.607406065840984e-05, "loss": 1.0866, "step": 200000 }, { "epoch": 2.79, "learning_rate": 1.6067094832750526e-05, "loss": 1.0826, "step": 200100 }, { "epoch": 2.79, "learning_rate": 1.606012900709121e-05, "loss": 1.0871, "step": 200200 }, { "epoch": 2.79, "learning_rate": 1.6053163181431895e-05, "loss": 1.0809, "step": 200300 }, { "epoch": 2.79, "learning_rate": 1.604619735577258e-05, "loss": 1.1262, "step": 200400 }, { "epoch": 2.79, "learning_rate": 1.6039231530113265e-05, "loss": 1.1024, "step": 200500 }, { "epoch": 2.79, "learning_rate": 1.6032265704453948e-05, "loss": 1.0786, "step": 200600 }, { "epoch": 2.8, "learning_rate": 1.6025299878794634e-05, "loss": 1.0486, "step": 200700 }, { "epoch": 2.8, "learning_rate": 1.6018334053135317e-05, "loss": 1.0435, "step": 200800 }, { "epoch": 2.8, "learning_rate": 1.6011368227476003e-05, "loss": 1.1039, "step": 200900 }, { "epoch": 2.8, "learning_rate": 1.6004402401816686e-05, "loss": 1.0653, "step": 201000 }, { "epoch": 2.8, "learning_rate": 1.5997436576157373e-05, "loss": 1.0518, "step": 201100 }, { "epoch": 2.8, "learning_rate": 1.5990470750498056e-05, "loss": 1.104, "step": 201200 }, { "epoch": 2.8, "learning_rate": 1.5983504924838742e-05, "loss": 1.0759, "step": 201300 }, { "epoch": 2.81, "learning_rate": 1.597653909917943e-05, "loss": 1.0754, "step": 201400 }, { "epoch": 2.81, "learning_rate": 1.5969642931776705e-05, "loss": 1.0443, "step": 201500 }, { "epoch": 2.81, "learning_rate": 1.5962677106117388e-05, "loss": 1.0767, "step": 201600 }, { "epoch": 2.81, "learning_rate": 1.5955711280458074e-05, "loss": 1.0629, "step": 201700 }, { "epoch": 2.81, "learning_rate": 1.5948745454798757e-05, "loss": 1.0433, "step": 201800 }, { "epoch": 2.81, "learning_rate": 1.5941779629139444e-05, "loss": 1.1352, "step": 201900 }, { "epoch": 2.81, "learning_rate": 1.5934813803480127e-05, "loss": 1.0735, "step": 202000 }, { "epoch": 2.82, "learning_rate": 1.5927847977820813e-05, "loss": 1.0932, "step": 202100 }, { "epoch": 2.82, "learning_rate": 1.5920882152161496e-05, "loss": 1.0893, "step": 202200 }, { "epoch": 2.82, "learning_rate": 1.591391632650218e-05, "loss": 1.0984, "step": 202300 }, { "epoch": 2.82, "learning_rate": 1.5906950500842865e-05, "loss": 1.0618, "step": 202400 }, { "epoch": 2.82, "learning_rate": 1.589998467518355e-05, "loss": 1.0906, "step": 202500 }, { "epoch": 2.82, "learning_rate": 1.5893018849524235e-05, "loss": 1.078, "step": 202600 }, { "epoch": 2.82, "learning_rate": 1.5886053023864918e-05, "loss": 1.1111, "step": 202700 }, { "epoch": 2.83, "learning_rate": 1.5879087198205604e-05, "loss": 1.0833, "step": 202800 }, { "epoch": 2.83, "learning_rate": 1.5872121372546287e-05, "loss": 1.0899, "step": 202900 }, { "epoch": 2.83, "learning_rate": 1.5865155546886974e-05, "loss": 1.0476, "step": 203000 }, { "epoch": 2.83, "learning_rate": 1.5858189721227657e-05, "loss": 1.0827, "step": 203100 }, { "epoch": 2.83, "learning_rate": 1.5851223895568343e-05, "loss": 1.0933, "step": 203200 }, { "epoch": 2.83, "learning_rate": 1.5844258069909026e-05, "loss": 1.136, "step": 203300 }, { "epoch": 2.83, "learning_rate": 1.5837292244249712e-05, "loss": 1.1106, "step": 203400 }, { "epoch": 2.84, "learning_rate": 1.5830326418590395e-05, "loss": 1.0957, "step": 203500 }, { "epoch": 2.84, "learning_rate": 1.582336059293108e-05, "loss": 1.1121, "step": 203600 }, { "epoch": 2.84, "learning_rate": 1.5816394767271765e-05, "loss": 1.0751, "step": 203700 }, { "epoch": 2.84, "learning_rate": 1.580942894161245e-05, "loss": 1.0797, "step": 203800 }, { "epoch": 2.84, "learning_rate": 1.5802463115953134e-05, "loss": 1.0954, "step": 203900 }, { "epoch": 2.84, "learning_rate": 1.579549729029382e-05, "loss": 1.0925, "step": 204000 }, { "epoch": 2.84, "learning_rate": 1.5788531464634503e-05, "loss": 1.0731, "step": 204100 }, { "epoch": 2.84, "learning_rate": 1.578156563897519e-05, "loss": 1.0628, "step": 204200 }, { "epoch": 2.85, "learning_rate": 1.5774599813315873e-05, "loss": 1.095, "step": 204300 }, { "epoch": 2.85, "learning_rate": 1.576763398765656e-05, "loss": 1.074, "step": 204400 }, { "epoch": 2.85, "learning_rate": 1.576066816199724e-05, "loss": 1.0821, "step": 204500 }, { "epoch": 2.85, "learning_rate": 1.5753702336337925e-05, "loss": 1.122, "step": 204600 }, { "epoch": 2.85, "learning_rate": 1.574673651067861e-05, "loss": 1.0234, "step": 204700 }, { "epoch": 2.85, "learning_rate": 1.5739770685019294e-05, "loss": 1.0927, "step": 204800 }, { "epoch": 2.85, "learning_rate": 1.573280485935998e-05, "loss": 1.0797, "step": 204900 }, { "epoch": 2.86, "learning_rate": 1.5725839033700664e-05, "loss": 1.0855, "step": 205000 }, { "epoch": 2.86, "learning_rate": 1.571887320804135e-05, "loss": 1.0915, "step": 205100 }, { "epoch": 2.86, "learning_rate": 1.5711907382382033e-05, "loss": 1.0885, "step": 205200 }, { "epoch": 2.86, "learning_rate": 1.570494155672272e-05, "loss": 1.0944, "step": 205300 }, { "epoch": 2.86, "learning_rate": 1.5697975731063403e-05, "loss": 1.1138, "step": 205400 }, { "epoch": 2.86, "learning_rate": 1.569100990540409e-05, "loss": 1.0337, "step": 205500 }, { "epoch": 2.86, "learning_rate": 1.5684044079744772e-05, "loss": 1.0726, "step": 205600 }, { "epoch": 2.87, "learning_rate": 1.567707825408546e-05, "loss": 1.0861, "step": 205700 }, { "epoch": 2.87, "learning_rate": 1.567011242842614e-05, "loss": 1.0717, "step": 205800 }, { "epoch": 2.87, "learning_rate": 1.5663146602766828e-05, "loss": 1.1152, "step": 205900 }, { "epoch": 2.87, "learning_rate": 1.565618077710751e-05, "loss": 1.0978, "step": 206000 }, { "epoch": 2.87, "learning_rate": 1.5649214951448197e-05, "loss": 1.0851, "step": 206100 }, { "epoch": 2.87, "learning_rate": 1.564224912578888e-05, "loss": 1.0665, "step": 206200 }, { "epoch": 2.87, "learning_rate": 1.5635283300129566e-05, "loss": 1.0686, "step": 206300 }, { "epoch": 2.88, "learning_rate": 1.562831747447025e-05, "loss": 1.0869, "step": 206400 }, { "epoch": 2.88, "learning_rate": 1.5621351648810936e-05, "loss": 1.1102, "step": 206500 }, { "epoch": 2.88, "learning_rate": 1.561438582315162e-05, "loss": 1.0685, "step": 206600 }, { "epoch": 2.88, "learning_rate": 1.5607419997492302e-05, "loss": 1.0487, "step": 206700 }, { "epoch": 2.88, "learning_rate": 1.5600454171832985e-05, "loss": 1.0682, "step": 206800 }, { "epoch": 2.88, "learning_rate": 1.559348834617367e-05, "loss": 1.0777, "step": 206900 }, { "epoch": 2.88, "learning_rate": 1.5586522520514354e-05, "loss": 1.0471, "step": 207000 }, { "epoch": 2.89, "learning_rate": 1.557955669485504e-05, "loss": 1.0578, "step": 207100 }, { "epoch": 2.89, "learning_rate": 1.5572590869195727e-05, "loss": 1.0603, "step": 207200 }, { "epoch": 2.89, "learning_rate": 1.556562504353641e-05, "loss": 1.0783, "step": 207300 }, { "epoch": 2.89, "learning_rate": 1.5558659217877096e-05, "loss": 1.1009, "step": 207400 }, { "epoch": 2.89, "learning_rate": 1.555169339221778e-05, "loss": 1.0468, "step": 207500 }, { "epoch": 2.89, "learning_rate": 1.5544797224815056e-05, "loss": 1.0694, "step": 207600 }, { "epoch": 2.89, "learning_rate": 1.5537831399155742e-05, "loss": 1.0634, "step": 207700 }, { "epoch": 2.89, "learning_rate": 1.5530865573496425e-05, "loss": 1.0974, "step": 207800 }, { "epoch": 2.9, "learning_rate": 1.552389974783711e-05, "loss": 1.1073, "step": 207900 }, { "epoch": 2.9, "learning_rate": 1.5516933922177798e-05, "loss": 1.1259, "step": 208000 }, { "epoch": 2.9, "learning_rate": 1.550996809651848e-05, "loss": 1.0786, "step": 208100 }, { "epoch": 2.9, "learning_rate": 1.5503002270859167e-05, "loss": 1.0566, "step": 208200 }, { "epoch": 2.9, "learning_rate": 1.549603644519985e-05, "loss": 1.1088, "step": 208300 }, { "epoch": 2.9, "learning_rate": 1.5489070619540537e-05, "loss": 1.0866, "step": 208400 }, { "epoch": 2.9, "learning_rate": 1.548210479388122e-05, "loss": 1.1046, "step": 208500 }, { "epoch": 2.91, "learning_rate": 1.5475138968221906e-05, "loss": 1.0708, "step": 208600 }, { "epoch": 2.91, "learning_rate": 1.546817314256259e-05, "loss": 1.0877, "step": 208700 }, { "epoch": 2.91, "learning_rate": 1.546127697515987e-05, "loss": 1.0964, "step": 208800 }, { "epoch": 2.91, "learning_rate": 1.545431114950055e-05, "loss": 1.0895, "step": 208900 }, { "epoch": 2.91, "learning_rate": 1.5447345323841238e-05, "loss": 1.0904, "step": 209000 }, { "epoch": 2.91, "learning_rate": 1.544037949818192e-05, "loss": 1.1009, "step": 209100 }, { "epoch": 2.91, "learning_rate": 1.5433413672522607e-05, "loss": 1.0585, "step": 209200 }, { "epoch": 2.92, "learning_rate": 1.5426447846863287e-05, "loss": 1.0974, "step": 209300 }, { "epoch": 2.92, "learning_rate": 1.5419482021203973e-05, "loss": 1.1168, "step": 209400 }, { "epoch": 2.92, "learning_rate": 1.5412516195544656e-05, "loss": 1.0974, "step": 209500 }, { "epoch": 2.92, "learning_rate": 1.5405550369885343e-05, "loss": 1.0855, "step": 209600 }, { "epoch": 2.92, "learning_rate": 1.5398584544226026e-05, "loss": 1.0696, "step": 209700 }, { "epoch": 2.92, "learning_rate": 1.5391618718566712e-05, "loss": 1.0956, "step": 209800 }, { "epoch": 2.92, "learning_rate": 1.5384652892907395e-05, "loss": 1.0661, "step": 209900 }, { "epoch": 2.93, "learning_rate": 1.537768706724808e-05, "loss": 1.0785, "step": 210000 }, { "epoch": 2.93, "learning_rate": 1.5370721241588764e-05, "loss": 1.0751, "step": 210100 }, { "epoch": 2.93, "learning_rate": 1.536375541592945e-05, "loss": 1.1149, "step": 210200 }, { "epoch": 2.93, "learning_rate": 1.5356789590270134e-05, "loss": 1.1071, "step": 210300 }, { "epoch": 2.93, "learning_rate": 1.534982376461082e-05, "loss": 1.0892, "step": 210400 }, { "epoch": 2.93, "learning_rate": 1.5342857938951503e-05, "loss": 1.0664, "step": 210500 }, { "epoch": 2.93, "learning_rate": 1.533589211329219e-05, "loss": 1.0815, "step": 210600 }, { "epoch": 2.94, "learning_rate": 1.5328926287632873e-05, "loss": 1.07, "step": 210700 }, { "epoch": 2.94, "learning_rate": 1.532196046197356e-05, "loss": 1.0735, "step": 210800 }, { "epoch": 2.94, "learning_rate": 1.5314994636314242e-05, "loss": 1.0877, "step": 210900 }, { "epoch": 2.94, "learning_rate": 1.530802881065493e-05, "loss": 1.0944, "step": 211000 }, { "epoch": 2.94, "learning_rate": 1.530106298499561e-05, "loss": 1.0892, "step": 211100 }, { "epoch": 2.94, "learning_rate": 1.5294097159336298e-05, "loss": 1.0595, "step": 211200 }, { "epoch": 2.94, "learning_rate": 1.5287131333676984e-05, "loss": 1.0714, "step": 211300 }, { "epoch": 2.95, "learning_rate": 1.5280165508017667e-05, "loss": 1.1189, "step": 211400 }, { "epoch": 2.95, "learning_rate": 1.5273199682358353e-05, "loss": 1.0914, "step": 211500 }, { "epoch": 2.95, "learning_rate": 1.5266233856699033e-05, "loss": 1.0677, "step": 211600 }, { "epoch": 2.95, "learning_rate": 1.525926803103972e-05, "loss": 1.0775, "step": 211700 }, { "epoch": 2.95, "learning_rate": 1.5252302205380402e-05, "loss": 1.065, "step": 211800 }, { "epoch": 2.95, "learning_rate": 1.524540603797768e-05, "loss": 1.0778, "step": 211900 }, { "epoch": 2.95, "learning_rate": 1.5238440212318365e-05, "loss": 1.0582, "step": 212000 }, { "epoch": 2.95, "learning_rate": 1.523147438665905e-05, "loss": 1.0656, "step": 212100 }, { "epoch": 2.96, "learning_rate": 1.5224508560999735e-05, "loss": 1.0801, "step": 212200 }, { "epoch": 2.96, "learning_rate": 1.521754273534042e-05, "loss": 1.0756, "step": 212300 }, { "epoch": 2.96, "learning_rate": 1.5210576909681104e-05, "loss": 1.0706, "step": 212400 }, { "epoch": 2.96, "learning_rate": 1.5203611084021789e-05, "loss": 1.0565, "step": 212500 }, { "epoch": 2.96, "learning_rate": 1.5196645258362473e-05, "loss": 1.1021, "step": 212600 }, { "epoch": 2.96, "learning_rate": 1.5189749090959751e-05, "loss": 1.0911, "step": 212700 }, { "epoch": 2.96, "learning_rate": 1.5182783265300436e-05, "loss": 1.0772, "step": 212800 }, { "epoch": 2.97, "learning_rate": 1.517581743964112e-05, "loss": 1.0479, "step": 212900 }, { "epoch": 2.97, "learning_rate": 1.5168851613981805e-05, "loss": 1.0724, "step": 213000 }, { "epoch": 2.97, "learning_rate": 1.516188578832249e-05, "loss": 1.0897, "step": 213100 }, { "epoch": 2.97, "learning_rate": 1.5154919962663175e-05, "loss": 1.0473, "step": 213200 }, { "epoch": 2.97, "learning_rate": 1.514795413700386e-05, "loss": 1.0869, "step": 213300 }, { "epoch": 2.97, "learning_rate": 1.5140988311344544e-05, "loss": 1.0665, "step": 213400 }, { "epoch": 2.97, "learning_rate": 1.5134022485685229e-05, "loss": 1.076, "step": 213500 }, { "epoch": 2.98, "learning_rate": 1.5127056660025914e-05, "loss": 1.107, "step": 213600 }, { "epoch": 2.98, "learning_rate": 1.5120090834366598e-05, "loss": 1.0896, "step": 213700 }, { "epoch": 2.98, "learning_rate": 1.5113125008707283e-05, "loss": 1.0652, "step": 213800 }, { "epoch": 2.98, "learning_rate": 1.5106159183047968e-05, "loss": 1.0795, "step": 213900 }, { "epoch": 2.98, "learning_rate": 1.5099193357388652e-05, "loss": 1.0711, "step": 214000 }, { "epoch": 2.98, "learning_rate": 1.5092227531729339e-05, "loss": 1.0763, "step": 214100 }, { "epoch": 2.98, "learning_rate": 1.508526170607002e-05, "loss": 1.0809, "step": 214200 }, { "epoch": 2.99, "learning_rate": 1.5078295880410705e-05, "loss": 1.0848, "step": 214300 }, { "epoch": 2.99, "learning_rate": 1.507133005475139e-05, "loss": 1.1072, "step": 214400 }, { "epoch": 2.99, "learning_rate": 1.5064364229092074e-05, "loss": 1.0776, "step": 214500 }, { "epoch": 2.99, "learning_rate": 1.5057398403432759e-05, "loss": 1.0591, "step": 214600 }, { "epoch": 2.99, "learning_rate": 1.5050432577773443e-05, "loss": 1.0677, "step": 214700 }, { "epoch": 2.99, "learning_rate": 1.5043466752114128e-05, "loss": 1.0943, "step": 214800 }, { "epoch": 2.99, "learning_rate": 1.5036500926454813e-05, "loss": 1.1069, "step": 214900 }, { "epoch": 3.0, "learning_rate": 1.5029535100795497e-05, "loss": 1.0775, "step": 215000 }, { "epoch": 3.0, "learning_rate": 1.5022569275136182e-05, "loss": 1.0876, "step": 215100 }, { "epoch": 3.0, "learning_rate": 1.5015603449476867e-05, "loss": 1.108, "step": 215200 }, { "epoch": 3.0, "learning_rate": 1.5008637623817552e-05, "loss": 1.0907, "step": 215300 }, { "epoch": 3.0, "eval_gen_len": 20.0, "eval_loss": 1.145164132118225, "eval_rouge1": 12.6221, "eval_rouge2": 3.773, "eval_rougeL": 12.1226, "eval_rougeLsum": 12.2359, "eval_runtime": 1533.982, "eval_samples_per_second": 8.715, "eval_steps_per_second": 2.179, "step": 215337 }, { "epoch": 3.0, "learning_rate": 1.5001671798158236e-05, "loss": 0.9923, "step": 215400 }, { "epoch": 3.0, "learning_rate": 1.4994705972498921e-05, "loss": 0.9906, "step": 215500 }, { "epoch": 3.0, "learning_rate": 1.4987740146839606e-05, "loss": 0.9636, "step": 215600 }, { "epoch": 3.01, "learning_rate": 1.498077432118029e-05, "loss": 0.9861, "step": 215700 }, { "epoch": 3.01, "learning_rate": 1.4973808495520975e-05, "loss": 0.9771, "step": 215800 }, { "epoch": 3.01, "learning_rate": 1.4966842669861658e-05, "loss": 0.9615, "step": 215900 }, { "epoch": 3.01, "learning_rate": 1.4959876844202343e-05, "loss": 0.9824, "step": 216000 }, { "epoch": 3.01, "learning_rate": 1.4952911018543027e-05, "loss": 0.9312, "step": 216100 }, { "epoch": 3.01, "learning_rate": 1.4945945192883712e-05, "loss": 0.9745, "step": 216200 }, { "epoch": 3.01, "learning_rate": 1.4938979367224397e-05, "loss": 0.9686, "step": 216300 }, { "epoch": 3.01, "learning_rate": 1.4932013541565081e-05, "loss": 0.9777, "step": 216400 }, { "epoch": 3.02, "learning_rate": 1.4925047715905768e-05, "loss": 0.9322, "step": 216500 }, { "epoch": 3.02, "learning_rate": 1.4918081890246452e-05, "loss": 0.9771, "step": 216600 }, { "epoch": 3.02, "learning_rate": 1.4911116064587137e-05, "loss": 0.9627, "step": 216700 }, { "epoch": 3.02, "learning_rate": 1.4904150238927822e-05, "loss": 0.9897, "step": 216800 }, { "epoch": 3.02, "learning_rate": 1.4897184413268506e-05, "loss": 0.9642, "step": 216900 }, { "epoch": 3.02, "learning_rate": 1.489021858760919e-05, "loss": 0.9726, "step": 217000 }, { "epoch": 3.02, "learning_rate": 1.4883252761949874e-05, "loss": 0.9797, "step": 217100 }, { "epoch": 3.03, "learning_rate": 1.4876286936290559e-05, "loss": 0.991, "step": 217200 }, { "epoch": 3.03, "learning_rate": 1.4869321110631243e-05, "loss": 0.9746, "step": 217300 }, { "epoch": 3.03, "learning_rate": 1.4862355284971928e-05, "loss": 0.9629, "step": 217400 }, { "epoch": 3.03, "learning_rate": 1.4855389459312613e-05, "loss": 0.967, "step": 217500 }, { "epoch": 3.03, "learning_rate": 1.4848423633653298e-05, "loss": 0.985, "step": 217600 }, { "epoch": 3.03, "learning_rate": 1.4841457807993982e-05, "loss": 0.9809, "step": 217700 }, { "epoch": 3.03, "learning_rate": 1.4834491982334667e-05, "loss": 0.9763, "step": 217800 }, { "epoch": 3.04, "learning_rate": 1.4827526156675352e-05, "loss": 0.9669, "step": 217900 }, { "epoch": 3.04, "learning_rate": 1.4820560331016036e-05, "loss": 0.9642, "step": 218000 }, { "epoch": 3.04, "learning_rate": 1.481359450535672e-05, "loss": 0.9634, "step": 218100 }, { "epoch": 3.04, "learning_rate": 1.4806628679697404e-05, "loss": 0.9793, "step": 218200 }, { "epoch": 3.04, "learning_rate": 1.4799662854038089e-05, "loss": 0.9267, "step": 218300 }, { "epoch": 3.04, "learning_rate": 1.4792697028378773e-05, "loss": 0.9831, "step": 218400 }, { "epoch": 3.04, "learning_rate": 1.4785731202719458e-05, "loss": 0.9944, "step": 218500 }, { "epoch": 3.05, "learning_rate": 1.4778765377060143e-05, "loss": 0.9952, "step": 218600 }, { "epoch": 3.05, "learning_rate": 1.4771799551400827e-05, "loss": 0.9637, "step": 218700 }, { "epoch": 3.05, "learning_rate": 1.4764903383998105e-05, "loss": 0.9783, "step": 218800 }, { "epoch": 3.05, "learning_rate": 1.475793755833879e-05, "loss": 0.9377, "step": 218900 }, { "epoch": 3.05, "learning_rate": 1.4750971732679475e-05, "loss": 0.9509, "step": 219000 }, { "epoch": 3.05, "learning_rate": 1.474400590702016e-05, "loss": 0.99, "step": 219100 }, { "epoch": 3.05, "learning_rate": 1.4737040081360844e-05, "loss": 0.9931, "step": 219200 }, { "epoch": 3.06, "learning_rate": 1.4730074255701529e-05, "loss": 1.02, "step": 219300 }, { "epoch": 3.06, "learning_rate": 1.4723108430042212e-05, "loss": 0.9756, "step": 219400 }, { "epoch": 3.06, "learning_rate": 1.4716142604382897e-05, "loss": 1.015, "step": 219500 }, { "epoch": 3.06, "learning_rate": 1.4709176778723581e-05, "loss": 0.9802, "step": 219600 }, { "epoch": 3.06, "learning_rate": 1.4702210953064266e-05, "loss": 1.0137, "step": 219700 }, { "epoch": 3.06, "learning_rate": 1.4695314785661544e-05, "loss": 0.9492, "step": 219800 }, { "epoch": 3.06, "learning_rate": 1.4688348960002229e-05, "loss": 0.9642, "step": 219900 }, { "epoch": 3.06, "learning_rate": 1.4681383134342913e-05, "loss": 0.9745, "step": 220000 }, { "epoch": 3.07, "learning_rate": 1.4674417308683598e-05, "loss": 0.9685, "step": 220100 }, { "epoch": 3.07, "learning_rate": 1.4667451483024283e-05, "loss": 0.9723, "step": 220200 }, { "epoch": 3.07, "learning_rate": 1.4660485657364967e-05, "loss": 1.0045, "step": 220300 }, { "epoch": 3.07, "learning_rate": 1.4653519831705652e-05, "loss": 0.9695, "step": 220400 }, { "epoch": 3.07, "learning_rate": 1.4646554006046337e-05, "loss": 0.9483, "step": 220500 }, { "epoch": 3.07, "learning_rate": 1.4639588180387023e-05, "loss": 0.9441, "step": 220600 }, { "epoch": 3.07, "learning_rate": 1.4632622354727706e-05, "loss": 0.9568, "step": 220700 }, { "epoch": 3.08, "learning_rate": 1.4625656529068391e-05, "loss": 0.9447, "step": 220800 }, { "epoch": 3.08, "learning_rate": 1.4618690703409076e-05, "loss": 0.9856, "step": 220900 }, { "epoch": 3.08, "learning_rate": 1.461172487774976e-05, "loss": 0.9654, "step": 221000 }, { "epoch": 3.08, "learning_rate": 1.4604759052090445e-05, "loss": 0.9757, "step": 221100 }, { "epoch": 3.08, "learning_rate": 1.459779322643113e-05, "loss": 0.9999, "step": 221200 }, { "epoch": 3.08, "learning_rate": 1.4590827400771814e-05, "loss": 0.9977, "step": 221300 }, { "epoch": 3.08, "learning_rate": 1.4583861575112499e-05, "loss": 0.9694, "step": 221400 }, { "epoch": 3.09, "learning_rate": 1.4576895749453184e-05, "loss": 0.9623, "step": 221500 }, { "epoch": 3.09, "learning_rate": 1.4569929923793868e-05, "loss": 0.9558, "step": 221600 }, { "epoch": 3.09, "learning_rate": 1.4562964098134553e-05, "loss": 0.9392, "step": 221700 }, { "epoch": 3.09, "learning_rate": 1.4555998272475236e-05, "loss": 0.9699, "step": 221800 }, { "epoch": 3.09, "learning_rate": 1.454903244681592e-05, "loss": 0.9772, "step": 221900 }, { "epoch": 3.09, "learning_rate": 1.4542066621156605e-05, "loss": 0.9531, "step": 222000 }, { "epoch": 3.09, "learning_rate": 1.453510079549729e-05, "loss": 0.9337, "step": 222100 }, { "epoch": 3.1, "learning_rate": 1.4528134969837975e-05, "loss": 1.0065, "step": 222200 }, { "epoch": 3.1, "learning_rate": 1.452116914417866e-05, "loss": 0.9943, "step": 222300 }, { "epoch": 3.1, "learning_rate": 1.4514203318519344e-05, "loss": 0.9692, "step": 222400 }, { "epoch": 3.1, "learning_rate": 1.4507237492860029e-05, "loss": 0.9519, "step": 222500 }, { "epoch": 3.1, "learning_rate": 1.4500271667200713e-05, "loss": 0.9975, "step": 222600 }, { "epoch": 3.1, "learning_rate": 1.4493305841541398e-05, "loss": 0.9832, "step": 222700 }, { "epoch": 3.1, "learning_rate": 1.4486340015882083e-05, "loss": 0.956, "step": 222800 }, { "epoch": 3.11, "learning_rate": 1.4479374190222766e-05, "loss": 0.9643, "step": 222900 }, { "epoch": 3.11, "learning_rate": 1.4472408364563452e-05, "loss": 0.9638, "step": 223000 }, { "epoch": 3.11, "learning_rate": 1.4465442538904137e-05, "loss": 0.976, "step": 223100 }, { "epoch": 3.11, "learning_rate": 1.4458476713244822e-05, "loss": 1.0135, "step": 223200 }, { "epoch": 3.11, "learning_rate": 1.4451510887585506e-05, "loss": 0.9846, "step": 223300 }, { "epoch": 3.11, "learning_rate": 1.4444545061926191e-05, "loss": 1.0115, "step": 223400 }, { "epoch": 3.11, "learning_rate": 1.4437579236266876e-05, "loss": 0.9863, "step": 223500 }, { "epoch": 3.12, "learning_rate": 1.443061341060756e-05, "loss": 0.9686, "step": 223600 }, { "epoch": 3.12, "learning_rate": 1.4423647584948245e-05, "loss": 0.9709, "step": 223700 }, { "epoch": 3.12, "learning_rate": 1.441668175928893e-05, "loss": 0.9712, "step": 223800 }, { "epoch": 3.12, "learning_rate": 1.4409785591886208e-05, "loss": 0.9408, "step": 223900 }, { "epoch": 3.12, "learning_rate": 1.4402819766226892e-05, "loss": 0.9603, "step": 224000 }, { "epoch": 3.12, "learning_rate": 1.4395853940567577e-05, "loss": 0.9748, "step": 224100 }, { "epoch": 3.12, "learning_rate": 1.438888811490826e-05, "loss": 0.9562, "step": 224200 }, { "epoch": 3.12, "learning_rate": 1.4381922289248945e-05, "loss": 1.0136, "step": 224300 }, { "epoch": 3.13, "learning_rate": 1.437495646358963e-05, "loss": 0.9822, "step": 224400 }, { "epoch": 3.13, "learning_rate": 1.4367990637930314e-05, "loss": 1.0055, "step": 224500 }, { "epoch": 3.13, "learning_rate": 1.4361024812270999e-05, "loss": 0.9875, "step": 224600 }, { "epoch": 3.13, "learning_rate": 1.4354058986611684e-05, "loss": 1.0064, "step": 224700 }, { "epoch": 3.13, "learning_rate": 1.4347162819208962e-05, "loss": 0.9762, "step": 224800 }, { "epoch": 3.13, "learning_rate": 1.4340196993549646e-05, "loss": 0.9445, "step": 224900 }, { "epoch": 3.13, "learning_rate": 1.4333231167890331e-05, "loss": 0.9514, "step": 225000 }, { "epoch": 3.14, "learning_rate": 1.4326265342231016e-05, "loss": 0.9584, "step": 225100 }, { "epoch": 3.14, "learning_rate": 1.43192995165717e-05, "loss": 1.0148, "step": 225200 }, { "epoch": 3.14, "learning_rate": 1.4312333690912385e-05, "loss": 0.9802, "step": 225300 }, { "epoch": 3.14, "learning_rate": 1.430536786525307e-05, "loss": 0.98, "step": 225400 }, { "epoch": 3.14, "learning_rate": 1.4298402039593753e-05, "loss": 0.9218, "step": 225500 }, { "epoch": 3.14, "learning_rate": 1.4291436213934437e-05, "loss": 0.9785, "step": 225600 }, { "epoch": 3.14, "learning_rate": 1.4284470388275122e-05, "loss": 0.9941, "step": 225700 }, { "epoch": 3.15, "learning_rate": 1.4277504562615807e-05, "loss": 0.9878, "step": 225800 }, { "epoch": 3.15, "learning_rate": 1.4270538736956492e-05, "loss": 0.9692, "step": 225900 }, { "epoch": 3.15, "learning_rate": 1.4263572911297176e-05, "loss": 0.9958, "step": 226000 }, { "epoch": 3.15, "learning_rate": 1.4256607085637861e-05, "loss": 0.9872, "step": 226100 }, { "epoch": 3.15, "learning_rate": 1.4249641259978546e-05, "loss": 0.9813, "step": 226200 }, { "epoch": 3.15, "learning_rate": 1.424267543431923e-05, "loss": 0.9601, "step": 226300 }, { "epoch": 3.15, "learning_rate": 1.4235709608659915e-05, "loss": 0.9642, "step": 226400 }, { "epoch": 3.16, "learning_rate": 1.42287437830006e-05, "loss": 0.9719, "step": 226500 }, { "epoch": 3.16, "learning_rate": 1.4221777957341284e-05, "loss": 0.9609, "step": 226600 }, { "epoch": 3.16, "learning_rate": 1.4214812131681967e-05, "loss": 0.9422, "step": 226700 }, { "epoch": 3.16, "learning_rate": 1.4207846306022652e-05, "loss": 0.9826, "step": 226800 }, { "epoch": 3.16, "learning_rate": 1.4200880480363337e-05, "loss": 0.9508, "step": 226900 }, { "epoch": 3.16, "learning_rate": 1.4193914654704021e-05, "loss": 0.9779, "step": 227000 }, { "epoch": 3.16, "learning_rate": 1.4186948829044708e-05, "loss": 0.9608, "step": 227100 }, { "epoch": 3.17, "learning_rate": 1.4179983003385392e-05, "loss": 0.9876, "step": 227200 }, { "epoch": 3.17, "learning_rate": 1.4173017177726077e-05, "loss": 0.9501, "step": 227300 }, { "epoch": 3.17, "learning_rate": 1.4166051352066762e-05, "loss": 0.9924, "step": 227400 }, { "epoch": 3.17, "learning_rate": 1.4159085526407446e-05, "loss": 0.9669, "step": 227500 }, { "epoch": 3.17, "learning_rate": 1.4152119700748131e-05, "loss": 0.9911, "step": 227600 }, { "epoch": 3.17, "learning_rate": 1.4145153875088816e-05, "loss": 0.9584, "step": 227700 }, { "epoch": 3.17, "learning_rate": 1.4138188049429499e-05, "loss": 0.9577, "step": 227800 }, { "epoch": 3.18, "learning_rate": 1.4131222223770183e-05, "loss": 0.9707, "step": 227900 }, { "epoch": 3.18, "learning_rate": 1.4124326056367462e-05, "loss": 0.9537, "step": 228000 }, { "epoch": 3.18, "learning_rate": 1.4117360230708146e-05, "loss": 0.9705, "step": 228100 }, { "epoch": 3.18, "learning_rate": 1.4110394405048831e-05, "loss": 0.9787, "step": 228200 }, { "epoch": 3.18, "learning_rate": 1.4103428579389516e-05, "loss": 0.9366, "step": 228300 }, { "epoch": 3.18, "learning_rate": 1.40964627537302e-05, "loss": 0.9885, "step": 228400 }, { "epoch": 3.18, "learning_rate": 1.4089496928070885e-05, "loss": 1.0184, "step": 228500 }, { "epoch": 3.18, "learning_rate": 1.408253110241157e-05, "loss": 1.0104, "step": 228600 }, { "epoch": 3.19, "learning_rate": 1.4075565276752254e-05, "loss": 0.9694, "step": 228700 }, { "epoch": 3.19, "learning_rate": 1.4068599451092939e-05, "loss": 0.9776, "step": 228800 }, { "epoch": 3.19, "learning_rate": 1.4061633625433624e-05, "loss": 0.9612, "step": 228900 }, { "epoch": 3.19, "learning_rate": 1.4054667799774308e-05, "loss": 0.9772, "step": 229000 }, { "epoch": 3.19, "learning_rate": 1.4047701974114991e-05, "loss": 0.9404, "step": 229100 }, { "epoch": 3.19, "learning_rate": 1.4040736148455676e-05, "loss": 0.927, "step": 229200 }, { "epoch": 3.19, "learning_rate": 1.403377032279636e-05, "loss": 0.9735, "step": 229300 }, { "epoch": 3.2, "learning_rate": 1.4026804497137045e-05, "loss": 0.9728, "step": 229400 }, { "epoch": 3.2, "learning_rate": 1.401983867147773e-05, "loss": 0.9853, "step": 229500 }, { "epoch": 3.2, "learning_rate": 1.4012872845818415e-05, "loss": 0.9633, "step": 229600 }, { "epoch": 3.2, "learning_rate": 1.40059070201591e-05, "loss": 0.9723, "step": 229700 }, { "epoch": 3.2, "learning_rate": 1.3998941194499784e-05, "loss": 0.9575, "step": 229800 }, { "epoch": 3.2, "learning_rate": 1.3991975368840469e-05, "loss": 0.9777, "step": 229900 }, { "epoch": 3.2, "learning_rate": 1.3985009543181154e-05, "loss": 0.9754, "step": 230000 }, { "epoch": 3.21, "learning_rate": 1.3978043717521838e-05, "loss": 0.9709, "step": 230100 }, { "epoch": 3.21, "learning_rate": 1.3971077891862521e-05, "loss": 0.9731, "step": 230200 }, { "epoch": 3.21, "learning_rate": 1.3964112066203206e-05, "loss": 0.9468, "step": 230300 }, { "epoch": 3.21, "learning_rate": 1.3957146240543892e-05, "loss": 1.0028, "step": 230400 }, { "epoch": 3.21, "learning_rate": 1.3950180414884577e-05, "loss": 0.9617, "step": 230500 }, { "epoch": 3.21, "learning_rate": 1.3943214589225262e-05, "loss": 0.9848, "step": 230600 }, { "epoch": 3.21, "learning_rate": 1.3936248763565946e-05, "loss": 0.9833, "step": 230700 }, { "epoch": 3.22, "learning_rate": 1.3929282937906631e-05, "loss": 0.9816, "step": 230800 }, { "epoch": 3.22, "learning_rate": 1.3922317112247316e-05, "loss": 0.9549, "step": 230900 }, { "epoch": 3.22, "learning_rate": 1.3915351286588e-05, "loss": 0.9583, "step": 231000 }, { "epoch": 3.22, "learning_rate": 1.3908385460928685e-05, "loss": 0.99, "step": 231100 }, { "epoch": 3.22, "learning_rate": 1.390141963526937e-05, "loss": 0.9897, "step": 231200 }, { "epoch": 3.22, "learning_rate": 1.3894453809610053e-05, "loss": 0.9987, "step": 231300 }, { "epoch": 3.22, "learning_rate": 1.3887487983950737e-05, "loss": 0.9997, "step": 231400 }, { "epoch": 3.23, "learning_rate": 1.3880522158291422e-05, "loss": 0.9936, "step": 231500 }, { "epoch": 3.23, "learning_rate": 1.3873556332632107e-05, "loss": 0.9856, "step": 231600 }, { "epoch": 3.23, "learning_rate": 1.3866660165229385e-05, "loss": 0.9799, "step": 231700 }, { "epoch": 3.23, "learning_rate": 1.385969433957007e-05, "loss": 0.9946, "step": 231800 }, { "epoch": 3.23, "learning_rate": 1.3852728513910754e-05, "loss": 0.9955, "step": 231900 }, { "epoch": 3.23, "learning_rate": 1.3845762688251439e-05, "loss": 0.9836, "step": 232000 }, { "epoch": 3.23, "learning_rate": 1.3838796862592124e-05, "loss": 0.9599, "step": 232100 }, { "epoch": 3.23, "learning_rate": 1.3831831036932808e-05, "loss": 0.9956, "step": 232200 }, { "epoch": 3.24, "learning_rate": 1.3824865211273493e-05, "loss": 1.0097, "step": 232300 }, { "epoch": 3.24, "learning_rate": 1.3817899385614178e-05, "loss": 1.0054, "step": 232400 }, { "epoch": 3.24, "learning_rate": 1.3810933559954862e-05, "loss": 0.9995, "step": 232500 }, { "epoch": 3.24, "learning_rate": 1.3803967734295545e-05, "loss": 0.9805, "step": 232600 }, { "epoch": 3.24, "learning_rate": 1.379700190863623e-05, "loss": 0.9738, "step": 232700 }, { "epoch": 3.24, "learning_rate": 1.3790036082976915e-05, "loss": 0.969, "step": 232800 }, { "epoch": 3.24, "learning_rate": 1.37830702573176e-05, "loss": 0.9727, "step": 232900 }, { "epoch": 3.25, "learning_rate": 1.3776104431658284e-05, "loss": 0.9536, "step": 233000 }, { "epoch": 3.25, "learning_rate": 1.3769138605998969e-05, "loss": 0.9871, "step": 233100 }, { "epoch": 3.25, "learning_rate": 1.3762172780339654e-05, "loss": 1.0029, "step": 233200 }, { "epoch": 3.25, "learning_rate": 1.3755206954680338e-05, "loss": 0.9848, "step": 233300 }, { "epoch": 3.25, "learning_rate": 1.3748241129021023e-05, "loss": 1.0025, "step": 233400 }, { "epoch": 3.25, "learning_rate": 1.3741275303361708e-05, "loss": 0.9891, "step": 233500 }, { "epoch": 3.25, "learning_rate": 1.3734309477702394e-05, "loss": 0.997, "step": 233600 }, { "epoch": 3.26, "learning_rate": 1.3727343652043079e-05, "loss": 0.9582, "step": 233700 }, { "epoch": 3.26, "learning_rate": 1.3720377826383762e-05, "loss": 0.9687, "step": 233800 }, { "epoch": 3.26, "learning_rate": 1.3713412000724446e-05, "loss": 0.9688, "step": 233900 }, { "epoch": 3.26, "learning_rate": 1.3706446175065131e-05, "loss": 0.9618, "step": 234000 }, { "epoch": 3.26, "learning_rate": 1.3699550007662407e-05, "loss": 0.9736, "step": 234100 }, { "epoch": 3.26, "learning_rate": 1.3692584182003092e-05, "loss": 0.983, "step": 234200 }, { "epoch": 3.26, "learning_rate": 1.3685618356343777e-05, "loss": 0.963, "step": 234300 }, { "epoch": 3.27, "learning_rate": 1.3678652530684463e-05, "loss": 1.0001, "step": 234400 }, { "epoch": 3.27, "learning_rate": 1.3671686705025148e-05, "loss": 0.9748, "step": 234500 }, { "epoch": 3.27, "learning_rate": 1.3664720879365833e-05, "loss": 0.9721, "step": 234600 }, { "epoch": 3.27, "learning_rate": 1.3657755053706517e-05, "loss": 0.9612, "step": 234700 }, { "epoch": 3.27, "learning_rate": 1.3650789228047202e-05, "loss": 0.993, "step": 234800 }, { "epoch": 3.27, "learning_rate": 1.3643823402387887e-05, "loss": 0.9825, "step": 234900 }, { "epoch": 3.27, "learning_rate": 1.3636857576728571e-05, "loss": 1.0081, "step": 235000 }, { "epoch": 3.28, "learning_rate": 1.3629891751069254e-05, "loss": 0.9997, "step": 235100 }, { "epoch": 3.28, "learning_rate": 1.3622925925409939e-05, "loss": 0.9768, "step": 235200 }, { "epoch": 3.28, "learning_rate": 1.3615960099750624e-05, "loss": 1.0161, "step": 235300 }, { "epoch": 3.28, "learning_rate": 1.3608994274091308e-05, "loss": 1.0124, "step": 235400 }, { "epoch": 3.28, "learning_rate": 1.3602028448431993e-05, "loss": 0.9675, "step": 235500 }, { "epoch": 3.28, "learning_rate": 1.3595062622772678e-05, "loss": 0.9708, "step": 235600 }, { "epoch": 3.28, "learning_rate": 1.3588096797113362e-05, "loss": 0.9902, "step": 235700 }, { "epoch": 3.29, "learning_rate": 1.3581130971454047e-05, "loss": 0.983, "step": 235800 }, { "epoch": 3.29, "learning_rate": 1.3574165145794732e-05, "loss": 0.9454, "step": 235900 }, { "epoch": 3.29, "learning_rate": 1.3567199320135416e-05, "loss": 0.9914, "step": 236000 }, { "epoch": 3.29, "learning_rate": 1.3560233494476101e-05, "loss": 1.0168, "step": 236100 }, { "epoch": 3.29, "learning_rate": 1.3553267668816784e-05, "loss": 0.9914, "step": 236200 }, { "epoch": 3.29, "learning_rate": 1.3546301843157469e-05, "loss": 0.9612, "step": 236300 }, { "epoch": 3.29, "learning_rate": 1.3539336017498153e-05, "loss": 0.974, "step": 236400 }, { "epoch": 3.29, "learning_rate": 1.3532370191838838e-05, "loss": 0.9705, "step": 236500 }, { "epoch": 3.3, "learning_rate": 1.3525404366179523e-05, "loss": 0.9984, "step": 236600 }, { "epoch": 3.3, "learning_rate": 1.3518438540520207e-05, "loss": 0.9975, "step": 236700 }, { "epoch": 3.3, "learning_rate": 1.3511472714860892e-05, "loss": 0.9893, "step": 236800 }, { "epoch": 3.3, "learning_rate": 1.3504506889201579e-05, "loss": 0.9882, "step": 236900 }, { "epoch": 3.3, "learning_rate": 1.3497541063542263e-05, "loss": 1.0054, "step": 237000 }, { "epoch": 3.3, "learning_rate": 1.3490575237882948e-05, "loss": 0.9796, "step": 237100 }, { "epoch": 3.3, "learning_rate": 1.3483609412223633e-05, "loss": 0.9589, "step": 237200 }, { "epoch": 3.31, "learning_rate": 1.3476713244820909e-05, "loss": 1.0009, "step": 237300 }, { "epoch": 3.31, "learning_rate": 1.3469747419161594e-05, "loss": 0.9728, "step": 237400 }, { "epoch": 3.31, "learning_rate": 1.3462781593502277e-05, "loss": 0.9735, "step": 237500 }, { "epoch": 3.31, "learning_rate": 1.3455815767842961e-05, "loss": 0.9765, "step": 237600 }, { "epoch": 3.31, "learning_rate": 1.3448849942183648e-05, "loss": 0.9593, "step": 237700 }, { "epoch": 3.31, "learning_rate": 1.3441884116524332e-05, "loss": 1.0069, "step": 237800 }, { "epoch": 3.31, "learning_rate": 1.3434918290865017e-05, "loss": 0.9707, "step": 237900 }, { "epoch": 3.32, "learning_rate": 1.3427952465205702e-05, "loss": 0.9854, "step": 238000 }, { "epoch": 3.32, "learning_rate": 1.3420986639546386e-05, "loss": 0.9702, "step": 238100 }, { "epoch": 3.32, "learning_rate": 1.3414020813887071e-05, "loss": 1.0152, "step": 238200 }, { "epoch": 3.32, "learning_rate": 1.3407054988227756e-05, "loss": 1.0205, "step": 238300 }, { "epoch": 3.32, "learning_rate": 1.340008916256844e-05, "loss": 0.9861, "step": 238400 }, { "epoch": 3.32, "learning_rate": 1.3393123336909125e-05, "loss": 1.0026, "step": 238500 }, { "epoch": 3.32, "learning_rate": 1.3386157511249808e-05, "loss": 1.0146, "step": 238600 }, { "epoch": 3.33, "learning_rate": 1.3379191685590493e-05, "loss": 0.9556, "step": 238700 }, { "epoch": 3.33, "learning_rate": 1.3372225859931178e-05, "loss": 0.9612, "step": 238800 }, { "epoch": 3.33, "learning_rate": 1.3365260034271862e-05, "loss": 0.9914, "step": 238900 }, { "epoch": 3.33, "learning_rate": 1.3358294208612547e-05, "loss": 0.9871, "step": 239000 }, { "epoch": 3.33, "learning_rate": 1.3351328382953232e-05, "loss": 0.995, "step": 239100 }, { "epoch": 3.33, "learning_rate": 1.3344362557293916e-05, "loss": 0.9541, "step": 239200 }, { "epoch": 3.33, "learning_rate": 1.3337396731634601e-05, "loss": 0.9721, "step": 239300 }, { "epoch": 3.34, "learning_rate": 1.3330430905975286e-05, "loss": 1.0121, "step": 239400 }, { "epoch": 3.34, "learning_rate": 1.332346508031597e-05, "loss": 0.9509, "step": 239500 }, { "epoch": 3.34, "learning_rate": 1.3316568912913248e-05, "loss": 0.9673, "step": 239600 }, { "epoch": 3.34, "learning_rate": 1.3309603087253933e-05, "loss": 0.9598, "step": 239700 }, { "epoch": 3.34, "learning_rate": 1.3302637261594618e-05, "loss": 0.982, "step": 239800 }, { "epoch": 3.34, "learning_rate": 1.32956714359353e-05, "loss": 0.9916, "step": 239900 }, { "epoch": 3.34, "learning_rate": 1.3288705610275986e-05, "loss": 0.9989, "step": 240000 }, { "epoch": 3.34, "learning_rate": 1.328173978461667e-05, "loss": 0.9761, "step": 240100 }, { "epoch": 3.35, "learning_rate": 1.3274773958957355e-05, "loss": 0.9695, "step": 240200 }, { "epoch": 3.35, "learning_rate": 1.326780813329804e-05, "loss": 0.9946, "step": 240300 }, { "epoch": 3.35, "learning_rate": 1.3260842307638724e-05, "loss": 0.9662, "step": 240400 }, { "epoch": 3.35, "learning_rate": 1.3253876481979409e-05, "loss": 1.0043, "step": 240500 }, { "epoch": 3.35, "learning_rate": 1.3246910656320094e-05, "loss": 0.9889, "step": 240600 }, { "epoch": 3.35, "learning_rate": 1.3239944830660778e-05, "loss": 0.9748, "step": 240700 }, { "epoch": 3.35, "learning_rate": 1.3232979005001463e-05, "loss": 0.9632, "step": 240800 }, { "epoch": 3.36, "learning_rate": 1.3226013179342148e-05, "loss": 1.026, "step": 240900 }, { "epoch": 3.36, "learning_rate": 1.3219047353682832e-05, "loss": 0.9639, "step": 241000 }, { "epoch": 3.36, "learning_rate": 1.3212081528023517e-05, "loss": 0.973, "step": 241100 }, { "epoch": 3.36, "learning_rate": 1.3205115702364202e-05, "loss": 0.9861, "step": 241200 }, { "epoch": 3.36, "learning_rate": 1.3198149876704886e-05, "loss": 0.9995, "step": 241300 }, { "epoch": 3.36, "learning_rate": 1.3191184051045571e-05, "loss": 0.9796, "step": 241400 }, { "epoch": 3.36, "learning_rate": 1.3184218225386256e-05, "loss": 0.9778, "step": 241500 }, { "epoch": 3.37, "learning_rate": 1.317725239972694e-05, "loss": 0.9788, "step": 241600 }, { "epoch": 3.37, "learning_rate": 1.3170286574067625e-05, "loss": 1.0043, "step": 241700 }, { "epoch": 3.37, "learning_rate": 1.316332074840831e-05, "loss": 0.9934, "step": 241800 }, { "epoch": 3.37, "learning_rate": 1.3156354922748994e-05, "loss": 0.976, "step": 241900 }, { "epoch": 3.37, "learning_rate": 1.3149458755346273e-05, "loss": 0.9997, "step": 242000 }, { "epoch": 3.37, "learning_rate": 1.3142492929686957e-05, "loss": 0.9845, "step": 242100 }, { "epoch": 3.37, "learning_rate": 1.3135527104027642e-05, "loss": 0.9935, "step": 242200 }, { "epoch": 3.38, "learning_rate": 1.3128561278368325e-05, "loss": 0.9589, "step": 242300 }, { "epoch": 3.38, "learning_rate": 1.312159545270901e-05, "loss": 0.9656, "step": 242400 }, { "epoch": 3.38, "learning_rate": 1.3114629627049694e-05, "loss": 0.9979, "step": 242500 }, { "epoch": 3.38, "learning_rate": 1.3107663801390379e-05, "loss": 0.9738, "step": 242600 }, { "epoch": 3.38, "learning_rate": 1.3100697975731064e-05, "loss": 1.0074, "step": 242700 }, { "epoch": 3.38, "learning_rate": 1.3093732150071748e-05, "loss": 1.0119, "step": 242800 }, { "epoch": 3.38, "learning_rate": 1.3086766324412433e-05, "loss": 0.9412, "step": 242900 }, { "epoch": 3.39, "learning_rate": 1.3079800498753118e-05, "loss": 1.0002, "step": 243000 }, { "epoch": 3.39, "learning_rate": 1.3072834673093802e-05, "loss": 0.9525, "step": 243100 }, { "epoch": 3.39, "learning_rate": 1.3065868847434487e-05, "loss": 0.959, "step": 243200 }, { "epoch": 3.39, "learning_rate": 1.3058903021775172e-05, "loss": 1.0097, "step": 243300 }, { "epoch": 3.39, "learning_rate": 1.3051937196115855e-05, "loss": 0.9807, "step": 243400 }, { "epoch": 3.39, "learning_rate": 1.304497137045654e-05, "loss": 0.9537, "step": 243500 }, { "epoch": 3.39, "learning_rate": 1.3038005544797224e-05, "loss": 0.9929, "step": 243600 }, { "epoch": 3.4, "learning_rate": 1.3031039719137909e-05, "loss": 0.9862, "step": 243700 }, { "epoch": 3.4, "learning_rate": 1.3024143551735187e-05, "loss": 0.9913, "step": 243800 }, { "epoch": 3.4, "learning_rate": 1.3017177726075872e-05, "loss": 0.9867, "step": 243900 }, { "epoch": 3.4, "learning_rate": 1.3010211900416556e-05, "loss": 0.9704, "step": 244000 }, { "epoch": 3.4, "learning_rate": 1.3003246074757241e-05, "loss": 0.9614, "step": 244100 }, { "epoch": 3.4, "learning_rate": 1.2996280249097926e-05, "loss": 0.9972, "step": 244200 }, { "epoch": 3.4, "learning_rate": 1.298931442343861e-05, "loss": 0.9898, "step": 244300 }, { "epoch": 3.4, "learning_rate": 1.2982348597779295e-05, "loss": 0.9753, "step": 244400 }, { "epoch": 3.41, "learning_rate": 1.297538277211998e-05, "loss": 0.9839, "step": 244500 }, { "epoch": 3.41, "learning_rate": 1.2968416946460664e-05, "loss": 0.9777, "step": 244600 }, { "epoch": 3.41, "learning_rate": 1.2961451120801347e-05, "loss": 0.9914, "step": 244700 }, { "epoch": 3.41, "learning_rate": 1.2954485295142032e-05, "loss": 0.9803, "step": 244800 }, { "epoch": 3.41, "learning_rate": 1.2947519469482717e-05, "loss": 0.9592, "step": 244900 }, { "epoch": 3.41, "learning_rate": 1.2940553643823403e-05, "loss": 0.9881, "step": 245000 }, { "epoch": 3.41, "learning_rate": 1.2933587818164088e-05, "loss": 0.9817, "step": 245100 }, { "epoch": 3.42, "learning_rate": 1.2926621992504773e-05, "loss": 0.9738, "step": 245200 }, { "epoch": 3.42, "learning_rate": 1.2919656166845457e-05, "loss": 0.9989, "step": 245300 }, { "epoch": 3.42, "learning_rate": 1.2912690341186142e-05, "loss": 0.9551, "step": 245400 }, { "epoch": 3.42, "learning_rate": 1.2905724515526827e-05, "loss": 0.9904, "step": 245500 }, { "epoch": 3.42, "learning_rate": 1.2898758689867511e-05, "loss": 0.9919, "step": 245600 }, { "epoch": 3.42, "learning_rate": 1.2891792864208196e-05, "loss": 0.9967, "step": 245700 }, { "epoch": 3.42, "learning_rate": 1.288482703854888e-05, "loss": 1.0002, "step": 245800 }, { "epoch": 3.43, "learning_rate": 1.2877861212889564e-05, "loss": 0.9913, "step": 245900 }, { "epoch": 3.43, "learning_rate": 1.2870895387230248e-05, "loss": 0.9849, "step": 246000 }, { "epoch": 3.43, "learning_rate": 1.2863929561570933e-05, "loss": 0.9998, "step": 246100 }, { "epoch": 3.43, "learning_rate": 1.2856963735911618e-05, "loss": 1.0058, "step": 246200 }, { "epoch": 3.43, "learning_rate": 1.2849997910252302e-05, "loss": 0.9815, "step": 246300 }, { "epoch": 3.43, "learning_rate": 1.2843032084592987e-05, "loss": 0.9764, "step": 246400 }, { "epoch": 3.43, "learning_rate": 1.2836066258933672e-05, "loss": 1.0131, "step": 246500 }, { "epoch": 3.44, "learning_rate": 1.2829100433274356e-05, "loss": 0.9728, "step": 246600 }, { "epoch": 3.44, "learning_rate": 1.2822134607615041e-05, "loss": 0.9944, "step": 246700 }, { "epoch": 3.44, "learning_rate": 1.2815168781955726e-05, "loss": 0.9332, "step": 246800 }, { "epoch": 3.44, "learning_rate": 1.280820295629641e-05, "loss": 0.961, "step": 246900 }, { "epoch": 3.44, "learning_rate": 1.2801237130637093e-05, "loss": 1.0046, "step": 247000 }, { "epoch": 3.44, "learning_rate": 1.2794271304977778e-05, "loss": 0.975, "step": 247100 }, { "epoch": 3.44, "learning_rate": 1.2787305479318463e-05, "loss": 0.9783, "step": 247200 }, { "epoch": 3.45, "learning_rate": 1.2780339653659147e-05, "loss": 0.9852, "step": 247300 }, { "epoch": 3.45, "learning_rate": 1.2773373827999832e-05, "loss": 1.0075, "step": 247400 }, { "epoch": 3.45, "learning_rate": 1.2766408002340519e-05, "loss": 0.9754, "step": 247500 }, { "epoch": 3.45, "learning_rate": 1.2759442176681203e-05, "loss": 0.9657, "step": 247600 }, { "epoch": 3.45, "learning_rate": 1.2752476351021888e-05, "loss": 1.0082, "step": 247700 }, { "epoch": 3.45, "learning_rate": 1.2745510525362573e-05, "loss": 0.9673, "step": 247800 }, { "epoch": 3.45, "learning_rate": 1.2738544699703257e-05, "loss": 0.9751, "step": 247900 }, { "epoch": 3.46, "learning_rate": 1.2731578874043942e-05, "loss": 1.0117, "step": 248000 }, { "epoch": 3.46, "learning_rate": 1.2724613048384625e-05, "loss": 1.0074, "step": 248100 }, { "epoch": 3.46, "learning_rate": 1.2717716880981903e-05, "loss": 1.0053, "step": 248200 }, { "epoch": 3.46, "learning_rate": 1.2710751055322588e-05, "loss": 1.0105, "step": 248300 }, { "epoch": 3.46, "learning_rate": 1.2703785229663272e-05, "loss": 0.9761, "step": 248400 }, { "epoch": 3.46, "learning_rate": 1.2696819404003957e-05, "loss": 0.9702, "step": 248500 }, { "epoch": 3.46, "learning_rate": 1.2689853578344642e-05, "loss": 1.0102, "step": 248600 }, { "epoch": 3.46, "learning_rate": 1.2682887752685327e-05, "loss": 0.9613, "step": 248700 }, { "epoch": 3.47, "learning_rate": 1.2675921927026011e-05, "loss": 0.9878, "step": 248800 }, { "epoch": 3.47, "learning_rate": 1.2668956101366696e-05, "loss": 0.9962, "step": 248900 }, { "epoch": 3.47, "learning_rate": 1.266199027570738e-05, "loss": 0.9722, "step": 249000 }, { "epoch": 3.47, "learning_rate": 1.2655024450048065e-05, "loss": 0.9736, "step": 249100 }, { "epoch": 3.47, "learning_rate": 1.264805862438875e-05, "loss": 0.9761, "step": 249200 }, { "epoch": 3.47, "learning_rate": 1.2641092798729435e-05, "loss": 0.9865, "step": 249300 }, { "epoch": 3.47, "learning_rate": 1.2634126973070118e-05, "loss": 1.0022, "step": 249400 }, { "epoch": 3.48, "learning_rate": 1.2627161147410802e-05, "loss": 0.9916, "step": 249500 }, { "epoch": 3.48, "learning_rate": 1.2620195321751487e-05, "loss": 0.9595, "step": 249600 }, { "epoch": 3.48, "learning_rate": 1.2613229496092172e-05, "loss": 0.9772, "step": 249700 }, { "epoch": 3.48, "learning_rate": 1.2606263670432856e-05, "loss": 0.99, "step": 249800 }, { "epoch": 3.48, "learning_rate": 1.2599297844773541e-05, "loss": 0.9679, "step": 249900 }, { "epoch": 3.48, "learning_rate": 1.2592332019114226e-05, "loss": 1.0168, "step": 250000 }, { "epoch": 3.48, "learning_rate": 1.258536619345491e-05, "loss": 0.9696, "step": 250100 }, { "epoch": 3.49, "learning_rate": 1.2578400367795595e-05, "loss": 0.9751, "step": 250200 }, { "epoch": 3.49, "learning_rate": 1.257143454213628e-05, "loss": 0.9922, "step": 250300 }, { "epoch": 3.49, "learning_rate": 1.2564468716476964e-05, "loss": 0.9825, "step": 250400 }, { "epoch": 3.49, "learning_rate": 1.2557572549074243e-05, "loss": 0.9756, "step": 250500 }, { "epoch": 3.49, "learning_rate": 1.2550606723414927e-05, "loss": 0.9796, "step": 250600 }, { "epoch": 3.49, "learning_rate": 1.254364089775561e-05, "loss": 0.9671, "step": 250700 }, { "epoch": 3.49, "learning_rate": 1.2536675072096295e-05, "loss": 0.9738, "step": 250800 }, { "epoch": 3.5, "learning_rate": 1.252970924643698e-05, "loss": 1.0037, "step": 250900 }, { "epoch": 3.5, "learning_rate": 1.2522743420777664e-05, "loss": 0.9856, "step": 251000 }, { "epoch": 3.5, "learning_rate": 1.2515777595118349e-05, "loss": 0.9816, "step": 251100 }, { "epoch": 3.5, "learning_rate": 1.2508811769459034e-05, "loss": 0.982, "step": 251200 }, { "epoch": 3.5, "learning_rate": 1.2501845943799718e-05, "loss": 0.9655, "step": 251300 }, { "epoch": 3.5, "learning_rate": 1.2494880118140403e-05, "loss": 0.9474, "step": 251400 }, { "epoch": 3.5, "learning_rate": 1.2487914292481088e-05, "loss": 1.0095, "step": 251500 }, { "epoch": 3.51, "learning_rate": 1.2480948466821774e-05, "loss": 0.9443, "step": 251600 }, { "epoch": 3.51, "learning_rate": 1.2473982641162459e-05, "loss": 1.0083, "step": 251700 }, { "epoch": 3.51, "learning_rate": 1.2467016815503142e-05, "loss": 0.9412, "step": 251800 }, { "epoch": 3.51, "learning_rate": 1.2460050989843826e-05, "loss": 0.9921, "step": 251900 }, { "epoch": 3.51, "learning_rate": 1.2453085164184511e-05, "loss": 0.9875, "step": 252000 }, { "epoch": 3.51, "learning_rate": 1.2446119338525196e-05, "loss": 0.9961, "step": 252100 }, { "epoch": 3.51, "learning_rate": 1.243915351286588e-05, "loss": 0.9483, "step": 252200 }, { "epoch": 3.51, "learning_rate": 1.2432187687206565e-05, "loss": 0.9692, "step": 252300 }, { "epoch": 3.52, "learning_rate": 1.242522186154725e-05, "loss": 0.9758, "step": 252400 }, { "epoch": 3.52, "learning_rate": 1.2418256035887935e-05, "loss": 0.9745, "step": 252500 }, { "epoch": 3.52, "learning_rate": 1.241129021022862e-05, "loss": 0.9939, "step": 252600 }, { "epoch": 3.52, "learning_rate": 1.2404324384569304e-05, "loss": 1.0025, "step": 252700 }, { "epoch": 3.52, "learning_rate": 1.2397358558909989e-05, "loss": 0.9738, "step": 252800 }, { "epoch": 3.52, "learning_rate": 1.2390392733250673e-05, "loss": 0.957, "step": 252900 }, { "epoch": 3.52, "learning_rate": 1.2383426907591356e-05, "loss": 0.9619, "step": 253000 }, { "epoch": 3.53, "learning_rate": 1.2376461081932041e-05, "loss": 0.9671, "step": 253100 }, { "epoch": 3.53, "learning_rate": 1.2369564914529319e-05, "loss": 0.9897, "step": 253200 }, { "epoch": 3.53, "learning_rate": 1.2362599088870004e-05, "loss": 0.9783, "step": 253300 }, { "epoch": 3.53, "learning_rate": 1.2355633263210688e-05, "loss": 0.9969, "step": 253400 }, { "epoch": 3.53, "learning_rate": 1.2348667437551373e-05, "loss": 0.9811, "step": 253500 }, { "epoch": 3.53, "learning_rate": 1.2341701611892058e-05, "loss": 0.9867, "step": 253600 }, { "epoch": 3.53, "learning_rate": 1.2334805444489336e-05, "loss": 1.0169, "step": 253700 }, { "epoch": 3.54, "learning_rate": 1.232783961883002e-05, "loss": 0.9899, "step": 253800 }, { "epoch": 3.54, "learning_rate": 1.2320873793170705e-05, "loss": 1.0126, "step": 253900 }, { "epoch": 3.54, "learning_rate": 1.231390796751139e-05, "loss": 0.9635, "step": 254000 }, { "epoch": 3.54, "learning_rate": 1.2306942141852075e-05, "loss": 1.0019, "step": 254100 }, { "epoch": 3.54, "learning_rate": 1.229997631619276e-05, "loss": 0.9804, "step": 254200 }, { "epoch": 3.54, "learning_rate": 1.2293010490533444e-05, "loss": 0.9913, "step": 254300 }, { "epoch": 3.54, "learning_rate": 1.2286044664874127e-05, "loss": 0.9763, "step": 254400 }, { "epoch": 3.55, "learning_rate": 1.2279078839214812e-05, "loss": 0.9634, "step": 254500 }, { "epoch": 3.55, "learning_rate": 1.2272113013555496e-05, "loss": 0.9622, "step": 254600 }, { "epoch": 3.55, "learning_rate": 1.2265147187896181e-05, "loss": 0.9788, "step": 254700 }, { "epoch": 3.55, "learning_rate": 1.2258181362236866e-05, "loss": 1.0044, "step": 254800 }, { "epoch": 3.55, "learning_rate": 1.225121553657755e-05, "loss": 0.9843, "step": 254900 }, { "epoch": 3.55, "learning_rate": 1.2244249710918235e-05, "loss": 0.9691, "step": 255000 }, { "epoch": 3.55, "learning_rate": 1.223728388525892e-05, "loss": 0.9985, "step": 255100 }, { "epoch": 3.56, "learning_rate": 1.2230318059599604e-05, "loss": 0.9796, "step": 255200 }, { "epoch": 3.56, "learning_rate": 1.2223352233940289e-05, "loss": 0.9725, "step": 255300 }, { "epoch": 3.56, "learning_rate": 1.2216386408280974e-05, "loss": 0.9806, "step": 255400 }, { "epoch": 3.56, "learning_rate": 1.2209420582621659e-05, "loss": 0.9748, "step": 255500 }, { "epoch": 3.56, "learning_rate": 1.2202454756962343e-05, "loss": 0.9934, "step": 255600 }, { "epoch": 3.56, "learning_rate": 1.2195488931303028e-05, "loss": 0.989, "step": 255700 }, { "epoch": 3.56, "learning_rate": 1.2188523105643713e-05, "loss": 1.0401, "step": 255800 }, { "epoch": 3.57, "learning_rate": 1.2181557279984397e-05, "loss": 1.0054, "step": 255900 }, { "epoch": 3.57, "learning_rate": 1.2174591454325082e-05, "loss": 0.9631, "step": 256000 }, { "epoch": 3.57, "learning_rate": 1.2167625628665767e-05, "loss": 0.9756, "step": 256100 }, { "epoch": 3.57, "learning_rate": 1.2160729461263043e-05, "loss": 0.9753, "step": 256200 }, { "epoch": 3.57, "learning_rate": 1.2153763635603728e-05, "loss": 0.9604, "step": 256300 }, { "epoch": 3.57, "learning_rate": 1.2146797809944412e-05, "loss": 0.9536, "step": 256400 }, { "epoch": 3.57, "learning_rate": 1.2139831984285099e-05, "loss": 0.9592, "step": 256500 }, { "epoch": 3.57, "learning_rate": 1.2132866158625783e-05, "loss": 0.9903, "step": 256600 }, { "epoch": 3.58, "learning_rate": 1.2125900332966468e-05, "loss": 0.9939, "step": 256700 }, { "epoch": 3.58, "learning_rate": 1.2118934507307153e-05, "loss": 0.9895, "step": 256800 }, { "epoch": 3.58, "learning_rate": 1.2111968681647836e-05, "loss": 1.0161, "step": 256900 }, { "epoch": 3.58, "learning_rate": 1.210500285598852e-05, "loss": 0.9908, "step": 257000 }, { "epoch": 3.58, "learning_rate": 1.2098037030329205e-05, "loss": 0.968, "step": 257100 }, { "epoch": 3.58, "learning_rate": 1.209107120466989e-05, "loss": 0.9951, "step": 257200 }, { "epoch": 3.58, "learning_rate": 1.2084105379010575e-05, "loss": 1.0156, "step": 257300 }, { "epoch": 3.59, "learning_rate": 1.207713955335126e-05, "loss": 0.9902, "step": 257400 }, { "epoch": 3.59, "learning_rate": 1.2070173727691944e-05, "loss": 0.9568, "step": 257500 }, { "epoch": 3.59, "learning_rate": 1.2063207902032629e-05, "loss": 0.9984, "step": 257600 }, { "epoch": 3.59, "learning_rate": 1.2056242076373313e-05, "loss": 0.9871, "step": 257700 }, { "epoch": 3.59, "learning_rate": 1.2049276250713998e-05, "loss": 0.9664, "step": 257800 }, { "epoch": 3.59, "learning_rate": 1.2042310425054683e-05, "loss": 0.9597, "step": 257900 }, { "epoch": 3.59, "learning_rate": 1.2035344599395366e-05, "loss": 1.025, "step": 258000 }, { "epoch": 3.6, "learning_rate": 1.202837877373605e-05, "loss": 0.9871, "step": 258100 }, { "epoch": 3.6, "learning_rate": 1.2021412948076735e-05, "loss": 1.0194, "step": 258200 }, { "epoch": 3.6, "learning_rate": 1.201444712241742e-05, "loss": 1.0025, "step": 258300 }, { "epoch": 3.6, "learning_rate": 1.2007481296758104e-05, "loss": 1.0267, "step": 258400 }, { "epoch": 3.6, "learning_rate": 1.2000515471098789e-05, "loss": 0.9666, "step": 258500 }, { "epoch": 3.6, "learning_rate": 1.1993549645439474e-05, "loss": 0.9595, "step": 258600 }, { "epoch": 3.6, "learning_rate": 1.1986583819780158e-05, "loss": 0.9739, "step": 258700 }, { "epoch": 3.61, "learning_rate": 1.1979617994120843e-05, "loss": 0.9822, "step": 258800 }, { "epoch": 3.61, "learning_rate": 1.197265216846153e-05, "loss": 0.975, "step": 258900 }, { "epoch": 3.61, "learning_rate": 1.1965686342802214e-05, "loss": 0.9816, "step": 259000 }, { "epoch": 3.61, "learning_rate": 1.1958720517142897e-05, "loss": 0.9882, "step": 259100 }, { "epoch": 3.61, "learning_rate": 1.1951754691483582e-05, "loss": 0.955, "step": 259200 }, { "epoch": 3.61, "learning_rate": 1.1944788865824267e-05, "loss": 1.0038, "step": 259300 }, { "epoch": 3.61, "learning_rate": 1.1937823040164951e-05, "loss": 0.9749, "step": 259400 }, { "epoch": 3.62, "learning_rate": 1.1930857214505636e-05, "loss": 1.0001, "step": 259500 }, { "epoch": 3.62, "learning_rate": 1.192389138884632e-05, "loss": 0.9507, "step": 259600 }, { "epoch": 3.62, "learning_rate": 1.1916925563187005e-05, "loss": 0.9973, "step": 259700 }, { "epoch": 3.62, "learning_rate": 1.190995973752769e-05, "loss": 0.989, "step": 259800 }, { "epoch": 3.62, "learning_rate": 1.1902993911868375e-05, "loss": 1.0236, "step": 259900 }, { "epoch": 3.62, "learning_rate": 1.189602808620906e-05, "loss": 0.9796, "step": 260000 }, { "epoch": 3.62, "learning_rate": 1.1889062260549744e-05, "loss": 1.0098, "step": 260100 }, { "epoch": 3.63, "learning_rate": 1.1882096434890427e-05, "loss": 0.986, "step": 260200 }, { "epoch": 3.63, "learning_rate": 1.1875130609231112e-05, "loss": 0.9521, "step": 260300 }, { "epoch": 3.63, "learning_rate": 1.1868164783571796e-05, "loss": 0.9656, "step": 260400 }, { "epoch": 3.63, "learning_rate": 1.1861198957912481e-05, "loss": 0.9844, "step": 260500 }, { "epoch": 3.63, "learning_rate": 1.1854233132253166e-05, "loss": 0.9965, "step": 260600 }, { "epoch": 3.63, "learning_rate": 1.184726730659385e-05, "loss": 0.9732, "step": 260700 }, { "epoch": 3.63, "learning_rate": 1.1840371139191129e-05, "loss": 0.9969, "step": 260800 }, { "epoch": 3.63, "learning_rate": 1.1833405313531813e-05, "loss": 0.9837, "step": 260900 }, { "epoch": 3.64, "learning_rate": 1.1826439487872498e-05, "loss": 1.0085, "step": 261000 }, { "epoch": 3.64, "learning_rate": 1.1819473662213183e-05, "loss": 0.9855, "step": 261100 }, { "epoch": 3.64, "learning_rate": 1.1812507836553867e-05, "loss": 0.9981, "step": 261200 }, { "epoch": 3.64, "learning_rate": 1.1805542010894552e-05, "loss": 1.0086, "step": 261300 }, { "epoch": 3.64, "learning_rate": 1.1798576185235237e-05, "loss": 0.9939, "step": 261400 }, { "epoch": 3.64, "learning_rate": 1.179161035957592e-05, "loss": 1.0008, "step": 261500 }, { "epoch": 3.64, "learning_rate": 1.1784644533916604e-05, "loss": 0.9769, "step": 261600 }, { "epoch": 3.65, "learning_rate": 1.1777678708257289e-05, "loss": 1.0016, "step": 261700 }, { "epoch": 3.65, "learning_rate": 1.1770712882597974e-05, "loss": 1.0011, "step": 261800 }, { "epoch": 3.65, "learning_rate": 1.1763747056938658e-05, "loss": 0.9821, "step": 261900 }, { "epoch": 3.65, "learning_rate": 1.1756781231279343e-05, "loss": 0.9878, "step": 262000 }, { "epoch": 3.65, "learning_rate": 1.1749815405620028e-05, "loss": 1.0042, "step": 262100 }, { "epoch": 3.65, "learning_rate": 1.1742849579960714e-05, "loss": 0.9959, "step": 262200 }, { "epoch": 3.65, "learning_rate": 1.1735883754301399e-05, "loss": 0.9901, "step": 262300 }, { "epoch": 3.66, "learning_rate": 1.1728917928642083e-05, "loss": 0.9912, "step": 262400 }, { "epoch": 3.66, "learning_rate": 1.1721952102982768e-05, "loss": 0.9897, "step": 262500 }, { "epoch": 3.66, "learning_rate": 1.1714986277323451e-05, "loss": 0.949, "step": 262600 }, { "epoch": 3.66, "learning_rate": 1.1708020451664136e-05, "loss": 0.9607, "step": 262700 }, { "epoch": 3.66, "learning_rate": 1.170105462600482e-05, "loss": 0.9794, "step": 262800 }, { "epoch": 3.66, "learning_rate": 1.1694088800345505e-05, "loss": 0.9869, "step": 262900 }, { "epoch": 3.66, "learning_rate": 1.168712297468619e-05, "loss": 0.9877, "step": 263000 }, { "epoch": 3.67, "learning_rate": 1.1680157149026875e-05, "loss": 0.9649, "step": 263100 }, { "epoch": 3.67, "learning_rate": 1.167319132336756e-05, "loss": 0.9846, "step": 263200 }, { "epoch": 3.67, "learning_rate": 1.1666225497708244e-05, "loss": 0.994, "step": 263300 }, { "epoch": 3.67, "learning_rate": 1.1659259672048929e-05, "loss": 0.95, "step": 263400 }, { "epoch": 3.67, "learning_rate": 1.1652293846389613e-05, "loss": 0.9849, "step": 263500 }, { "epoch": 3.67, "learning_rate": 1.1645328020730298e-05, "loss": 0.9534, "step": 263600 }, { "epoch": 3.67, "learning_rate": 1.1638362195070983e-05, "loss": 0.97, "step": 263700 }, { "epoch": 3.68, "learning_rate": 1.1631396369411666e-05, "loss": 1.004, "step": 263800 }, { "epoch": 3.68, "learning_rate": 1.162443054375235e-05, "loss": 0.9785, "step": 263900 }, { "epoch": 3.68, "learning_rate": 1.1617534376349628e-05, "loss": 0.9533, "step": 264000 }, { "epoch": 3.68, "learning_rate": 1.1610568550690313e-05, "loss": 0.9831, "step": 264100 }, { "epoch": 3.68, "learning_rate": 1.1603602725030998e-05, "loss": 1.0029, "step": 264200 }, { "epoch": 3.68, "learning_rate": 1.1596706557628276e-05, "loss": 0.9957, "step": 264300 }, { "epoch": 3.68, "learning_rate": 1.158974073196896e-05, "loss": 1.0092, "step": 264400 }, { "epoch": 3.68, "learning_rate": 1.1582774906309645e-05, "loss": 0.9792, "step": 264500 }, { "epoch": 3.69, "learning_rate": 1.157580908065033e-05, "loss": 0.9532, "step": 264600 }, { "epoch": 3.69, "learning_rate": 1.1568843254991015e-05, "loss": 0.959, "step": 264700 }, { "epoch": 3.69, "learning_rate": 1.15618774293317e-05, "loss": 0.9557, "step": 264800 }, { "epoch": 3.69, "learning_rate": 1.1554911603672384e-05, "loss": 0.9499, "step": 264900 }, { "epoch": 3.69, "learning_rate": 1.1547945778013069e-05, "loss": 0.9418, "step": 265000 }, { "epoch": 3.69, "learning_rate": 1.1540979952353753e-05, "loss": 1.0181, "step": 265100 }, { "epoch": 3.69, "learning_rate": 1.1534014126694436e-05, "loss": 0.9944, "step": 265200 }, { "epoch": 3.7, "learning_rate": 1.1527048301035121e-05, "loss": 0.9985, "step": 265300 }, { "epoch": 3.7, "learning_rate": 1.1520082475375806e-05, "loss": 0.9748, "step": 265400 }, { "epoch": 3.7, "learning_rate": 1.151311664971649e-05, "loss": 0.9544, "step": 265500 }, { "epoch": 3.7, "learning_rate": 1.1506150824057175e-05, "loss": 0.9731, "step": 265600 }, { "epoch": 3.7, "learning_rate": 1.149918499839786e-05, "loss": 0.9681, "step": 265700 }, { "epoch": 3.7, "learning_rate": 1.1492219172738544e-05, "loss": 0.9926, "step": 265800 }, { "epoch": 3.7, "learning_rate": 1.148525334707923e-05, "loss": 0.9781, "step": 265900 }, { "epoch": 3.71, "learning_rate": 1.1478287521419914e-05, "loss": 0.9916, "step": 266000 }, { "epoch": 3.71, "learning_rate": 1.1471321695760599e-05, "loss": 0.9642, "step": 266100 }, { "epoch": 3.71, "learning_rate": 1.1464355870101283e-05, "loss": 0.9716, "step": 266200 }, { "epoch": 3.71, "learning_rate": 1.145739004444197e-05, "loss": 0.9785, "step": 266300 }, { "epoch": 3.71, "learning_rate": 1.1450424218782653e-05, "loss": 0.9607, "step": 266400 }, { "epoch": 3.71, "learning_rate": 1.1443458393123337e-05, "loss": 0.972, "step": 266500 }, { "epoch": 3.71, "learning_rate": 1.1436492567464022e-05, "loss": 0.9862, "step": 266600 }, { "epoch": 3.72, "learning_rate": 1.1429526741804707e-05, "loss": 0.9673, "step": 266700 }, { "epoch": 3.72, "learning_rate": 1.1422560916145391e-05, "loss": 0.9721, "step": 266800 }, { "epoch": 3.72, "learning_rate": 1.1415595090486076e-05, "loss": 0.9615, "step": 266900 }, { "epoch": 3.72, "learning_rate": 1.140862926482676e-05, "loss": 0.9706, "step": 267000 }, { "epoch": 3.72, "learning_rate": 1.1401663439167445e-05, "loss": 1.0159, "step": 267100 }, { "epoch": 3.72, "learning_rate": 1.139469761350813e-05, "loss": 0.997, "step": 267200 }, { "epoch": 3.72, "learning_rate": 1.1387731787848815e-05, "loss": 0.97, "step": 267300 }, { "epoch": 3.73, "learning_rate": 1.13807659621895e-05, "loss": 1.0179, "step": 267400 }, { "epoch": 3.73, "learning_rate": 1.1373800136530182e-05, "loss": 0.9738, "step": 267500 }, { "epoch": 3.73, "learning_rate": 1.1366834310870867e-05, "loss": 0.9676, "step": 267600 }, { "epoch": 3.73, "learning_rate": 1.1359868485211552e-05, "loss": 0.9892, "step": 267700 }, { "epoch": 3.73, "learning_rate": 1.1352902659552236e-05, "loss": 1.0037, "step": 267800 }, { "epoch": 3.73, "learning_rate": 1.1345936833892921e-05, "loss": 1.0042, "step": 267900 }, { "epoch": 3.73, "learning_rate": 1.1338971008233606e-05, "loss": 0.9883, "step": 268000 }, { "epoch": 3.74, "learning_rate": 1.133200518257429e-05, "loss": 1.0058, "step": 268100 }, { "epoch": 3.74, "learning_rate": 1.1325039356914975e-05, "loss": 0.9622, "step": 268200 }, { "epoch": 3.74, "learning_rate": 1.131807353125566e-05, "loss": 1.002, "step": 268300 }, { "epoch": 3.74, "learning_rate": 1.1311107705596345e-05, "loss": 0.9946, "step": 268400 }, { "epoch": 3.74, "learning_rate": 1.130414187993703e-05, "loss": 1.0038, "step": 268500 }, { "epoch": 3.74, "learning_rate": 1.1297176054277712e-05, "loss": 0.9864, "step": 268600 }, { "epoch": 3.74, "learning_rate": 1.1290210228618399e-05, "loss": 0.9908, "step": 268700 }, { "epoch": 3.74, "learning_rate": 1.1283244402959083e-05, "loss": 1.0071, "step": 268800 }, { "epoch": 3.75, "learning_rate": 1.1276278577299768e-05, "loss": 0.9852, "step": 268900 }, { "epoch": 3.75, "learning_rate": 1.1269382409897044e-05, "loss": 0.9787, "step": 269000 }, { "epoch": 3.75, "learning_rate": 1.1262416584237729e-05, "loss": 0.9853, "step": 269100 }, { "epoch": 3.75, "learning_rate": 1.1255450758578414e-05, "loss": 0.9849, "step": 269200 }, { "epoch": 3.75, "learning_rate": 1.1248484932919098e-05, "loss": 0.9848, "step": 269300 }, { "epoch": 3.75, "learning_rate": 1.1241519107259783e-05, "loss": 0.967, "step": 269400 }, { "epoch": 3.75, "learning_rate": 1.123455328160047e-05, "loss": 0.9804, "step": 269500 }, { "epoch": 3.76, "learning_rate": 1.1227587455941154e-05, "loss": 0.9966, "step": 269600 }, { "epoch": 3.76, "learning_rate": 1.1220621630281839e-05, "loss": 0.9848, "step": 269700 }, { "epoch": 3.76, "learning_rate": 1.1213655804622524e-05, "loss": 0.9583, "step": 269800 }, { "epoch": 3.76, "learning_rate": 1.1206689978963207e-05, "loss": 0.9921, "step": 269900 }, { "epoch": 3.76, "learning_rate": 1.1199724153303891e-05, "loss": 0.9721, "step": 270000 }, { "epoch": 3.76, "learning_rate": 1.1192758327644576e-05, "loss": 0.9834, "step": 270100 }, { "epoch": 3.76, "learning_rate": 1.118579250198526e-05, "loss": 0.9992, "step": 270200 }, { "epoch": 3.77, "learning_rate": 1.1178826676325945e-05, "loss": 0.9495, "step": 270300 }, { "epoch": 3.77, "learning_rate": 1.117186085066663e-05, "loss": 0.9699, "step": 270400 }, { "epoch": 3.77, "learning_rate": 1.1164895025007315e-05, "loss": 0.9755, "step": 270500 }, { "epoch": 3.77, "learning_rate": 1.1157929199348e-05, "loss": 0.9905, "step": 270600 }, { "epoch": 3.77, "learning_rate": 1.1150963373688684e-05, "loss": 0.9889, "step": 270700 }, { "epoch": 3.77, "learning_rate": 1.1143997548029369e-05, "loss": 0.9678, "step": 270800 }, { "epoch": 3.77, "learning_rate": 1.1137031722370053e-05, "loss": 0.9644, "step": 270900 }, { "epoch": 3.78, "learning_rate": 1.1130065896710736e-05, "loss": 0.9962, "step": 271000 }, { "epoch": 3.78, "learning_rate": 1.1123100071051421e-05, "loss": 1.0022, "step": 271100 }, { "epoch": 3.78, "learning_rate": 1.1116134245392106e-05, "loss": 0.9839, "step": 271200 }, { "epoch": 3.78, "learning_rate": 1.110916841973279e-05, "loss": 0.9932, "step": 271300 }, { "epoch": 3.78, "learning_rate": 1.1102202594073475e-05, "loss": 0.9739, "step": 271400 }, { "epoch": 3.78, "learning_rate": 1.109523676841416e-05, "loss": 1.0009, "step": 271500 }, { "epoch": 3.78, "learning_rate": 1.1088270942754844e-05, "loss": 0.9738, "step": 271600 }, { "epoch": 3.79, "learning_rate": 1.1081305117095529e-05, "loss": 0.9487, "step": 271700 }, { "epoch": 3.79, "learning_rate": 1.1074339291436214e-05, "loss": 0.9752, "step": 271800 }, { "epoch": 3.79, "learning_rate": 1.1067373465776899e-05, "loss": 0.9955, "step": 271900 }, { "epoch": 3.79, "learning_rate": 1.1060407640117585e-05, "loss": 0.9662, "step": 272000 }, { "epoch": 3.79, "learning_rate": 1.105344181445827e-05, "loss": 0.9841, "step": 272100 }, { "epoch": 3.79, "learning_rate": 1.1046475988798953e-05, "loss": 0.9586, "step": 272200 }, { "epoch": 3.79, "learning_rate": 1.1039510163139637e-05, "loss": 0.9878, "step": 272300 }, { "epoch": 3.79, "learning_rate": 1.1032544337480322e-05, "loss": 0.9602, "step": 272400 }, { "epoch": 3.8, "learning_rate": 1.1025578511821007e-05, "loss": 0.9938, "step": 272500 }, { "epoch": 3.8, "learning_rate": 1.1018612686161691e-05, "loss": 0.9777, "step": 272600 }, { "epoch": 3.8, "learning_rate": 1.1011646860502376e-05, "loss": 0.9831, "step": 272700 }, { "epoch": 3.8, "learning_rate": 1.100468103484306e-05, "loss": 0.9644, "step": 272800 }, { "epoch": 3.8, "learning_rate": 1.0997715209183745e-05, "loss": 0.9664, "step": 272900 }, { "epoch": 3.8, "learning_rate": 1.099074938352443e-05, "loss": 0.9777, "step": 273000 }, { "epoch": 3.8, "learning_rate": 1.0983783557865115e-05, "loss": 0.9728, "step": 273100 }, { "epoch": 3.81, "learning_rate": 1.09768177322058e-05, "loss": 0.9791, "step": 273200 }, { "epoch": 3.81, "learning_rate": 1.0969921564803078e-05, "loss": 0.9805, "step": 273300 }, { "epoch": 3.81, "learning_rate": 1.0962955739143762e-05, "loss": 0.9774, "step": 273400 }, { "epoch": 3.81, "learning_rate": 1.0955989913484445e-05, "loss": 0.9813, "step": 273500 }, { "epoch": 3.81, "learning_rate": 1.094902408782513e-05, "loss": 0.9358, "step": 273600 }, { "epoch": 3.81, "learning_rate": 1.0942058262165815e-05, "loss": 1.0004, "step": 273700 }, { "epoch": 3.81, "learning_rate": 1.09350924365065e-05, "loss": 0.97, "step": 273800 }, { "epoch": 3.82, "learning_rate": 1.0928126610847184e-05, "loss": 0.9578, "step": 273900 }, { "epoch": 3.82, "learning_rate": 1.0921160785187869e-05, "loss": 0.994, "step": 274000 }, { "epoch": 3.82, "learning_rate": 1.0914194959528553e-05, "loss": 0.9845, "step": 274100 }, { "epoch": 3.82, "learning_rate": 1.0907229133869238e-05, "loss": 0.9934, "step": 274200 }, { "epoch": 3.82, "learning_rate": 1.0900263308209923e-05, "loss": 0.9909, "step": 274300 }, { "epoch": 3.82, "learning_rate": 1.0893297482550607e-05, "loss": 0.9895, "step": 274400 }, { "epoch": 3.82, "learning_rate": 1.0886331656891292e-05, "loss": 0.9869, "step": 274500 }, { "epoch": 3.83, "learning_rate": 1.0879365831231975e-05, "loss": 0.9759, "step": 274600 }, { "epoch": 3.83, "learning_rate": 1.087240000557266e-05, "loss": 1.013, "step": 274700 }, { "epoch": 3.83, "learning_rate": 1.0865434179913344e-05, "loss": 0.9553, "step": 274800 }, { "epoch": 3.83, "learning_rate": 1.0858468354254029e-05, "loss": 0.9715, "step": 274900 }, { "epoch": 3.83, "learning_rate": 1.0851502528594714e-05, "loss": 0.9699, "step": 275000 }, { "epoch": 3.83, "learning_rate": 1.0844536702935398e-05, "loss": 1.0056, "step": 275100 }, { "epoch": 3.83, "learning_rate": 1.0837640535532677e-05, "loss": 1.0038, "step": 275200 }, { "epoch": 3.84, "learning_rate": 1.0830674709873361e-05, "loss": 0.9863, "step": 275300 }, { "epoch": 3.84, "learning_rate": 1.0823708884214046e-05, "loss": 0.9697, "step": 275400 }, { "epoch": 3.84, "learning_rate": 1.081674305855473e-05, "loss": 0.9998, "step": 275500 }, { "epoch": 3.84, "learning_rate": 1.0809777232895415e-05, "loss": 1.0124, "step": 275600 }, { "epoch": 3.84, "learning_rate": 1.08028114072361e-05, "loss": 0.979, "step": 275700 }, { "epoch": 3.84, "learning_rate": 1.0795845581576785e-05, "loss": 1.0042, "step": 275800 }, { "epoch": 3.84, "learning_rate": 1.0788879755917468e-05, "loss": 0.9549, "step": 275900 }, { "epoch": 3.85, "learning_rate": 1.0781913930258154e-05, "loss": 0.9615, "step": 276000 }, { "epoch": 3.85, "learning_rate": 1.0774948104598839e-05, "loss": 0.9758, "step": 276100 }, { "epoch": 3.85, "learning_rate": 1.0767982278939523e-05, "loss": 0.9454, "step": 276200 }, { "epoch": 3.85, "learning_rate": 1.0761016453280208e-05, "loss": 0.9954, "step": 276300 }, { "epoch": 3.85, "learning_rate": 1.0754050627620893e-05, "loss": 0.9784, "step": 276400 }, { "epoch": 3.85, "learning_rate": 1.0747084801961577e-05, "loss": 0.9846, "step": 276500 }, { "epoch": 3.85, "learning_rate": 1.0740118976302262e-05, "loss": 0.9897, "step": 276600 }, { "epoch": 3.85, "learning_rate": 1.0733153150642947e-05, "loss": 1.0244, "step": 276700 }, { "epoch": 3.86, "learning_rate": 1.0726187324983631e-05, "loss": 0.9911, "step": 276800 }, { "epoch": 3.86, "learning_rate": 1.0719221499324316e-05, "loss": 1.0317, "step": 276900 }, { "epoch": 3.86, "learning_rate": 1.0712255673665e-05, "loss": 0.9978, "step": 277000 }, { "epoch": 3.86, "learning_rate": 1.0705289848005684e-05, "loss": 1.0002, "step": 277100 }, { "epoch": 3.86, "learning_rate": 1.0698324022346369e-05, "loss": 0.9814, "step": 277200 }, { "epoch": 3.86, "learning_rate": 1.0691358196687053e-05, "loss": 0.9785, "step": 277300 }, { "epoch": 3.86, "learning_rate": 1.0684392371027738e-05, "loss": 0.9724, "step": 277400 }, { "epoch": 3.87, "learning_rate": 1.0677426545368423e-05, "loss": 0.9989, "step": 277500 }, { "epoch": 3.87, "learning_rate": 1.0670460719709107e-05, "loss": 0.961, "step": 277600 }, { "epoch": 3.87, "learning_rate": 1.0663494894049792e-05, "loss": 0.96, "step": 277700 }, { "epoch": 3.87, "learning_rate": 1.0656529068390477e-05, "loss": 0.9835, "step": 277800 }, { "epoch": 3.87, "learning_rate": 1.0649563242731161e-05, "loss": 0.9815, "step": 277900 }, { "epoch": 3.87, "learning_rate": 1.0642597417071846e-05, "loss": 0.9932, "step": 278000 }, { "epoch": 3.87, "learning_rate": 1.0635631591412529e-05, "loss": 0.9926, "step": 278100 }, { "epoch": 3.88, "learning_rate": 1.0628665765753214e-05, "loss": 0.9705, "step": 278200 }, { "epoch": 3.88, "learning_rate": 1.0621699940093898e-05, "loss": 1.0001, "step": 278300 }, { "epoch": 3.88, "learning_rate": 1.0614734114434583e-05, "loss": 0.9889, "step": 278400 }, { "epoch": 3.88, "learning_rate": 1.060776828877527e-05, "loss": 0.9962, "step": 278500 }, { "epoch": 3.88, "learning_rate": 1.0600802463115954e-05, "loss": 0.9582, "step": 278600 }, { "epoch": 3.88, "learning_rate": 1.0593836637456639e-05, "loss": 0.9944, "step": 278700 }, { "epoch": 3.88, "learning_rate": 1.0586870811797323e-05, "loss": 0.9763, "step": 278800 }, { "epoch": 3.89, "learning_rate": 1.0579904986138008e-05, "loss": 0.9497, "step": 278900 }, { "epoch": 3.89, "learning_rate": 1.0572939160478693e-05, "loss": 0.9902, "step": 279000 }, { "epoch": 3.89, "learning_rate": 1.0565973334819378e-05, "loss": 0.9809, "step": 279100 }, { "epoch": 3.89, "learning_rate": 1.0559077167416654e-05, "loss": 0.9747, "step": 279200 }, { "epoch": 3.89, "learning_rate": 1.055211134175734e-05, "loss": 0.9805, "step": 279300 }, { "epoch": 3.89, "learning_rate": 1.0545145516098023e-05, "loss": 1.0024, "step": 279400 }, { "epoch": 3.89, "learning_rate": 1.0538179690438708e-05, "loss": 0.9853, "step": 279500 }, { "epoch": 3.9, "learning_rate": 1.0531213864779393e-05, "loss": 0.9734, "step": 279600 }, { "epoch": 3.9, "learning_rate": 1.0524317697376669e-05, "loss": 0.9694, "step": 279700 }, { "epoch": 3.9, "learning_rate": 1.0517351871717354e-05, "loss": 1.0061, "step": 279800 }, { "epoch": 3.9, "learning_rate": 1.0510386046058038e-05, "loss": 0.9687, "step": 279900 }, { "epoch": 3.9, "learning_rate": 1.0503420220398723e-05, "loss": 0.9794, "step": 280000 }, { "epoch": 3.9, "learning_rate": 1.049645439473941e-05, "loss": 0.9878, "step": 280100 }, { "epoch": 3.9, "learning_rate": 1.0489488569080094e-05, "loss": 0.9465, "step": 280200 }, { "epoch": 3.91, "learning_rate": 1.0482522743420779e-05, "loss": 1.0336, "step": 280300 }, { "epoch": 3.91, "learning_rate": 1.0475556917761464e-05, "loss": 0.9868, "step": 280400 }, { "epoch": 3.91, "learning_rate": 1.0468591092102148e-05, "loss": 1.0007, "step": 280500 }, { "epoch": 3.91, "learning_rate": 1.0461625266442833e-05, "loss": 0.9596, "step": 280600 }, { "epoch": 3.91, "learning_rate": 1.0454659440783516e-05, "loss": 1.0274, "step": 280700 }, { "epoch": 3.91, "learning_rate": 1.04476936151242e-05, "loss": 0.9902, "step": 280800 }, { "epoch": 3.91, "learning_rate": 1.0440727789464885e-05, "loss": 0.9704, "step": 280900 }, { "epoch": 3.91, "learning_rate": 1.043376196380557e-05, "loss": 0.9813, "step": 281000 }, { "epoch": 3.92, "learning_rate": 1.0426796138146255e-05, "loss": 1.0052, "step": 281100 }, { "epoch": 3.92, "learning_rate": 1.041983031248694e-05, "loss": 0.9405, "step": 281200 }, { "epoch": 3.92, "learning_rate": 1.0412864486827624e-05, "loss": 0.9918, "step": 281300 }, { "epoch": 3.92, "learning_rate": 1.0405898661168309e-05, "loss": 0.9681, "step": 281400 }, { "epoch": 3.92, "learning_rate": 1.0398932835508993e-05, "loss": 0.9744, "step": 281500 }, { "epoch": 3.92, "learning_rate": 1.0391967009849678e-05, "loss": 0.974, "step": 281600 }, { "epoch": 3.92, "learning_rate": 1.0385001184190363e-05, "loss": 1.0004, "step": 281700 }, { "epoch": 3.93, "learning_rate": 1.0378035358531046e-05, "loss": 0.9524, "step": 281800 }, { "epoch": 3.93, "learning_rate": 1.037106953287173e-05, "loss": 0.9796, "step": 281900 }, { "epoch": 3.93, "learning_rate": 1.0364103707212415e-05, "loss": 1.0044, "step": 282000 }, { "epoch": 3.93, "learning_rate": 1.03571378815531e-05, "loss": 0.9573, "step": 282100 }, { "epoch": 3.93, "learning_rate": 1.0350172055893784e-05, "loss": 1.0033, "step": 282200 }, { "epoch": 3.93, "learning_rate": 1.034320623023447e-05, "loss": 1.0084, "step": 282300 }, { "epoch": 3.93, "learning_rate": 1.0336240404575154e-05, "loss": 0.9516, "step": 282400 }, { "epoch": 3.94, "learning_rate": 1.0329274578915839e-05, "loss": 0.9972, "step": 282500 }, { "epoch": 3.94, "learning_rate": 1.0322308753256525e-05, "loss": 0.9877, "step": 282600 }, { "epoch": 3.94, "learning_rate": 1.031534292759721e-05, "loss": 1.0119, "step": 282700 }, { "epoch": 3.94, "learning_rate": 1.0308377101937894e-05, "loss": 0.9725, "step": 282800 }, { "epoch": 3.94, "learning_rate": 1.0301411276278579e-05, "loss": 0.9483, "step": 282900 }, { "epoch": 3.94, "learning_rate": 1.0294445450619262e-05, "loss": 1.0138, "step": 283000 }, { "epoch": 3.94, "learning_rate": 1.0287479624959947e-05, "loss": 0.9888, "step": 283100 }, { "epoch": 3.95, "learning_rate": 1.0280513799300631e-05, "loss": 0.9672, "step": 283200 }, { "epoch": 3.95, "learning_rate": 1.0273547973641316e-05, "loss": 0.9859, "step": 283300 }, { "epoch": 3.95, "learning_rate": 1.0266582147982e-05, "loss": 0.9606, "step": 283400 }, { "epoch": 3.95, "learning_rate": 1.0259616322322685e-05, "loss": 1.0003, "step": 283500 }, { "epoch": 3.95, "learning_rate": 1.025265049666337e-05, "loss": 0.9798, "step": 283600 }, { "epoch": 3.95, "learning_rate": 1.0245684671004055e-05, "loss": 0.9869, "step": 283700 }, { "epoch": 3.95, "learning_rate": 1.023871884534474e-05, "loss": 1.0049, "step": 283800 }, { "epoch": 3.96, "learning_rate": 1.0231753019685424e-05, "loss": 0.9581, "step": 283900 }, { "epoch": 3.96, "learning_rate": 1.0224787194026109e-05, "loss": 1.0021, "step": 284000 }, { "epoch": 3.96, "learning_rate": 1.0217821368366792e-05, "loss": 0.9775, "step": 284100 }, { "epoch": 3.96, "learning_rate": 1.0210855542707476e-05, "loss": 0.9659, "step": 284200 }, { "epoch": 3.96, "learning_rate": 1.0203889717048161e-05, "loss": 0.9469, "step": 284300 }, { "epoch": 3.96, "learning_rate": 1.019699354964544e-05, "loss": 0.9738, "step": 284400 }, { "epoch": 3.96, "learning_rate": 1.0190027723986124e-05, "loss": 0.9421, "step": 284500 }, { "epoch": 3.96, "learning_rate": 1.0183061898326809e-05, "loss": 0.989, "step": 284600 }, { "epoch": 3.97, "learning_rate": 1.0176096072667493e-05, "loss": 0.9879, "step": 284700 }, { "epoch": 3.97, "learning_rate": 1.0169199905264771e-05, "loss": 0.9911, "step": 284800 }, { "epoch": 3.97, "learning_rate": 1.0162234079605456e-05, "loss": 0.9613, "step": 284900 }, { "epoch": 3.97, "learning_rate": 1.015526825394614e-05, "loss": 0.9824, "step": 285000 }, { "epoch": 3.97, "learning_rate": 1.0148302428286825e-05, "loss": 0.9766, "step": 285100 }, { "epoch": 3.97, "learning_rate": 1.014133660262751e-05, "loss": 0.9803, "step": 285200 }, { "epoch": 3.97, "learning_rate": 1.0134370776968195e-05, "loss": 1.0073, "step": 285300 }, { "epoch": 3.98, "learning_rate": 1.012740495130888e-05, "loss": 0.9942, "step": 285400 }, { "epoch": 3.98, "learning_rate": 1.0120439125649564e-05, "loss": 0.9659, "step": 285500 }, { "epoch": 3.98, "learning_rate": 1.0113473299990247e-05, "loss": 0.9612, "step": 285600 }, { "epoch": 3.98, "learning_rate": 1.0106507474330932e-05, "loss": 0.969, "step": 285700 }, { "epoch": 3.98, "learning_rate": 1.0099541648671617e-05, "loss": 0.9585, "step": 285800 }, { "epoch": 3.98, "learning_rate": 1.0092575823012301e-05, "loss": 0.9946, "step": 285900 }, { "epoch": 3.98, "learning_rate": 1.0085609997352986e-05, "loss": 0.9986, "step": 286000 }, { "epoch": 3.99, "learning_rate": 1.007864417169367e-05, "loss": 1.0097, "step": 286100 }, { "epoch": 3.99, "learning_rate": 1.0071678346034355e-05, "loss": 1.0117, "step": 286200 }, { "epoch": 3.99, "learning_rate": 1.006471252037504e-05, "loss": 0.973, "step": 286300 }, { "epoch": 3.99, "learning_rate": 1.0057746694715725e-05, "loss": 0.9853, "step": 286400 }, { "epoch": 3.99, "learning_rate": 1.005078086905641e-05, "loss": 0.9752, "step": 286500 }, { "epoch": 3.99, "learning_rate": 1.0043815043397094e-05, "loss": 0.9869, "step": 286600 }, { "epoch": 3.99, "learning_rate": 1.0036849217737779e-05, "loss": 0.9808, "step": 286700 }, { "epoch": 4.0, "learning_rate": 1.0029883392078463e-05, "loss": 0.9659, "step": 286800 }, { "epoch": 4.0, "learning_rate": 1.0022917566419148e-05, "loss": 0.9788, "step": 286900 }, { "epoch": 4.0, "learning_rate": 1.0015951740759833e-05, "loss": 0.9915, "step": 287000 }, { "epoch": 4.0, "learning_rate": 1.0008985915100517e-05, "loss": 0.9798, "step": 287100 }, { "epoch": 4.0, "eval_gen_len": 20.0, "eval_loss": 1.1669589281082153, "eval_rouge1": 12.4306, "eval_rouge2": 3.7329, "eval_rougeL": 11.9497, "eval_rougeLsum": 12.0617, "eval_runtime": 1509.6015, "eval_samples_per_second": 8.855, "eval_steps_per_second": 2.214, "step": 287116 }, { "epoch": 4.0, "learning_rate": 1.0002020089441202e-05, "loss": 0.8798, "step": 287200 }, { "epoch": 4.0, "learning_rate": 9.995054263781887e-06, "loss": 0.887, "step": 287300 }, { "epoch": 4.0, "learning_rate": 9.988088438122572e-06, "loss": 0.9167, "step": 287400 }, { "epoch": 4.01, "learning_rate": 9.981122612463256e-06, "loss": 0.8906, "step": 287500 }, { "epoch": 4.01, "learning_rate": 9.974156786803941e-06, "loss": 0.8845, "step": 287600 }, { "epoch": 4.01, "learning_rate": 9.967190961144626e-06, "loss": 0.8998, "step": 287700 }, { "epoch": 4.01, "learning_rate": 9.960225135485309e-06, "loss": 0.9037, "step": 287800 }, { "epoch": 4.01, "learning_rate": 9.953259309825993e-06, "loss": 0.86, "step": 287900 }, { "epoch": 4.01, "learning_rate": 9.946293484166678e-06, "loss": 0.9148, "step": 288000 }, { "epoch": 4.01, "learning_rate": 9.939327658507363e-06, "loss": 0.9055, "step": 288100 }, { "epoch": 4.02, "learning_rate": 9.932361832848047e-06, "loss": 0.8924, "step": 288200 }, { "epoch": 4.02, "learning_rate": 9.925396007188732e-06, "loss": 0.8662, "step": 288300 }, { "epoch": 4.02, "learning_rate": 9.918430181529417e-06, "loss": 0.8848, "step": 288400 }, { "epoch": 4.02, "learning_rate": 9.911464355870101e-06, "loss": 0.8896, "step": 288500 }, { "epoch": 4.02, "learning_rate": 9.90456818846738e-06, "loss": 0.8681, "step": 288600 }, { "epoch": 4.02, "learning_rate": 9.897602362808064e-06, "loss": 0.9143, "step": 288700 }, { "epoch": 4.02, "learning_rate": 9.890636537148749e-06, "loss": 0.8555, "step": 288800 }, { "epoch": 4.02, "learning_rate": 9.883670711489434e-06, "loss": 0.898, "step": 288900 }, { "epoch": 4.03, "learning_rate": 9.876704885830118e-06, "loss": 0.8996, "step": 289000 }, { "epoch": 4.03, "learning_rate": 9.869739060170801e-06, "loss": 0.8892, "step": 289100 }, { "epoch": 4.03, "learning_rate": 9.862773234511486e-06, "loss": 0.8774, "step": 289200 }, { "epoch": 4.03, "learning_rate": 9.85580740885217e-06, "loss": 0.8788, "step": 289300 }, { "epoch": 4.03, "learning_rate": 9.848841583192855e-06, "loss": 0.8799, "step": 289400 }, { "epoch": 4.03, "learning_rate": 9.84187575753354e-06, "loss": 0.888, "step": 289500 }, { "epoch": 4.03, "learning_rate": 9.834909931874225e-06, "loss": 0.8484, "step": 289600 }, { "epoch": 4.04, "learning_rate": 9.82794410621491e-06, "loss": 0.9076, "step": 289700 }, { "epoch": 4.04, "learning_rate": 9.820978280555594e-06, "loss": 0.879, "step": 289800 }, { "epoch": 4.04, "learning_rate": 9.81401245489628e-06, "loss": 0.8713, "step": 289900 }, { "epoch": 4.04, "learning_rate": 9.807046629236965e-06, "loss": 0.8748, "step": 290000 }, { "epoch": 4.04, "learning_rate": 9.80008080357765e-06, "loss": 0.8888, "step": 290100 }, { "epoch": 4.04, "learning_rate": 9.793114977918333e-06, "loss": 0.8755, "step": 290200 }, { "epoch": 4.04, "learning_rate": 9.786149152259017e-06, "loss": 0.8682, "step": 290300 }, { "epoch": 4.05, "learning_rate": 9.779183326599702e-06, "loss": 0.8733, "step": 290400 }, { "epoch": 4.05, "learning_rate": 9.772217500940387e-06, "loss": 0.8791, "step": 290500 }, { "epoch": 4.05, "learning_rate": 9.765251675281071e-06, "loss": 0.8938, "step": 290600 }, { "epoch": 4.05, "learning_rate": 9.758285849621756e-06, "loss": 0.9049, "step": 290700 }, { "epoch": 4.05, "learning_rate": 9.75132002396244e-06, "loss": 0.8843, "step": 290800 }, { "epoch": 4.05, "learning_rate": 9.744354198303125e-06, "loss": 0.9157, "step": 290900 }, { "epoch": 4.05, "learning_rate": 9.73738837264381e-06, "loss": 0.9201, "step": 291000 }, { "epoch": 4.06, "learning_rate": 9.730422546984495e-06, "loss": 0.8714, "step": 291100 }, { "epoch": 4.06, "learning_rate": 9.72345672132518e-06, "loss": 0.8757, "step": 291200 }, { "epoch": 4.06, "learning_rate": 9.716490895665864e-06, "loss": 0.8866, "step": 291300 }, { "epoch": 4.06, "learning_rate": 9.709525070006547e-06, "loss": 0.9118, "step": 291400 }, { "epoch": 4.06, "learning_rate": 9.702559244347232e-06, "loss": 0.9034, "step": 291500 }, { "epoch": 4.06, "learning_rate": 9.695593418687917e-06, "loss": 0.8982, "step": 291600 }, { "epoch": 4.06, "learning_rate": 9.688627593028601e-06, "loss": 0.9282, "step": 291700 }, { "epoch": 4.07, "learning_rate": 9.681661767369286e-06, "loss": 0.8808, "step": 291800 }, { "epoch": 4.07, "learning_rate": 9.67469594170997e-06, "loss": 0.8979, "step": 291900 }, { "epoch": 4.07, "learning_rate": 9.667799774307249e-06, "loss": 0.9066, "step": 292000 }, { "epoch": 4.07, "learning_rate": 9.660833948647933e-06, "loss": 0.9289, "step": 292100 }, { "epoch": 4.07, "learning_rate": 9.653868122988618e-06, "loss": 0.9009, "step": 292200 }, { "epoch": 4.07, "learning_rate": 9.646902297329303e-06, "loss": 0.9069, "step": 292300 }, { "epoch": 4.07, "learning_rate": 9.639936471669987e-06, "loss": 0.9099, "step": 292400 }, { "epoch": 4.08, "learning_rate": 9.632970646010672e-06, "loss": 0.8847, "step": 292500 }, { "epoch": 4.08, "learning_rate": 9.626004820351357e-06, "loss": 0.8912, "step": 292600 }, { "epoch": 4.08, "learning_rate": 9.61903899469204e-06, "loss": 0.891, "step": 292700 }, { "epoch": 4.08, "learning_rate": 9.612073169032725e-06, "loss": 0.9028, "step": 292800 }, { "epoch": 4.08, "learning_rate": 9.60510734337341e-06, "loss": 0.9037, "step": 292900 }, { "epoch": 4.08, "learning_rate": 9.598141517714094e-06, "loss": 0.9124, "step": 293000 }, { "epoch": 4.08, "learning_rate": 9.591175692054779e-06, "loss": 0.8872, "step": 293100 }, { "epoch": 4.08, "learning_rate": 9.584209866395465e-06, "loss": 0.8969, "step": 293200 }, { "epoch": 4.09, "learning_rate": 9.57724404073615e-06, "loss": 0.9182, "step": 293300 }, { "epoch": 4.09, "learning_rate": 9.570278215076834e-06, "loss": 0.9193, "step": 293400 }, { "epoch": 4.09, "learning_rate": 9.563312389417519e-06, "loss": 0.8957, "step": 293500 }, { "epoch": 4.09, "learning_rate": 9.556346563758204e-06, "loss": 0.8839, "step": 293600 }, { "epoch": 4.09, "learning_rate": 9.549380738098888e-06, "loss": 0.903, "step": 293700 }, { "epoch": 4.09, "learning_rate": 9.542414912439571e-06, "loss": 0.8874, "step": 293800 }, { "epoch": 4.09, "learning_rate": 9.535449086780256e-06, "loss": 0.9027, "step": 293900 }, { "epoch": 4.1, "learning_rate": 9.52848326112094e-06, "loss": 0.8889, "step": 294000 }, { "epoch": 4.1, "learning_rate": 9.521587093718219e-06, "loss": 0.8773, "step": 294100 }, { "epoch": 4.1, "learning_rate": 9.514621268058904e-06, "loss": 0.91, "step": 294200 }, { "epoch": 4.1, "learning_rate": 9.507655442399588e-06, "loss": 0.8912, "step": 294300 }, { "epoch": 4.1, "learning_rate": 9.500689616740273e-06, "loss": 0.8588, "step": 294400 }, { "epoch": 4.1, "learning_rate": 9.493723791080958e-06, "loss": 0.8836, "step": 294500 }, { "epoch": 4.1, "learning_rate": 9.486757965421642e-06, "loss": 0.8813, "step": 294600 }, { "epoch": 4.11, "learning_rate": 9.479792139762327e-06, "loss": 0.8857, "step": 294700 }, { "epoch": 4.11, "learning_rate": 9.472826314103012e-06, "loss": 0.9061, "step": 294800 }, { "epoch": 4.11, "learning_rate": 9.465860488443696e-06, "loss": 0.9027, "step": 294900 }, { "epoch": 4.11, "learning_rate": 9.458964321040974e-06, "loss": 0.9203, "step": 295000 }, { "epoch": 4.11, "learning_rate": 9.451998495381659e-06, "loss": 0.8871, "step": 295100 }, { "epoch": 4.11, "learning_rate": 9.445032669722344e-06, "loss": 0.9012, "step": 295200 }, { "epoch": 4.11, "learning_rate": 9.438066844063027e-06, "loss": 0.9211, "step": 295300 }, { "epoch": 4.12, "learning_rate": 9.431101018403711e-06, "loss": 0.8783, "step": 295400 }, { "epoch": 4.12, "learning_rate": 9.424135192744396e-06, "loss": 0.8633, "step": 295500 }, { "epoch": 4.12, "learning_rate": 9.41716936708508e-06, "loss": 0.9149, "step": 295600 }, { "epoch": 4.12, "learning_rate": 9.410203541425766e-06, "loss": 0.8882, "step": 295700 }, { "epoch": 4.12, "learning_rate": 9.40323771576645e-06, "loss": 0.8843, "step": 295800 }, { "epoch": 4.12, "learning_rate": 9.396271890107135e-06, "loss": 0.8888, "step": 295900 }, { "epoch": 4.12, "learning_rate": 9.38930606444782e-06, "loss": 0.8865, "step": 296000 }, { "epoch": 4.13, "learning_rate": 9.382340238788504e-06, "loss": 0.8992, "step": 296100 }, { "epoch": 4.13, "learning_rate": 9.375374413129189e-06, "loss": 0.8829, "step": 296200 }, { "epoch": 4.13, "learning_rate": 9.368408587469874e-06, "loss": 0.9391, "step": 296300 }, { "epoch": 4.13, "learning_rate": 9.361442761810557e-06, "loss": 0.8892, "step": 296400 }, { "epoch": 4.13, "learning_rate": 9.354476936151241e-06, "loss": 0.8723, "step": 296500 }, { "epoch": 4.13, "learning_rate": 9.347511110491926e-06, "loss": 0.905, "step": 296600 }, { "epoch": 4.13, "learning_rate": 9.34054528483261e-06, "loss": 0.8861, "step": 296700 }, { "epoch": 4.13, "learning_rate": 9.333579459173295e-06, "loss": 0.875, "step": 296800 }, { "epoch": 4.14, "learning_rate": 9.32661363351398e-06, "loss": 0.8967, "step": 296900 }, { "epoch": 4.14, "learning_rate": 9.319647807854665e-06, "loss": 0.9027, "step": 297000 }, { "epoch": 4.14, "learning_rate": 9.31268198219535e-06, "loss": 0.885, "step": 297100 }, { "epoch": 4.14, "learning_rate": 9.305716156536036e-06, "loss": 0.9072, "step": 297200 }, { "epoch": 4.14, "learning_rate": 9.29875033087672e-06, "loss": 0.9043, "step": 297300 }, { "epoch": 4.14, "learning_rate": 9.291784505217405e-06, "loss": 0.864, "step": 297400 }, { "epoch": 4.14, "learning_rate": 9.284818679558088e-06, "loss": 0.8637, "step": 297500 }, { "epoch": 4.15, "learning_rate": 9.277852853898773e-06, "loss": 0.887, "step": 297600 }, { "epoch": 4.15, "learning_rate": 9.270887028239457e-06, "loss": 0.9015, "step": 297700 }, { "epoch": 4.15, "learning_rate": 9.263921202580142e-06, "loss": 0.9068, "step": 297800 }, { "epoch": 4.15, "learning_rate": 9.256955376920827e-06, "loss": 0.8656, "step": 297900 }, { "epoch": 4.15, "learning_rate": 9.249989551261512e-06, "loss": 0.9118, "step": 298000 }, { "epoch": 4.15, "learning_rate": 9.243023725602196e-06, "loss": 0.8942, "step": 298100 }, { "epoch": 4.15, "learning_rate": 9.236127558199474e-06, "loss": 0.8867, "step": 298200 }, { "epoch": 4.16, "learning_rate": 9.229161732540159e-06, "loss": 0.8914, "step": 298300 }, { "epoch": 4.16, "learning_rate": 9.222195906880844e-06, "loss": 0.8917, "step": 298400 }, { "epoch": 4.16, "learning_rate": 9.215230081221528e-06, "loss": 0.8779, "step": 298500 }, { "epoch": 4.16, "learning_rate": 9.208264255562213e-06, "loss": 0.9147, "step": 298600 }, { "epoch": 4.16, "learning_rate": 9.201298429902898e-06, "loss": 0.882, "step": 298700 }, { "epoch": 4.16, "learning_rate": 9.19433260424358e-06, "loss": 0.8678, "step": 298800 }, { "epoch": 4.16, "learning_rate": 9.187366778584265e-06, "loss": 0.8821, "step": 298900 }, { "epoch": 4.17, "learning_rate": 9.18040095292495e-06, "loss": 0.8871, "step": 299000 }, { "epoch": 4.17, "learning_rate": 9.173435127265635e-06, "loss": 0.9118, "step": 299100 }, { "epoch": 4.17, "learning_rate": 9.16646930160632e-06, "loss": 0.9057, "step": 299200 }, { "epoch": 4.17, "learning_rate": 9.159503475947004e-06, "loss": 0.8713, "step": 299300 }, { "epoch": 4.17, "learning_rate": 9.152537650287689e-06, "loss": 0.8827, "step": 299400 }, { "epoch": 4.17, "learning_rate": 9.145571824628374e-06, "loss": 0.8992, "step": 299500 }, { "epoch": 4.17, "learning_rate": 9.138605998969058e-06, "loss": 0.9391, "step": 299600 }, { "epoch": 4.18, "learning_rate": 9.131640173309743e-06, "loss": 0.9138, "step": 299700 }, { "epoch": 4.18, "learning_rate": 9.124674347650428e-06, "loss": 0.8712, "step": 299800 }, { "epoch": 4.18, "learning_rate": 9.11770852199111e-06, "loss": 0.9157, "step": 299900 }, { "epoch": 4.18, "learning_rate": 9.110742696331795e-06, "loss": 0.8838, "step": 300000 }, { "epoch": 4.18, "learning_rate": 9.10377687067248e-06, "loss": 0.9131, "step": 300100 }, { "epoch": 4.18, "learning_rate": 9.096811045013165e-06, "loss": 0.9078, "step": 300200 }, { "epoch": 4.18, "learning_rate": 9.08984521935385e-06, "loss": 0.9053, "step": 300300 }, { "epoch": 4.19, "learning_rate": 9.082879393694534e-06, "loss": 0.9065, "step": 300400 }, { "epoch": 4.19, "learning_rate": 9.07591356803522e-06, "loss": 0.9008, "step": 300500 }, { "epoch": 4.19, "learning_rate": 9.068947742375905e-06, "loss": 0.8981, "step": 300600 }, { "epoch": 4.19, "learning_rate": 9.06198191671659e-06, "loss": 0.8974, "step": 300700 }, { "epoch": 4.19, "learning_rate": 9.055016091057274e-06, "loss": 0.9151, "step": 300800 }, { "epoch": 4.19, "learning_rate": 9.048050265397959e-06, "loss": 0.9115, "step": 300900 }, { "epoch": 4.19, "learning_rate": 9.041084439738642e-06, "loss": 0.9199, "step": 301000 }, { "epoch": 4.19, "learning_rate": 9.034118614079327e-06, "loss": 0.8923, "step": 301100 }, { "epoch": 4.2, "learning_rate": 9.027152788420011e-06, "loss": 0.8718, "step": 301200 }, { "epoch": 4.2, "learning_rate": 9.020186962760696e-06, "loss": 0.8895, "step": 301300 }, { "epoch": 4.2, "learning_rate": 9.01322113710138e-06, "loss": 0.9087, "step": 301400 }, { "epoch": 4.2, "learning_rate": 9.006255311442065e-06, "loss": 0.8972, "step": 301500 }, { "epoch": 4.2, "learning_rate": 8.99928948578275e-06, "loss": 0.9067, "step": 301600 }, { "epoch": 4.2, "learning_rate": 8.992323660123435e-06, "loss": 0.9087, "step": 301700 }, { "epoch": 4.2, "learning_rate": 8.98535783446412e-06, "loss": 0.9044, "step": 301800 }, { "epoch": 4.21, "learning_rate": 8.978392008804804e-06, "loss": 0.8673, "step": 301900 }, { "epoch": 4.21, "learning_rate": 8.971426183145489e-06, "loss": 0.8897, "step": 302000 }, { "epoch": 4.21, "learning_rate": 8.964460357486174e-06, "loss": 0.8733, "step": 302100 }, { "epoch": 4.21, "learning_rate": 8.957494531826857e-06, "loss": 0.8798, "step": 302200 }, { "epoch": 4.21, "learning_rate": 8.950598364424135e-06, "loss": 0.8735, "step": 302300 }, { "epoch": 4.21, "learning_rate": 8.94363253876482e-06, "loss": 0.8819, "step": 302400 }, { "epoch": 4.21, "learning_rate": 8.936666713105504e-06, "loss": 0.9019, "step": 302500 }, { "epoch": 4.22, "learning_rate": 8.929700887446189e-06, "loss": 0.8547, "step": 302600 }, { "epoch": 4.22, "learning_rate": 8.922735061786873e-06, "loss": 0.8871, "step": 302700 }, { "epoch": 4.22, "learning_rate": 8.915769236127558e-06, "loss": 0.8906, "step": 302800 }, { "epoch": 4.22, "learning_rate": 8.908803410468243e-06, "loss": 0.9111, "step": 302900 }, { "epoch": 4.22, "learning_rate": 8.901837584808927e-06, "loss": 0.8933, "step": 303000 }, { "epoch": 4.22, "learning_rate": 8.894871759149612e-06, "loss": 0.8749, "step": 303100 }, { "epoch": 4.22, "learning_rate": 8.887905933490297e-06, "loss": 0.9143, "step": 303200 }, { "epoch": 4.23, "learning_rate": 8.880940107830982e-06, "loss": 0.8993, "step": 303300 }, { "epoch": 4.23, "learning_rate": 8.873974282171666e-06, "loss": 0.9112, "step": 303400 }, { "epoch": 4.23, "learning_rate": 8.86700845651235e-06, "loss": 0.8785, "step": 303500 }, { "epoch": 4.23, "learning_rate": 8.860042630853034e-06, "loss": 0.8886, "step": 303600 }, { "epoch": 4.23, "learning_rate": 8.85307680519372e-06, "loss": 0.938, "step": 303700 }, { "epoch": 4.23, "learning_rate": 8.846110979534405e-06, "loss": 0.854, "step": 303800 }, { "epoch": 4.23, "learning_rate": 8.83914515387509e-06, "loss": 0.926, "step": 303900 }, { "epoch": 4.24, "learning_rate": 8.832179328215774e-06, "loss": 0.9124, "step": 304000 }, { "epoch": 4.24, "learning_rate": 8.825213502556459e-06, "loss": 0.8968, "step": 304100 }, { "epoch": 4.24, "learning_rate": 8.818247676897144e-06, "loss": 0.9058, "step": 304200 }, { "epoch": 4.24, "learning_rate": 8.811281851237828e-06, "loss": 0.908, "step": 304300 }, { "epoch": 4.24, "learning_rate": 8.804385683835105e-06, "loss": 0.9382, "step": 304400 }, { "epoch": 4.24, "learning_rate": 8.79741985817579e-06, "loss": 0.8834, "step": 304500 }, { "epoch": 4.24, "learning_rate": 8.790454032516476e-06, "loss": 0.8964, "step": 304600 }, { "epoch": 4.24, "learning_rate": 8.78348820685716e-06, "loss": 0.9125, "step": 304700 }, { "epoch": 4.25, "learning_rate": 8.776522381197844e-06, "loss": 0.8903, "step": 304800 }, { "epoch": 4.25, "learning_rate": 8.769556555538528e-06, "loss": 0.8758, "step": 304900 }, { "epoch": 4.25, "learning_rate": 8.762590729879213e-06, "loss": 0.8857, "step": 305000 }, { "epoch": 4.25, "learning_rate": 8.755624904219898e-06, "loss": 0.9212, "step": 305100 }, { "epoch": 4.25, "learning_rate": 8.748728736817174e-06, "loss": 0.8922, "step": 305200 }, { "epoch": 4.25, "learning_rate": 8.741762911157859e-06, "loss": 0.9003, "step": 305300 }, { "epoch": 4.25, "learning_rate": 8.734797085498545e-06, "loss": 0.9132, "step": 305400 }, { "epoch": 4.26, "learning_rate": 8.72783125983923e-06, "loss": 0.8758, "step": 305500 }, { "epoch": 4.26, "learning_rate": 8.720865434179914e-06, "loss": 0.8875, "step": 305600 }, { "epoch": 4.26, "learning_rate": 8.713899608520599e-06, "loss": 0.9164, "step": 305700 }, { "epoch": 4.26, "learning_rate": 8.706933782861284e-06, "loss": 0.8996, "step": 305800 }, { "epoch": 4.26, "learning_rate": 8.699967957201968e-06, "loss": 0.9045, "step": 305900 }, { "epoch": 4.26, "learning_rate": 8.693002131542653e-06, "loss": 0.8863, "step": 306000 }, { "epoch": 4.26, "learning_rate": 8.686036305883336e-06, "loss": 0.8951, "step": 306100 }, { "epoch": 4.27, "learning_rate": 8.67907048022402e-06, "loss": 0.9016, "step": 306200 }, { "epoch": 4.27, "learning_rate": 8.672104654564706e-06, "loss": 0.9075, "step": 306300 }, { "epoch": 4.27, "learning_rate": 8.66513882890539e-06, "loss": 0.8794, "step": 306400 }, { "epoch": 4.27, "learning_rate": 8.658173003246075e-06, "loss": 0.9076, "step": 306500 }, { "epoch": 4.27, "learning_rate": 8.65120717758676e-06, "loss": 0.8812, "step": 306600 }, { "epoch": 4.27, "learning_rate": 8.644241351927444e-06, "loss": 0.8843, "step": 306700 }, { "epoch": 4.27, "learning_rate": 8.637275526268129e-06, "loss": 0.8714, "step": 306800 }, { "epoch": 4.28, "learning_rate": 8.630309700608814e-06, "loss": 0.879, "step": 306900 }, { "epoch": 4.28, "learning_rate": 8.623343874949498e-06, "loss": 0.8801, "step": 307000 }, { "epoch": 4.28, "learning_rate": 8.616378049290183e-06, "loss": 0.8613, "step": 307100 }, { "epoch": 4.28, "learning_rate": 8.609412223630866e-06, "loss": 0.887, "step": 307200 }, { "epoch": 4.28, "learning_rate": 8.60244639797155e-06, "loss": 0.8832, "step": 307300 }, { "epoch": 4.28, "learning_rate": 8.595480572312235e-06, "loss": 0.9144, "step": 307400 }, { "epoch": 4.28, "learning_rate": 8.58851474665292e-06, "loss": 0.8777, "step": 307500 }, { "epoch": 4.29, "learning_rate": 8.581548920993605e-06, "loss": 0.8765, "step": 307600 }, { "epoch": 4.29, "learning_rate": 8.57458309533429e-06, "loss": 0.9076, "step": 307700 }, { "epoch": 4.29, "learning_rate": 8.567617269674976e-06, "loss": 0.8719, "step": 307800 }, { "epoch": 4.29, "learning_rate": 8.56065144401566e-06, "loss": 0.8705, "step": 307900 }, { "epoch": 4.29, "learning_rate": 8.553685618356345e-06, "loss": 0.9109, "step": 308000 }, { "epoch": 4.29, "learning_rate": 8.54671979269703e-06, "loss": 0.9082, "step": 308100 }, { "epoch": 4.29, "learning_rate": 8.539753967037715e-06, "loss": 0.9211, "step": 308200 }, { "epoch": 4.3, "learning_rate": 8.532788141378398e-06, "loss": 0.8623, "step": 308300 }, { "epoch": 4.3, "learning_rate": 8.525822315719082e-06, "loss": 0.9063, "step": 308400 }, { "epoch": 4.3, "learning_rate": 8.518856490059767e-06, "loss": 0.91, "step": 308500 }, { "epoch": 4.3, "learning_rate": 8.511890664400452e-06, "loss": 0.9026, "step": 308600 }, { "epoch": 4.3, "learning_rate": 8.504924838741136e-06, "loss": 0.9274, "step": 308700 }, { "epoch": 4.3, "learning_rate": 8.498028671338414e-06, "loss": 0.8974, "step": 308800 }, { "epoch": 4.3, "learning_rate": 8.491062845679099e-06, "loss": 0.914, "step": 308900 }, { "epoch": 4.3, "learning_rate": 8.484097020019784e-06, "loss": 0.9032, "step": 309000 }, { "epoch": 4.31, "learning_rate": 8.477131194360468e-06, "loss": 0.895, "step": 309100 }, { "epoch": 4.31, "learning_rate": 8.470165368701153e-06, "loss": 0.9005, "step": 309200 }, { "epoch": 4.31, "learning_rate": 8.463199543041838e-06, "loss": 0.9147, "step": 309300 }, { "epoch": 4.31, "learning_rate": 8.456233717382522e-06, "loss": 0.913, "step": 309400 }, { "epoch": 4.31, "learning_rate": 8.449267891723207e-06, "loss": 0.9146, "step": 309500 }, { "epoch": 4.31, "learning_rate": 8.44230206606389e-06, "loss": 0.9002, "step": 309600 }, { "epoch": 4.31, "learning_rate": 8.435336240404575e-06, "loss": 0.9239, "step": 309700 }, { "epoch": 4.32, "learning_rate": 8.42837041474526e-06, "loss": 0.8964, "step": 309800 }, { "epoch": 4.32, "learning_rate": 8.421404589085944e-06, "loss": 0.9108, "step": 309900 }, { "epoch": 4.32, "learning_rate": 8.414438763426629e-06, "loss": 0.8997, "step": 310000 }, { "epoch": 4.32, "learning_rate": 8.407472937767314e-06, "loss": 0.8904, "step": 310100 }, { "epoch": 4.32, "learning_rate": 8.400507112107998e-06, "loss": 0.8925, "step": 310200 }, { "epoch": 4.32, "learning_rate": 8.393541286448683e-06, "loss": 0.8845, "step": 310300 }, { "epoch": 4.32, "learning_rate": 8.386575460789368e-06, "loss": 0.859, "step": 310400 }, { "epoch": 4.33, "learning_rate": 8.379609635130052e-06, "loss": 0.882, "step": 310500 }, { "epoch": 4.33, "learning_rate": 8.372643809470737e-06, "loss": 0.9013, "step": 310600 }, { "epoch": 4.33, "learning_rate": 8.36567798381142e-06, "loss": 0.8792, "step": 310700 }, { "epoch": 4.33, "learning_rate": 8.358712158152105e-06, "loss": 0.8754, "step": 310800 }, { "epoch": 4.33, "learning_rate": 8.35174633249279e-06, "loss": 0.8944, "step": 310900 }, { "epoch": 4.33, "learning_rate": 8.344780506833474e-06, "loss": 0.9278, "step": 311000 }, { "epoch": 4.33, "learning_rate": 8.33781468117416e-06, "loss": 0.9196, "step": 311100 }, { "epoch": 4.34, "learning_rate": 8.330848855514845e-06, "loss": 0.9093, "step": 311200 }, { "epoch": 4.34, "learning_rate": 8.32388302985553e-06, "loss": 0.8672, "step": 311300 }, { "epoch": 4.34, "learning_rate": 8.316917204196214e-06, "loss": 0.889, "step": 311400 }, { "epoch": 4.34, "learning_rate": 8.309951378536899e-06, "loss": 0.9111, "step": 311500 }, { "epoch": 4.34, "learning_rate": 8.302985552877584e-06, "loss": 0.9003, "step": 311600 }, { "epoch": 4.34, "learning_rate": 8.296019727218268e-06, "loss": 0.9248, "step": 311700 }, { "epoch": 4.34, "learning_rate": 8.289053901558953e-06, "loss": 0.9375, "step": 311800 }, { "epoch": 4.35, "learning_rate": 8.282088075899636e-06, "loss": 0.8977, "step": 311900 }, { "epoch": 4.35, "learning_rate": 8.27512225024032e-06, "loss": 0.9032, "step": 312000 }, { "epoch": 4.35, "learning_rate": 8.268156424581006e-06, "loss": 0.8761, "step": 312100 }, { "epoch": 4.35, "learning_rate": 8.26119059892169e-06, "loss": 0.9121, "step": 312200 }, { "epoch": 4.35, "learning_rate": 8.254224773262375e-06, "loss": 0.9079, "step": 312300 }, { "epoch": 4.35, "learning_rate": 8.24725894760306e-06, "loss": 0.8987, "step": 312400 }, { "epoch": 4.35, "learning_rate": 8.240293121943744e-06, "loss": 0.8772, "step": 312500 }, { "epoch": 4.36, "learning_rate": 8.233327296284429e-06, "loss": 0.8698, "step": 312600 }, { "epoch": 4.36, "learning_rate": 8.226361470625114e-06, "loss": 0.8869, "step": 312700 }, { "epoch": 4.36, "learning_rate": 8.219395644965798e-06, "loss": 0.8906, "step": 312800 }, { "epoch": 4.36, "learning_rate": 8.212429819306483e-06, "loss": 0.9012, "step": 312900 }, { "epoch": 4.36, "learning_rate": 8.205463993647166e-06, "loss": 0.9043, "step": 313000 }, { "epoch": 4.36, "learning_rate": 8.19849816798785e-06, "loss": 0.8731, "step": 313100 }, { "epoch": 4.36, "learning_rate": 8.191532342328535e-06, "loss": 0.8759, "step": 313200 }, { "epoch": 4.36, "learning_rate": 8.18456651666922e-06, "loss": 0.8886, "step": 313300 }, { "epoch": 4.37, "learning_rate": 8.177600691009905e-06, "loss": 0.9356, "step": 313400 }, { "epoch": 4.37, "learning_rate": 8.17063486535059e-06, "loss": 0.8879, "step": 313500 }, { "epoch": 4.37, "learning_rate": 8.163669039691276e-06, "loss": 0.8747, "step": 313600 }, { "epoch": 4.37, "learning_rate": 8.15670321403196e-06, "loss": 0.8869, "step": 313700 }, { "epoch": 4.37, "learning_rate": 8.149737388372645e-06, "loss": 0.8997, "step": 313800 }, { "epoch": 4.37, "learning_rate": 8.14277156271333e-06, "loss": 0.8978, "step": 313900 }, { "epoch": 4.37, "learning_rate": 8.135805737054014e-06, "loss": 0.8883, "step": 314000 }, { "epoch": 4.38, "learning_rate": 8.128909569651291e-06, "loss": 0.8676, "step": 314100 }, { "epoch": 4.38, "learning_rate": 8.121943743991976e-06, "loss": 0.9107, "step": 314200 }, { "epoch": 4.38, "learning_rate": 8.11497791833266e-06, "loss": 0.8825, "step": 314300 }, { "epoch": 4.38, "learning_rate": 8.108012092673345e-06, "loss": 0.9159, "step": 314400 }, { "epoch": 4.38, "learning_rate": 8.10104626701403e-06, "loss": 0.8902, "step": 314500 }, { "epoch": 4.38, "learning_rate": 8.094080441354714e-06, "loss": 0.913, "step": 314600 }, { "epoch": 4.38, "learning_rate": 8.087114615695399e-06, "loss": 0.8953, "step": 314700 }, { "epoch": 4.39, "learning_rate": 8.080148790036084e-06, "loss": 0.8857, "step": 314800 }, { "epoch": 4.39, "learning_rate": 8.073182964376768e-06, "loss": 0.8929, "step": 314900 }, { "epoch": 4.39, "learning_rate": 8.066217138717453e-06, "loss": 0.9126, "step": 315000 }, { "epoch": 4.39, "learning_rate": 8.059251313058138e-06, "loss": 0.8801, "step": 315100 }, { "epoch": 4.39, "learning_rate": 8.052285487398822e-06, "loss": 0.8607, "step": 315200 }, { "epoch": 4.39, "learning_rate": 8.045319661739507e-06, "loss": 0.934, "step": 315300 }, { "epoch": 4.39, "learning_rate": 8.03835383608019e-06, "loss": 0.9185, "step": 315400 }, { "epoch": 4.4, "learning_rate": 8.031388010420875e-06, "loss": 0.8689, "step": 315500 }, { "epoch": 4.4, "learning_rate": 8.02442218476156e-06, "loss": 0.8656, "step": 315600 }, { "epoch": 4.4, "learning_rate": 8.017456359102244e-06, "loss": 0.9096, "step": 315700 }, { "epoch": 4.4, "learning_rate": 8.010490533442929e-06, "loss": 0.8914, "step": 315800 }, { "epoch": 4.4, "learning_rate": 8.003524707783614e-06, "loss": 0.9163, "step": 315900 }, { "epoch": 4.4, "learning_rate": 7.996558882124298e-06, "loss": 0.9187, "step": 316000 }, { "epoch": 4.4, "learning_rate": 7.989593056464983e-06, "loss": 0.8844, "step": 316100 }, { "epoch": 4.41, "learning_rate": 7.982696889062261e-06, "loss": 0.8941, "step": 316200 }, { "epoch": 4.41, "learning_rate": 7.975731063402946e-06, "loss": 0.9091, "step": 316300 }, { "epoch": 4.41, "learning_rate": 7.96876523774363e-06, "loss": 0.9058, "step": 316400 }, { "epoch": 4.41, "learning_rate": 7.961799412084315e-06, "loss": 0.902, "step": 316500 }, { "epoch": 4.41, "learning_rate": 7.954833586425e-06, "loss": 0.9069, "step": 316600 }, { "epoch": 4.41, "learning_rate": 7.947867760765683e-06, "loss": 0.9132, "step": 316700 }, { "epoch": 4.41, "learning_rate": 7.940901935106367e-06, "loss": 0.9092, "step": 316800 }, { "epoch": 4.41, "learning_rate": 7.933936109447052e-06, "loss": 0.8787, "step": 316900 }, { "epoch": 4.42, "learning_rate": 7.926970283787737e-06, "loss": 0.8912, "step": 317000 }, { "epoch": 4.42, "learning_rate": 7.920004458128421e-06, "loss": 0.8981, "step": 317100 }, { "epoch": 4.42, "learning_rate": 7.913038632469106e-06, "loss": 0.8828, "step": 317200 }, { "epoch": 4.42, "learning_rate": 7.90607280680979e-06, "loss": 0.8916, "step": 317300 }, { "epoch": 4.42, "learning_rate": 7.899106981150476e-06, "loss": 0.8966, "step": 317400 }, { "epoch": 4.42, "learning_rate": 7.89214115549116e-06, "loss": 0.8944, "step": 317500 }, { "epoch": 4.42, "learning_rate": 7.885175329831847e-06, "loss": 0.9162, "step": 317600 }, { "epoch": 4.43, "learning_rate": 7.878209504172531e-06, "loss": 0.8874, "step": 317700 }, { "epoch": 4.43, "learning_rate": 7.871243678513214e-06, "loss": 0.8966, "step": 317800 }, { "epoch": 4.43, "learning_rate": 7.864277852853899e-06, "loss": 0.9229, "step": 317900 }, { "epoch": 4.43, "learning_rate": 7.857312027194584e-06, "loss": 0.9151, "step": 318000 }, { "epoch": 4.43, "learning_rate": 7.850346201535268e-06, "loss": 0.9226, "step": 318100 }, { "epoch": 4.43, "learning_rate": 7.843380375875953e-06, "loss": 0.9051, "step": 318200 }, { "epoch": 4.43, "learning_rate": 7.836414550216638e-06, "loss": 0.9119, "step": 318300 }, { "epoch": 4.44, "learning_rate": 7.829448724557322e-06, "loss": 0.8696, "step": 318400 }, { "epoch": 4.44, "learning_rate": 7.822482898898007e-06, "loss": 0.877, "step": 318500 }, { "epoch": 4.44, "learning_rate": 7.815517073238692e-06, "loss": 0.9048, "step": 318600 }, { "epoch": 4.44, "learning_rate": 7.808551247579376e-06, "loss": 0.8981, "step": 318700 }, { "epoch": 4.44, "learning_rate": 7.801585421920061e-06, "loss": 0.8949, "step": 318800 }, { "epoch": 4.44, "learning_rate": 7.794619596260744e-06, "loss": 0.8869, "step": 318900 }, { "epoch": 4.44, "learning_rate": 7.787653770601429e-06, "loss": 0.9065, "step": 319000 }, { "epoch": 4.45, "learning_rate": 7.780687944942113e-06, "loss": 0.8938, "step": 319100 }, { "epoch": 4.45, "learning_rate": 7.773722119282798e-06, "loss": 0.9054, "step": 319200 }, { "epoch": 4.45, "learning_rate": 7.766756293623483e-06, "loss": 0.8662, "step": 319300 }, { "epoch": 4.45, "learning_rate": 7.759790467964167e-06, "loss": 0.8957, "step": 319400 }, { "epoch": 4.45, "learning_rate": 7.752824642304852e-06, "loss": 0.8956, "step": 319500 }, { "epoch": 4.45, "learning_rate": 7.745858816645537e-06, "loss": 0.8782, "step": 319600 }, { "epoch": 4.45, "learning_rate": 7.738892990986222e-06, "loss": 0.8943, "step": 319700 }, { "epoch": 4.46, "learning_rate": 7.731927165326906e-06, "loss": 0.8972, "step": 319800 }, { "epoch": 4.46, "learning_rate": 7.724961339667591e-06, "loss": 0.9111, "step": 319900 }, { "epoch": 4.46, "learning_rate": 7.717995514008276e-06, "loss": 0.908, "step": 320000 }, { "epoch": 4.46, "learning_rate": 7.71102968834896e-06, "loss": 0.9101, "step": 320100 }, { "epoch": 4.46, "learning_rate": 7.704133520946237e-06, "loss": 0.8888, "step": 320200 }, { "epoch": 4.46, "learning_rate": 7.697167695286921e-06, "loss": 0.9402, "step": 320300 }, { "epoch": 4.46, "learning_rate": 7.690201869627606e-06, "loss": 0.9185, "step": 320400 }, { "epoch": 4.47, "learning_rate": 7.68323604396829e-06, "loss": 0.9445, "step": 320500 }, { "epoch": 4.47, "learning_rate": 7.676339876565569e-06, "loss": 0.862, "step": 320600 }, { "epoch": 4.47, "learning_rate": 7.669374050906254e-06, "loss": 0.8918, "step": 320700 }, { "epoch": 4.47, "learning_rate": 7.662408225246938e-06, "loss": 0.888, "step": 320800 }, { "epoch": 4.47, "learning_rate": 7.655442399587623e-06, "loss": 0.8846, "step": 320900 }, { "epoch": 4.47, "learning_rate": 7.648476573928308e-06, "loss": 0.8908, "step": 321000 }, { "epoch": 4.47, "learning_rate": 7.641510748268992e-06, "loss": 0.8922, "step": 321100 }, { "epoch": 4.47, "learning_rate": 7.634544922609677e-06, "loss": 0.8926, "step": 321200 }, { "epoch": 4.48, "learning_rate": 7.6275790969503625e-06, "loss": 0.8947, "step": 321300 }, { "epoch": 4.48, "learning_rate": 7.620613271291047e-06, "loss": 0.892, "step": 321400 }, { "epoch": 4.48, "learning_rate": 7.61364744563173e-06, "loss": 0.8965, "step": 321500 }, { "epoch": 4.48, "learning_rate": 7.606681619972415e-06, "loss": 0.9022, "step": 321600 }, { "epoch": 4.48, "learning_rate": 7.5997157943130996e-06, "loss": 0.8852, "step": 321700 }, { "epoch": 4.48, "learning_rate": 7.592749968653784e-06, "loss": 0.9176, "step": 321800 }, { "epoch": 4.48, "learning_rate": 7.585784142994469e-06, "loss": 0.8962, "step": 321900 }, { "epoch": 4.49, "learning_rate": 7.578818317335154e-06, "loss": 0.9282, "step": 322000 }, { "epoch": 4.49, "learning_rate": 7.571852491675838e-06, "loss": 0.9027, "step": 322100 }, { "epoch": 4.49, "learning_rate": 7.564886666016523e-06, "loss": 0.9116, "step": 322200 }, { "epoch": 4.49, "learning_rate": 7.5579208403572085e-06, "loss": 0.8951, "step": 322300 }, { "epoch": 4.49, "learning_rate": 7.550955014697893e-06, "loss": 0.9081, "step": 322400 }, { "epoch": 4.49, "learning_rate": 7.543989189038578e-06, "loss": 0.9026, "step": 322500 }, { "epoch": 4.49, "learning_rate": 7.5370233633792625e-06, "loss": 0.8898, "step": 322600 }, { "epoch": 4.5, "learning_rate": 7.53012719597654e-06, "loss": 0.8865, "step": 322700 }, { "epoch": 4.5, "learning_rate": 7.523161370317223e-06, "loss": 0.9049, "step": 322800 }, { "epoch": 4.5, "learning_rate": 7.5161955446579075e-06, "loss": 0.9006, "step": 322900 }, { "epoch": 4.5, "learning_rate": 7.509229718998592e-06, "loss": 0.8915, "step": 323000 }, { "epoch": 4.5, "learning_rate": 7.502263893339278e-06, "loss": 0.8986, "step": 323100 }, { "epoch": 4.5, "learning_rate": 7.495298067679962e-06, "loss": 0.8966, "step": 323200 }, { "epoch": 4.5, "learning_rate": 7.488332242020647e-06, "loss": 0.8789, "step": 323300 }, { "epoch": 4.51, "learning_rate": 7.481366416361332e-06, "loss": 0.9292, "step": 323400 }, { "epoch": 4.51, "learning_rate": 7.4744005907020164e-06, "loss": 0.9006, "step": 323500 }, { "epoch": 4.51, "learning_rate": 7.467434765042701e-06, "loss": 0.9103, "step": 323600 }, { "epoch": 4.51, "learning_rate": 7.460468939383385e-06, "loss": 0.902, "step": 323700 }, { "epoch": 4.51, "learning_rate": 7.45350311372407e-06, "loss": 0.904, "step": 323800 }, { "epoch": 4.51, "learning_rate": 7.446537288064754e-06, "loss": 0.897, "step": 323900 }, { "epoch": 4.51, "learning_rate": 7.439571462405439e-06, "loss": 0.9162, "step": 324000 }, { "epoch": 4.52, "learning_rate": 7.432605636746124e-06, "loss": 0.9034, "step": 324100 }, { "epoch": 4.52, "learning_rate": 7.4256398110868075e-06, "loss": 0.8902, "step": 324200 }, { "epoch": 4.52, "learning_rate": 7.418673985427493e-06, "loss": 0.9144, "step": 324300 }, { "epoch": 4.52, "learning_rate": 7.411708159768178e-06, "loss": 0.8973, "step": 324400 }, { "epoch": 4.52, "learning_rate": 7.4047423341088624e-06, "loss": 0.8659, "step": 324500 }, { "epoch": 4.52, "learning_rate": 7.397776508449547e-06, "loss": 0.9041, "step": 324600 }, { "epoch": 4.52, "learning_rate": 7.390810682790232e-06, "loss": 0.9243, "step": 324700 }, { "epoch": 4.53, "learning_rate": 7.383844857130916e-06, "loss": 0.9291, "step": 324800 }, { "epoch": 4.53, "learning_rate": 7.376948689728194e-06, "loss": 0.9036, "step": 324900 }, { "epoch": 4.53, "learning_rate": 7.369982864068878e-06, "loss": 0.9243, "step": 325000 }, { "epoch": 4.53, "learning_rate": 7.363017038409562e-06, "loss": 0.8956, "step": 325100 }, { "epoch": 4.53, "learning_rate": 7.356051212750248e-06, "loss": 0.8965, "step": 325200 }, { "epoch": 4.53, "learning_rate": 7.3490853870909325e-06, "loss": 0.8957, "step": 325300 }, { "epoch": 4.53, "learning_rate": 7.342119561431617e-06, "loss": 0.9157, "step": 325400 }, { "epoch": 4.53, "learning_rate": 7.335153735772301e-06, "loss": 0.9129, "step": 325500 }, { "epoch": 4.54, "learning_rate": 7.328187910112986e-06, "loss": 0.862, "step": 325600 }, { "epoch": 4.54, "learning_rate": 7.32122208445367e-06, "loss": 0.9181, "step": 325700 }, { "epoch": 4.54, "learning_rate": 7.314256258794355e-06, "loss": 0.9186, "step": 325800 }, { "epoch": 4.54, "learning_rate": 7.30729043313504e-06, "loss": 0.9071, "step": 325900 }, { "epoch": 4.54, "learning_rate": 7.3003246074757244e-06, "loss": 0.9064, "step": 326000 }, { "epoch": 4.54, "learning_rate": 7.293358781816408e-06, "loss": 0.8919, "step": 326100 }, { "epoch": 4.54, "learning_rate": 7.286392956157093e-06, "loss": 0.8977, "step": 326200 }, { "epoch": 4.55, "learning_rate": 7.279427130497778e-06, "loss": 0.8911, "step": 326300 }, { "epoch": 4.55, "learning_rate": 7.272461304838463e-06, "loss": 0.9122, "step": 326400 }, { "epoch": 4.55, "learning_rate": 7.265495479179148e-06, "loss": 0.8897, "step": 326500 }, { "epoch": 4.55, "learning_rate": 7.2585296535198325e-06, "loss": 0.8913, "step": 326600 }, { "epoch": 4.55, "learning_rate": 7.251563827860516e-06, "loss": 0.9271, "step": 326700 }, { "epoch": 4.55, "learning_rate": 7.244598002201201e-06, "loss": 0.8734, "step": 326800 }, { "epoch": 4.55, "learning_rate": 7.237632176541886e-06, "loss": 0.9086, "step": 326900 }, { "epoch": 4.56, "learning_rate": 7.230736009139163e-06, "loss": 0.8985, "step": 327000 }, { "epoch": 4.56, "learning_rate": 7.223770183479848e-06, "loss": 0.8944, "step": 327100 }, { "epoch": 4.56, "learning_rate": 7.216804357820533e-06, "loss": 0.8879, "step": 327200 }, { "epoch": 4.56, "learning_rate": 7.209838532161218e-06, "loss": 0.8855, "step": 327300 }, { "epoch": 4.56, "learning_rate": 7.202872706501902e-06, "loss": 0.9174, "step": 327400 }, { "epoch": 4.56, "learning_rate": 7.195906880842586e-06, "loss": 0.9044, "step": 327500 }, { "epoch": 4.56, "learning_rate": 7.188941055183271e-06, "loss": 0.9211, "step": 327600 }, { "epoch": 4.57, "learning_rate": 7.181975229523956e-06, "loss": 0.8787, "step": 327700 }, { "epoch": 4.57, "learning_rate": 7.1750094038646405e-06, "loss": 0.9017, "step": 327800 }, { "epoch": 4.57, "learning_rate": 7.168043578205325e-06, "loss": 0.8862, "step": 327900 }, { "epoch": 4.57, "learning_rate": 7.161077752546009e-06, "loss": 0.8903, "step": 328000 }, { "epoch": 4.57, "learning_rate": 7.154111926886694e-06, "loss": 0.8577, "step": 328100 }, { "epoch": 4.57, "learning_rate": 7.147146101227378e-06, "loss": 0.889, "step": 328200 }, { "epoch": 4.57, "learning_rate": 7.140180275568063e-06, "loss": 0.8597, "step": 328300 }, { "epoch": 4.58, "learning_rate": 7.1332144499087486e-06, "loss": 0.8797, "step": 328400 }, { "epoch": 4.58, "learning_rate": 7.126248624249432e-06, "loss": 0.8777, "step": 328500 }, { "epoch": 4.58, "learning_rate": 7.119282798590117e-06, "loss": 0.8944, "step": 328600 }, { "epoch": 4.58, "learning_rate": 7.112316972930802e-06, "loss": 0.9213, "step": 328700 }, { "epoch": 4.58, "learning_rate": 7.1053511472714865e-06, "loss": 0.9316, "step": 328800 }, { "epoch": 4.58, "learning_rate": 7.098385321612171e-06, "loss": 0.9004, "step": 328900 }, { "epoch": 4.58, "learning_rate": 7.091419495952856e-06, "loss": 0.9317, "step": 329000 }, { "epoch": 4.58, "learning_rate": 7.08445367029354e-06, "loss": 0.8835, "step": 329100 }, { "epoch": 4.59, "learning_rate": 7.077487844634224e-06, "loss": 0.9031, "step": 329200 }, { "epoch": 4.59, "learning_rate": 7.070522018974909e-06, "loss": 0.8982, "step": 329300 }, { "epoch": 4.59, "learning_rate": 7.063556193315594e-06, "loss": 0.9028, "step": 329400 }, { "epoch": 4.59, "learning_rate": 7.056590367656278e-06, "loss": 0.8781, "step": 329500 }, { "epoch": 4.59, "learning_rate": 7.049624541996963e-06, "loss": 0.9082, "step": 329600 }, { "epoch": 4.59, "learning_rate": 7.042658716337648e-06, "loss": 0.9128, "step": 329700 }, { "epoch": 4.59, "learning_rate": 7.0356928906783324e-06, "loss": 0.9191, "step": 329800 }, { "epoch": 4.6, "learning_rate": 7.028727065019017e-06, "loss": 0.8662, "step": 329900 }, { "epoch": 4.6, "learning_rate": 7.021761239359702e-06, "loss": 0.9145, "step": 330000 }, { "epoch": 4.6, "learning_rate": 7.0147954137003865e-06, "loss": 0.9019, "step": 330100 }, { "epoch": 4.6, "learning_rate": 7.00782958804107e-06, "loss": 0.9101, "step": 330200 }, { "epoch": 4.6, "learning_rate": 7.000863762381755e-06, "loss": 0.9263, "step": 330300 }, { "epoch": 4.6, "learning_rate": 6.99389793672244e-06, "loss": 0.9352, "step": 330400 }, { "epoch": 4.6, "learning_rate": 6.986932111063124e-06, "loss": 0.9041, "step": 330500 }, { "epoch": 4.61, "learning_rate": 6.979966285403809e-06, "loss": 0.9118, "step": 330600 }, { "epoch": 4.61, "learning_rate": 6.973000459744494e-06, "loss": 0.8861, "step": 330700 }, { "epoch": 4.61, "learning_rate": 6.966034634085178e-06, "loss": 0.9053, "step": 330800 }, { "epoch": 4.61, "learning_rate": 6.959068808425863e-06, "loss": 0.8984, "step": 330900 }, { "epoch": 4.61, "learning_rate": 6.952102982766548e-06, "loss": 0.8753, "step": 331000 }, { "epoch": 4.61, "learning_rate": 6.945206815363825e-06, "loss": 0.8981, "step": 331100 }, { "epoch": 4.61, "learning_rate": 6.93824098970451e-06, "loss": 0.9, "step": 331200 }, { "epoch": 4.62, "learning_rate": 6.9312751640451944e-06, "loss": 0.887, "step": 331300 }, { "epoch": 4.62, "learning_rate": 6.924309338385879e-06, "loss": 0.9234, "step": 331400 }, { "epoch": 4.62, "learning_rate": 6.917343512726563e-06, "loss": 0.9033, "step": 331500 }, { "epoch": 4.62, "learning_rate": 6.910377687067248e-06, "loss": 0.8558, "step": 331600 }, { "epoch": 4.62, "learning_rate": 6.903411861407933e-06, "loss": 0.8931, "step": 331700 }, { "epoch": 4.62, "learning_rate": 6.896446035748618e-06, "loss": 0.9008, "step": 331800 }, { "epoch": 4.62, "learning_rate": 6.8894802100893025e-06, "loss": 0.8831, "step": 331900 }, { "epoch": 4.63, "learning_rate": 6.882514384429987e-06, "loss": 0.8829, "step": 332000 }, { "epoch": 4.63, "learning_rate": 6.875548558770671e-06, "loss": 0.9051, "step": 332100 }, { "epoch": 4.63, "learning_rate": 6.868582733111356e-06, "loss": 0.905, "step": 332200 }, { "epoch": 4.63, "learning_rate": 6.86161690745204e-06, "loss": 0.9167, "step": 332300 }, { "epoch": 4.63, "learning_rate": 6.854651081792725e-06, "loss": 0.9018, "step": 332400 }, { "epoch": 4.63, "learning_rate": 6.84768525613341e-06, "loss": 0.8984, "step": 332500 }, { "epoch": 4.63, "learning_rate": 6.840719430474094e-06, "loss": 0.9024, "step": 332600 }, { "epoch": 4.64, "learning_rate": 6.833753604814778e-06, "loss": 0.89, "step": 332700 }, { "epoch": 4.64, "learning_rate": 6.826787779155463e-06, "loss": 0.9009, "step": 332800 }, { "epoch": 4.64, "learning_rate": 6.8198219534961485e-06, "loss": 0.9188, "step": 332900 }, { "epoch": 4.64, "learning_rate": 6.812856127836833e-06, "loss": 0.9017, "step": 333000 }, { "epoch": 4.64, "learning_rate": 6.8059599604341105e-06, "loss": 0.9005, "step": 333100 }, { "epoch": 4.64, "learning_rate": 6.798994134774795e-06, "loss": 0.9046, "step": 333200 }, { "epoch": 4.64, "learning_rate": 6.79202830911548e-06, "loss": 0.9067, "step": 333300 }, { "epoch": 4.64, "learning_rate": 6.785062483456164e-06, "loss": 0.9117, "step": 333400 }, { "epoch": 4.65, "learning_rate": 6.778096657796848e-06, "loss": 0.9139, "step": 333500 }, { "epoch": 4.65, "learning_rate": 6.771130832137533e-06, "loss": 0.9028, "step": 333600 }, { "epoch": 4.65, "learning_rate": 6.764165006478219e-06, "loss": 0.8987, "step": 333700 }, { "epoch": 4.65, "learning_rate": 6.757268839075496e-06, "loss": 0.8911, "step": 333800 }, { "epoch": 4.65, "learning_rate": 6.7503030134161805e-06, "loss": 0.8636, "step": 333900 }, { "epoch": 4.65, "learning_rate": 6.743337187756865e-06, "loss": 0.902, "step": 334000 }, { "epoch": 4.65, "learning_rate": 6.736371362097549e-06, "loss": 0.8968, "step": 334100 }, { "epoch": 4.66, "learning_rate": 6.729405536438234e-06, "loss": 0.8989, "step": 334200 }, { "epoch": 4.66, "learning_rate": 6.7224397107789184e-06, "loss": 0.887, "step": 334300 }, { "epoch": 4.66, "learning_rate": 6.715473885119603e-06, "loss": 0.886, "step": 334400 }, { "epoch": 4.66, "learning_rate": 6.708508059460289e-06, "loss": 0.9102, "step": 334500 }, { "epoch": 4.66, "learning_rate": 6.701542233800973e-06, "loss": 0.895, "step": 334600 }, { "epoch": 4.66, "learning_rate": 6.694576408141657e-06, "loss": 0.8741, "step": 334700 }, { "epoch": 4.66, "learning_rate": 6.687610582482342e-06, "loss": 0.9149, "step": 334800 }, { "epoch": 4.67, "learning_rate": 6.6806447568230265e-06, "loss": 0.8864, "step": 334900 }, { "epoch": 4.67, "learning_rate": 6.673678931163711e-06, "loss": 0.895, "step": 335000 }, { "epoch": 4.67, "learning_rate": 6.666713105504396e-06, "loss": 0.902, "step": 335100 }, { "epoch": 4.67, "learning_rate": 6.65974727984508e-06, "loss": 0.8989, "step": 335200 }, { "epoch": 4.67, "learning_rate": 6.652781454185764e-06, "loss": 0.9056, "step": 335300 }, { "epoch": 4.67, "learning_rate": 6.645815628526449e-06, "loss": 0.8864, "step": 335400 }, { "epoch": 4.67, "learning_rate": 6.638849802867134e-06, "loss": 0.9214, "step": 335500 }, { "epoch": 4.68, "learning_rate": 6.6318839772078185e-06, "loss": 0.8958, "step": 335600 }, { "epoch": 4.68, "learning_rate": 6.624918151548503e-06, "loss": 0.9103, "step": 335700 }, { "epoch": 4.68, "learning_rate": 6.617952325889188e-06, "loss": 0.8671, "step": 335800 }, { "epoch": 4.68, "learning_rate": 6.6109865002298725e-06, "loss": 0.9166, "step": 335900 }, { "epoch": 4.68, "learning_rate": 6.604020674570557e-06, "loss": 0.8945, "step": 336000 }, { "epoch": 4.68, "learning_rate": 6.597054848911242e-06, "loss": 0.923, "step": 336100 }, { "epoch": 4.68, "learning_rate": 6.5900890232519266e-06, "loss": 0.9177, "step": 336200 }, { "epoch": 4.69, "learning_rate": 6.583123197592611e-06, "loss": 0.905, "step": 336300 }, { "epoch": 4.69, "learning_rate": 6.576157371933295e-06, "loss": 0.9267, "step": 336400 }, { "epoch": 4.69, "learning_rate": 6.56919154627398e-06, "loss": 0.8953, "step": 336500 }, { "epoch": 4.69, "learning_rate": 6.5622257206146644e-06, "loss": 0.8962, "step": 336600 }, { "epoch": 4.69, "learning_rate": 6.555259894955349e-06, "loss": 0.9365, "step": 336700 }, { "epoch": 4.69, "learning_rate": 6.548294069296034e-06, "loss": 0.8649, "step": 336800 }, { "epoch": 4.69, "learning_rate": 6.541328243636718e-06, "loss": 0.8944, "step": 336900 }, { "epoch": 4.69, "learning_rate": 6.534362417977403e-06, "loss": 0.9301, "step": 337000 }, { "epoch": 4.7, "learning_rate": 6.527396592318088e-06, "loss": 0.8917, "step": 337100 }, { "epoch": 4.7, "learning_rate": 6.520500424915365e-06, "loss": 0.8989, "step": 337200 }, { "epoch": 4.7, "learning_rate": 6.51353459925605e-06, "loss": 0.8881, "step": 337300 }, { "epoch": 4.7, "learning_rate": 6.5065687735967345e-06, "loss": 0.9066, "step": 337400 }, { "epoch": 4.7, "learning_rate": 6.499602947937419e-06, "loss": 0.8719, "step": 337500 }, { "epoch": 4.7, "learning_rate": 6.492637122278104e-06, "loss": 0.9109, "step": 337600 }, { "epoch": 4.7, "learning_rate": 6.485671296618788e-06, "loss": 0.8819, "step": 337700 }, { "epoch": 4.71, "learning_rate": 6.478705470959473e-06, "loss": 0.9111, "step": 337800 }, { "epoch": 4.71, "learning_rate": 6.471739645300158e-06, "loss": 0.9033, "step": 337900 }, { "epoch": 4.71, "learning_rate": 6.464773819640843e-06, "loss": 0.9152, "step": 338000 }, { "epoch": 4.71, "learning_rate": 6.457807993981527e-06, "loss": 0.8903, "step": 338100 }, { "epoch": 4.71, "learning_rate": 6.450842168322211e-06, "loss": 0.8815, "step": 338200 }, { "epoch": 4.71, "learning_rate": 6.443876342662896e-06, "loss": 0.9236, "step": 338300 }, { "epoch": 4.71, "learning_rate": 6.4369105170035805e-06, "loss": 0.899, "step": 338400 }, { "epoch": 4.72, "learning_rate": 6.429944691344265e-06, "loss": 0.8957, "step": 338500 }, { "epoch": 4.72, "learning_rate": 6.42297886568495e-06, "loss": 0.8671, "step": 338600 }, { "epoch": 4.72, "learning_rate": 6.4160130400256345e-06, "loss": 0.9103, "step": 338700 }, { "epoch": 4.72, "learning_rate": 6.409047214366318e-06, "loss": 0.8624, "step": 338800 }, { "epoch": 4.72, "learning_rate": 6.402081388707003e-06, "loss": 0.8942, "step": 338900 }, { "epoch": 4.72, "learning_rate": 6.395115563047689e-06, "loss": 0.9036, "step": 339000 }, { "epoch": 4.72, "learning_rate": 6.388149737388373e-06, "loss": 0.8815, "step": 339100 }, { "epoch": 4.73, "learning_rate": 6.381183911729058e-06, "loss": 0.9228, "step": 339200 }, { "epoch": 4.73, "learning_rate": 6.374218086069742e-06, "loss": 0.8905, "step": 339300 }, { "epoch": 4.73, "learning_rate": 6.3672522604104265e-06, "loss": 0.8905, "step": 339400 }, { "epoch": 4.73, "learning_rate": 6.360356093007704e-06, "loss": 0.871, "step": 339500 }, { "epoch": 4.73, "learning_rate": 6.3533902673483884e-06, "loss": 0.8743, "step": 339600 }, { "epoch": 4.73, "learning_rate": 6.346424441689073e-06, "loss": 0.8953, "step": 339700 }, { "epoch": 4.73, "learning_rate": 6.339458616029759e-06, "loss": 0.9128, "step": 339800 }, { "epoch": 4.74, "learning_rate": 6.332492790370443e-06, "loss": 0.9017, "step": 339900 }, { "epoch": 4.74, "learning_rate": 6.325526964711128e-06, "loss": 0.8974, "step": 340000 }, { "epoch": 4.74, "learning_rate": 6.318561139051812e-06, "loss": 0.9075, "step": 340100 }, { "epoch": 4.74, "learning_rate": 6.3115953133924965e-06, "loss": 0.9063, "step": 340200 }, { "epoch": 4.74, "learning_rate": 6.304629487733181e-06, "loss": 0.9167, "step": 340300 }, { "epoch": 4.74, "learning_rate": 6.297663662073866e-06, "loss": 0.873, "step": 340400 }, { "epoch": 4.74, "learning_rate": 6.290697836414551e-06, "loss": 0.8829, "step": 340500 }, { "epoch": 4.75, "learning_rate": 6.283732010755234e-06, "loss": 0.8914, "step": 340600 }, { "epoch": 4.75, "learning_rate": 6.276766185095919e-06, "loss": 0.9371, "step": 340700 }, { "epoch": 4.75, "learning_rate": 6.269800359436604e-06, "loss": 0.8828, "step": 340800 }, { "epoch": 4.75, "learning_rate": 6.2628345337772885e-06, "loss": 0.895, "step": 340900 }, { "epoch": 4.75, "learning_rate": 6.255868708117973e-06, "loss": 0.8914, "step": 341000 }, { "epoch": 4.75, "learning_rate": 6.248972540715251e-06, "loss": 0.9313, "step": 341100 }, { "epoch": 4.75, "learning_rate": 6.242006715055936e-06, "loss": 0.879, "step": 341200 }, { "epoch": 4.75, "learning_rate": 6.235040889396621e-06, "loss": 0.8908, "step": 341300 }, { "epoch": 4.76, "learning_rate": 6.2280750637373045e-06, "loss": 0.9114, "step": 341400 }, { "epoch": 4.76, "learning_rate": 6.221109238077989e-06, "loss": 0.9086, "step": 341500 }, { "epoch": 4.76, "learning_rate": 6.214143412418674e-06, "loss": 0.9252, "step": 341600 }, { "epoch": 4.76, "learning_rate": 6.2071775867593585e-06, "loss": 0.9261, "step": 341700 }, { "epoch": 4.76, "learning_rate": 6.200211761100043e-06, "loss": 0.8984, "step": 341800 }, { "epoch": 4.76, "learning_rate": 6.193245935440728e-06, "loss": 0.8705, "step": 341900 }, { "epoch": 4.76, "learning_rate": 6.186280109781413e-06, "loss": 0.8985, "step": 342000 }, { "epoch": 4.77, "learning_rate": 6.179314284122097e-06, "loss": 0.8767, "step": 342100 }, { "epoch": 4.77, "learning_rate": 6.172348458462782e-06, "loss": 0.8924, "step": 342200 }, { "epoch": 4.77, "learning_rate": 6.165382632803467e-06, "loss": 0.9175, "step": 342300 }, { "epoch": 4.77, "learning_rate": 6.158416807144151e-06, "loss": 0.9246, "step": 342400 }, { "epoch": 4.77, "learning_rate": 6.151450981484835e-06, "loss": 0.8804, "step": 342500 }, { "epoch": 4.77, "learning_rate": 6.14448515582552e-06, "loss": 0.8869, "step": 342600 }, { "epoch": 4.77, "learning_rate": 6.1375193301662045e-06, "loss": 0.919, "step": 342700 }, { "epoch": 4.78, "learning_rate": 6.130553504506889e-06, "loss": 0.9046, "step": 342800 }, { "epoch": 4.78, "learning_rate": 6.123587678847574e-06, "loss": 0.8937, "step": 342900 }, { "epoch": 4.78, "learning_rate": 6.1166218531882586e-06, "loss": 0.8938, "step": 343000 }, { "epoch": 4.78, "learning_rate": 6.109656027528943e-06, "loss": 0.903, "step": 343100 }, { "epoch": 4.78, "learning_rate": 6.102690201869628e-06, "loss": 0.8774, "step": 343200 }, { "epoch": 4.78, "learning_rate": 6.095724376210313e-06, "loss": 0.8893, "step": 343300 }, { "epoch": 4.78, "learning_rate": 6.088758550550997e-06, "loss": 0.8893, "step": 343400 }, { "epoch": 4.79, "learning_rate": 6.081792724891682e-06, "loss": 0.8624, "step": 343500 }, { "epoch": 4.79, "learning_rate": 6.074826899232366e-06, "loss": 0.8782, "step": 343600 }, { "epoch": 4.79, "learning_rate": 6.0678610735730505e-06, "loss": 0.8608, "step": 343700 }, { "epoch": 4.79, "learning_rate": 6.060895247913735e-06, "loss": 0.8896, "step": 343800 }, { "epoch": 4.79, "learning_rate": 6.05392942225442e-06, "loss": 0.9133, "step": 343900 }, { "epoch": 4.79, "learning_rate": 6.0469635965951045e-06, "loss": 0.9189, "step": 344000 }, { "epoch": 4.79, "learning_rate": 6.039997770935789e-06, "loss": 0.908, "step": 344100 }, { "epoch": 4.8, "learning_rate": 6.033031945276473e-06, "loss": 0.902, "step": 344200 }, { "epoch": 4.8, "learning_rate": 6.026066119617159e-06, "loss": 0.8951, "step": 344300 }, { "epoch": 4.8, "learning_rate": 6.019100293957843e-06, "loss": 0.9014, "step": 344400 }, { "epoch": 4.8, "learning_rate": 6.012134468298528e-06, "loss": 0.8941, "step": 344500 }, { "epoch": 4.8, "learning_rate": 6.005168642639213e-06, "loss": 0.9028, "step": 344600 }, { "epoch": 4.8, "learning_rate": 5.9982028169798965e-06, "loss": 0.8792, "step": 344700 }, { "epoch": 4.8, "learning_rate": 5.991236991320581e-06, "loss": 0.9255, "step": 344800 }, { "epoch": 4.81, "learning_rate": 5.984271165661266e-06, "loss": 0.8769, "step": 344900 }, { "epoch": 4.81, "learning_rate": 5.9773053400019505e-06, "loss": 0.9103, "step": 345000 }, { "epoch": 4.81, "learning_rate": 5.970339514342635e-06, "loss": 0.924, "step": 345100 }, { "epoch": 4.81, "learning_rate": 5.96337368868332e-06, "loss": 0.8969, "step": 345200 }, { "epoch": 4.81, "learning_rate": 5.956407863024004e-06, "loss": 0.9029, "step": 345300 }, { "epoch": 4.81, "learning_rate": 5.949442037364688e-06, "loss": 0.9153, "step": 345400 }, { "epoch": 4.81, "learning_rate": 5.9425458699619665e-06, "loss": 0.8868, "step": 345500 }, { "epoch": 4.81, "learning_rate": 5.935580044302651e-06, "loss": 0.9184, "step": 345600 }, { "epoch": 4.82, "learning_rate": 5.928614218643336e-06, "loss": 0.8876, "step": 345700 }, { "epoch": 4.82, "learning_rate": 5.921648392984021e-06, "loss": 0.9123, "step": 345800 }, { "epoch": 4.82, "learning_rate": 5.914682567324705e-06, "loss": 0.8983, "step": 345900 }, { "epoch": 4.82, "learning_rate": 5.90771674166539e-06, "loss": 0.9041, "step": 346000 }, { "epoch": 4.82, "learning_rate": 5.900750916006074e-06, "loss": 0.9088, "step": 346100 }, { "epoch": 4.82, "learning_rate": 5.8937850903467585e-06, "loss": 0.8975, "step": 346200 }, { "epoch": 4.82, "learning_rate": 5.886819264687443e-06, "loss": 0.8626, "step": 346300 }, { "epoch": 4.83, "learning_rate": 5.879853439028129e-06, "loss": 0.9134, "step": 346400 }, { "epoch": 4.83, "learning_rate": 5.872887613368813e-06, "loss": 0.9149, "step": 346500 }, { "epoch": 4.83, "learning_rate": 5.865991445966091e-06, "loss": 0.9046, "step": 346600 }, { "epoch": 4.83, "learning_rate": 5.859025620306775e-06, "loss": 0.8771, "step": 346700 }, { "epoch": 4.83, "learning_rate": 5.852059794647459e-06, "loss": 0.9007, "step": 346800 }, { "epoch": 4.83, "learning_rate": 5.845093968988144e-06, "loss": 0.8749, "step": 346900 }, { "epoch": 4.83, "learning_rate": 5.8381281433288285e-06, "loss": 0.924, "step": 347000 }, { "epoch": 4.84, "learning_rate": 5.831162317669513e-06, "loss": 0.9206, "step": 347100 }, { "epoch": 4.84, "learning_rate": 5.824266150266791e-06, "loss": 0.8967, "step": 347200 }, { "epoch": 4.84, "learning_rate": 5.817300324607476e-06, "loss": 0.8749, "step": 347300 }, { "epoch": 4.84, "learning_rate": 5.810334498948161e-06, "loss": 0.9193, "step": 347400 }, { "epoch": 4.84, "learning_rate": 5.8033686732888446e-06, "loss": 0.8796, "step": 347500 }, { "epoch": 4.84, "learning_rate": 5.796402847629529e-06, "loss": 0.9018, "step": 347600 }, { "epoch": 4.84, "learning_rate": 5.789437021970214e-06, "loss": 0.8721, "step": 347700 }, { "epoch": 4.85, "learning_rate": 5.782471196310899e-06, "loss": 0.9011, "step": 347800 }, { "epoch": 4.85, "learning_rate": 5.775505370651583e-06, "loss": 0.9137, "step": 347900 }, { "epoch": 4.85, "learning_rate": 5.768539544992269e-06, "loss": 0.8995, "step": 348000 }, { "epoch": 4.85, "learning_rate": 5.761573719332953e-06, "loss": 0.9014, "step": 348100 }, { "epoch": 4.85, "learning_rate": 5.754607893673637e-06, "loss": 0.9109, "step": 348200 }, { "epoch": 4.85, "learning_rate": 5.747642068014322e-06, "loss": 0.8916, "step": 348300 }, { "epoch": 4.85, "learning_rate": 5.740676242355007e-06, "loss": 0.8899, "step": 348400 }, { "epoch": 4.86, "learning_rate": 5.733710416695691e-06, "loss": 0.8821, "step": 348500 }, { "epoch": 4.86, "learning_rate": 5.726744591036376e-06, "loss": 0.8959, "step": 348600 }, { "epoch": 4.86, "learning_rate": 5.71977876537706e-06, "loss": 0.8975, "step": 348700 }, { "epoch": 4.86, "learning_rate": 5.712812939717745e-06, "loss": 0.8898, "step": 348800 }, { "epoch": 4.86, "learning_rate": 5.705847114058429e-06, "loss": 0.8829, "step": 348900 }, { "epoch": 4.86, "learning_rate": 5.698881288399114e-06, "loss": 0.8892, "step": 349000 }, { "epoch": 4.86, "learning_rate": 5.691915462739799e-06, "loss": 0.8734, "step": 349100 }, { "epoch": 4.86, "learning_rate": 5.684949637080483e-06, "loss": 0.91, "step": 349200 }, { "epoch": 4.87, "learning_rate": 5.677983811421168e-06, "loss": 0.8991, "step": 349300 }, { "epoch": 4.87, "learning_rate": 5.671017985761853e-06, "loss": 0.9066, "step": 349400 }, { "epoch": 4.87, "learning_rate": 5.664052160102537e-06, "loss": 0.8815, "step": 349500 }, { "epoch": 4.87, "learning_rate": 5.657086334443222e-06, "loss": 0.9071, "step": 349600 }, { "epoch": 4.87, "learning_rate": 5.650120508783907e-06, "loss": 0.902, "step": 349700 }, { "epoch": 4.87, "learning_rate": 5.6431546831245906e-06, "loss": 0.9186, "step": 349800 }, { "epoch": 4.87, "learning_rate": 5.636188857465275e-06, "loss": 0.895, "step": 349900 }, { "epoch": 4.88, "learning_rate": 5.62922303180596e-06, "loss": 0.8841, "step": 350000 }, { "epoch": 4.88, "learning_rate": 5.622257206146645e-06, "loss": 0.8849, "step": 350100 }, { "epoch": 4.88, "learning_rate": 5.615291380487329e-06, "loss": 0.8815, "step": 350200 }, { "epoch": 4.88, "learning_rate": 5.608325554828013e-06, "loss": 0.8902, "step": 350300 }, { "epoch": 4.88, "learning_rate": 5.601359729168699e-06, "loss": 0.9048, "step": 350400 }, { "epoch": 4.88, "learning_rate": 5.594393903509383e-06, "loss": 0.9038, "step": 350500 }, { "epoch": 4.88, "learning_rate": 5.587428077850068e-06, "loss": 0.8853, "step": 350600 }, { "epoch": 4.89, "learning_rate": 5.580462252190753e-06, "loss": 0.9107, "step": 350700 }, { "epoch": 4.89, "learning_rate": 5.573496426531437e-06, "loss": 0.9116, "step": 350800 }, { "epoch": 4.89, "learning_rate": 5.566530600872121e-06, "loss": 0.8804, "step": 350900 }, { "epoch": 4.89, "learning_rate": 5.559564775212806e-06, "loss": 0.8989, "step": 351000 }, { "epoch": 4.89, "learning_rate": 5.552598949553491e-06, "loss": 0.8993, "step": 351100 }, { "epoch": 4.89, "learning_rate": 5.545633123894175e-06, "loss": 0.8658, "step": 351200 }, { "epoch": 4.89, "learning_rate": 5.53866729823486e-06, "loss": 0.8993, "step": 351300 }, { "epoch": 4.9, "learning_rate": 5.531701472575545e-06, "loss": 0.9247, "step": 351400 }, { "epoch": 4.9, "learning_rate": 5.5247356469162285e-06, "loss": 0.8967, "step": 351500 }, { "epoch": 4.9, "learning_rate": 5.517769821256913e-06, "loss": 0.9107, "step": 351600 }, { "epoch": 4.9, "learning_rate": 5.510803995597599e-06, "loss": 0.8948, "step": 351700 }, { "epoch": 4.9, "learning_rate": 5.503838169938283e-06, "loss": 0.8823, "step": 351800 }, { "epoch": 4.9, "learning_rate": 5.496872344278968e-06, "loss": 0.8879, "step": 351900 }, { "epoch": 4.9, "learning_rate": 5.489906518619652e-06, "loss": 0.9149, "step": 352000 }, { "epoch": 4.91, "learning_rate": 5.482940692960337e-06, "loss": 0.8996, "step": 352100 }, { "epoch": 4.91, "learning_rate": 5.475974867301021e-06, "loss": 0.9015, "step": 352200 }, { "epoch": 4.91, "learning_rate": 5.469009041641706e-06, "loss": 0.9153, "step": 352300 }, { "epoch": 4.91, "learning_rate": 5.462043215982391e-06, "loss": 0.883, "step": 352400 }, { "epoch": 4.91, "learning_rate": 5.455077390323075e-06, "loss": 0.8792, "step": 352500 }, { "epoch": 4.91, "learning_rate": 5.448111564663759e-06, "loss": 0.9128, "step": 352600 }, { "epoch": 4.91, "learning_rate": 5.441145739004444e-06, "loss": 0.8933, "step": 352700 }, { "epoch": 4.92, "learning_rate": 5.4341799133451285e-06, "loss": 0.8921, "step": 352800 }, { "epoch": 4.92, "learning_rate": 5.427214087685814e-06, "loss": 0.8886, "step": 352900 }, { "epoch": 4.92, "learning_rate": 5.420248262026499e-06, "loss": 0.8993, "step": 353000 }, { "epoch": 4.92, "learning_rate": 5.4132824363671826e-06, "loss": 0.9102, "step": 353100 }, { "epoch": 4.92, "learning_rate": 5.406316610707867e-06, "loss": 0.904, "step": 353200 }, { "epoch": 4.92, "learning_rate": 5.399350785048552e-06, "loss": 0.8972, "step": 353300 }, { "epoch": 4.92, "learning_rate": 5.392384959389237e-06, "loss": 0.9171, "step": 353400 }, { "epoch": 4.92, "learning_rate": 5.385419133729921e-06, "loss": 0.9246, "step": 353500 }, { "epoch": 4.93, "learning_rate": 5.378453308070606e-06, "loss": 0.9323, "step": 353600 }, { "epoch": 4.93, "learning_rate": 5.37148748241129e-06, "loss": 0.8837, "step": 353700 }, { "epoch": 4.93, "learning_rate": 5.364591315008569e-06, "loss": 0.8957, "step": 353800 }, { "epoch": 4.93, "learning_rate": 5.357625489349253e-06, "loss": 0.875, "step": 353900 }, { "epoch": 4.93, "learning_rate": 5.350659663689937e-06, "loss": 0.8903, "step": 354000 }, { "epoch": 4.93, "learning_rate": 5.343693838030622e-06, "loss": 0.8809, "step": 354100 }, { "epoch": 4.93, "learning_rate": 5.336728012371307e-06, "loss": 0.8936, "step": 354200 }, { "epoch": 4.94, "learning_rate": 5.329831844968584e-06, "loss": 0.8777, "step": 354300 }, { "epoch": 4.94, "learning_rate": 5.322866019309269e-06, "loss": 0.8892, "step": 354400 }, { "epoch": 4.94, "learning_rate": 5.315900193649954e-06, "loss": 0.8496, "step": 354500 }, { "epoch": 4.94, "learning_rate": 5.308934367990638e-06, "loss": 0.8915, "step": 354600 }, { "epoch": 4.94, "learning_rate": 5.301968542331323e-06, "loss": 0.9091, "step": 354700 }, { "epoch": 4.94, "learning_rate": 5.295002716672007e-06, "loss": 0.8996, "step": 354800 }, { "epoch": 4.94, "learning_rate": 5.288036891012692e-06, "loss": 0.8934, "step": 354900 }, { "epoch": 4.95, "learning_rate": 5.281071065353377e-06, "loss": 0.914, "step": 355000 }, { "epoch": 4.95, "learning_rate": 5.2741052396940614e-06, "loss": 0.8762, "step": 355100 }, { "epoch": 4.95, "learning_rate": 5.267139414034745e-06, "loss": 0.9032, "step": 355200 }, { "epoch": 4.95, "learning_rate": 5.26017358837543e-06, "loss": 0.8971, "step": 355300 }, { "epoch": 4.95, "learning_rate": 5.253207762716115e-06, "loss": 0.9071, "step": 355400 }, { "epoch": 4.95, "learning_rate": 5.246241937056799e-06, "loss": 0.9246, "step": 355500 }, { "epoch": 4.95, "learning_rate": 5.239276111397484e-06, "loss": 0.9098, "step": 355600 }, { "epoch": 4.96, "learning_rate": 5.232310285738169e-06, "loss": 0.8697, "step": 355700 }, { "epoch": 4.96, "learning_rate": 5.225344460078853e-06, "loss": 0.914, "step": 355800 }, { "epoch": 4.96, "learning_rate": 5.218378634419538e-06, "loss": 0.8898, "step": 355900 }, { "epoch": 4.96, "learning_rate": 5.211412808760223e-06, "loss": 0.9309, "step": 356000 }, { "epoch": 4.96, "learning_rate": 5.204446983100907e-06, "loss": 0.906, "step": 356100 }, { "epoch": 4.96, "learning_rate": 5.197481157441592e-06, "loss": 0.8984, "step": 356200 }, { "epoch": 4.96, "learning_rate": 5.190515331782276e-06, "loss": 0.9126, "step": 356300 }, { "epoch": 4.97, "learning_rate": 5.183549506122961e-06, "loss": 0.894, "step": 356400 }, { "epoch": 4.97, "learning_rate": 5.176583680463645e-06, "loss": 0.8923, "step": 356500 }, { "epoch": 4.97, "learning_rate": 5.16961785480433e-06, "loss": 0.8962, "step": 356600 }, { "epoch": 4.97, "learning_rate": 5.162652029145015e-06, "loss": 0.8862, "step": 356700 }, { "epoch": 4.97, "learning_rate": 5.155686203485699e-06, "loss": 0.8902, "step": 356800 }, { "epoch": 4.97, "learning_rate": 5.148720377826383e-06, "loss": 0.9039, "step": 356900 }, { "epoch": 4.97, "learning_rate": 5.141754552167069e-06, "loss": 0.8818, "step": 357000 }, { "epoch": 4.97, "learning_rate": 5.134788726507753e-06, "loss": 0.8793, "step": 357100 }, { "epoch": 4.98, "learning_rate": 5.127822900848438e-06, "loss": 0.9053, "step": 357200 }, { "epoch": 4.98, "learning_rate": 5.120857075189123e-06, "loss": 0.8951, "step": 357300 }, { "epoch": 4.98, "learning_rate": 5.113891249529807e-06, "loss": 0.8964, "step": 357400 }, { "epoch": 4.98, "learning_rate": 5.106925423870491e-06, "loss": 0.8916, "step": 357500 }, { "epoch": 4.98, "learning_rate": 5.099959598211176e-06, "loss": 0.8703, "step": 357600 }, { "epoch": 4.98, "learning_rate": 5.092993772551861e-06, "loss": 0.9257, "step": 357700 }, { "epoch": 4.98, "learning_rate": 5.086027946892545e-06, "loss": 0.8877, "step": 357800 }, { "epoch": 4.99, "learning_rate": 5.07906212123323e-06, "loss": 0.8723, "step": 357900 }, { "epoch": 4.99, "learning_rate": 5.072096295573914e-06, "loss": 0.8969, "step": 358000 }, { "epoch": 4.99, "learning_rate": 5.0651304699145985e-06, "loss": 0.8809, "step": 358100 }, { "epoch": 4.99, "learning_rate": 5.058164644255284e-06, "loss": 0.916, "step": 358200 }, { "epoch": 4.99, "learning_rate": 5.051198818595969e-06, "loss": 0.9012, "step": 358300 }, { "epoch": 4.99, "learning_rate": 5.0442329929366534e-06, "loss": 0.9146, "step": 358400 }, { "epoch": 4.99, "learning_rate": 5.037267167277337e-06, "loss": 0.8625, "step": 358500 }, { "epoch": 5.0, "learning_rate": 5.030301341618022e-06, "loss": 0.9046, "step": 358600 }, { "epoch": 5.0, "learning_rate": 5.023335515958707e-06, "loss": 0.8989, "step": 358700 }, { "epoch": 5.0, "learning_rate": 5.016369690299391e-06, "loss": 0.9112, "step": 358800 }, { "epoch": 5.0, "eval_gen_len": 20.0, "eval_loss": 1.1667309999465942, "eval_rouge1": 12.5404, "eval_rouge2": 3.7842, "eval_rougeL": 12.0541, "eval_rougeLsum": 12.1643, "eval_runtime": 1508.2531, "eval_samples_per_second": 8.863, "eval_steps_per_second": 2.216, "step": 358895 }, { "epoch": 5.0, "learning_rate": 5.009403864640076e-06, "loss": 0.9271, "step": 358900 }, { "epoch": 5.0, "learning_rate": 5.002438038980761e-06, "loss": 0.8275, "step": 359000 }, { "epoch": 5.0, "learning_rate": 4.9954722133214445e-06, "loss": 0.8209, "step": 359100 }, { "epoch": 5.0, "learning_rate": 4.9885760459187235e-06, "loss": 0.8225, "step": 359200 }, { "epoch": 5.01, "learning_rate": 4.981610220259407e-06, "loss": 0.8078, "step": 359300 }, { "epoch": 5.01, "learning_rate": 4.974644394600092e-06, "loss": 0.8394, "step": 359400 }, { "epoch": 5.01, "learning_rate": 4.967678568940777e-06, "loss": 0.8552, "step": 359500 }, { "epoch": 5.01, "learning_rate": 4.960712743281461e-06, "loss": 0.8287, "step": 359600 }, { "epoch": 5.01, "learning_rate": 4.953746917622146e-06, "loss": 0.7978, "step": 359700 }, { "epoch": 5.01, "learning_rate": 4.94678109196283e-06, "loss": 0.8254, "step": 359800 }, { "epoch": 5.01, "learning_rate": 4.939815266303515e-06, "loss": 0.805, "step": 359900 }, { "epoch": 5.02, "learning_rate": 4.932849440644199e-06, "loss": 0.8448, "step": 360000 }, { "epoch": 5.02, "learning_rate": 4.925883614984884e-06, "loss": 0.8456, "step": 360100 }, { "epoch": 5.02, "learning_rate": 4.9189177893255695e-06, "loss": 0.8382, "step": 360200 }, { "epoch": 5.02, "learning_rate": 4.911951963666254e-06, "loss": 0.8349, "step": 360300 }, { "epoch": 5.02, "learning_rate": 4.904986138006938e-06, "loss": 0.8105, "step": 360400 }, { "epoch": 5.02, "learning_rate": 4.898020312347623e-06, "loss": 0.8326, "step": 360500 }, { "epoch": 5.02, "learning_rate": 4.891054486688307e-06, "loss": 0.8429, "step": 360600 }, { "epoch": 5.03, "learning_rate": 4.884088661028992e-06, "loss": 0.8391, "step": 360700 }, { "epoch": 5.03, "learning_rate": 4.877122835369677e-06, "loss": 0.8196, "step": 360800 }, { "epoch": 5.03, "learning_rate": 4.870226667966954e-06, "loss": 0.8342, "step": 360900 }, { "epoch": 5.03, "learning_rate": 4.8632608423076395e-06, "loss": 0.8288, "step": 361000 }, { "epoch": 5.03, "learning_rate": 4.856295016648323e-06, "loss": 0.8454, "step": 361100 }, { "epoch": 5.03, "learning_rate": 4.849329190989008e-06, "loss": 0.8364, "step": 361200 }, { "epoch": 5.03, "learning_rate": 4.842363365329693e-06, "loss": 0.8249, "step": 361300 }, { "epoch": 5.03, "learning_rate": 4.8353975396703774e-06, "loss": 0.7996, "step": 361400 }, { "epoch": 5.04, "learning_rate": 4.828431714011062e-06, "loss": 0.8294, "step": 361500 }, { "epoch": 5.04, "learning_rate": 4.821465888351747e-06, "loss": 0.816, "step": 361600 }, { "epoch": 5.04, "learning_rate": 4.814500062692431e-06, "loss": 0.8391, "step": 361700 }, { "epoch": 5.04, "learning_rate": 4.807534237033115e-06, "loss": 0.8331, "step": 361800 }, { "epoch": 5.04, "learning_rate": 4.8005684113738e-06, "loss": 0.8248, "step": 361900 }, { "epoch": 5.04, "learning_rate": 4.793602585714485e-06, "loss": 0.8309, "step": 362000 }, { "epoch": 5.04, "learning_rate": 4.786636760055169e-06, "loss": 0.8254, "step": 362100 }, { "epoch": 5.05, "learning_rate": 4.779670934395854e-06, "loss": 0.8449, "step": 362200 }, { "epoch": 5.05, "learning_rate": 4.772705108736539e-06, "loss": 0.8179, "step": 362300 }, { "epoch": 5.05, "learning_rate": 4.765739283077223e-06, "loss": 0.8216, "step": 362400 }, { "epoch": 5.05, "learning_rate": 4.758773457417908e-06, "loss": 0.8372, "step": 362500 }, { "epoch": 5.05, "learning_rate": 4.751807631758593e-06, "loss": 0.8232, "step": 362600 }, { "epoch": 5.05, "learning_rate": 4.7448418060992775e-06, "loss": 0.8381, "step": 362700 }, { "epoch": 5.05, "learning_rate": 4.737875980439961e-06, "loss": 0.8593, "step": 362800 }, { "epoch": 5.06, "learning_rate": 4.730910154780646e-06, "loss": 0.8482, "step": 362900 }, { "epoch": 5.06, "learning_rate": 4.723944329121331e-06, "loss": 0.8209, "step": 363000 }, { "epoch": 5.06, "learning_rate": 4.716978503462015e-06, "loss": 0.8023, "step": 363100 }, { "epoch": 5.06, "learning_rate": 4.7100126778027e-06, "loss": 0.8405, "step": 363200 }, { "epoch": 5.06, "learning_rate": 4.703046852143385e-06, "loss": 0.8098, "step": 363300 }, { "epoch": 5.06, "learning_rate": 4.6960810264840685e-06, "loss": 0.8205, "step": 363400 }, { "epoch": 5.06, "learning_rate": 4.689115200824754e-06, "loss": 0.8516, "step": 363500 }, { "epoch": 5.07, "learning_rate": 4.682149375165439e-06, "loss": 0.8504, "step": 363600 }, { "epoch": 5.07, "learning_rate": 4.6751835495061234e-06, "loss": 0.8465, "step": 363700 }, { "epoch": 5.07, "learning_rate": 4.668217723846808e-06, "loss": 0.8444, "step": 363800 }, { "epoch": 5.07, "learning_rate": 4.661251898187492e-06, "loss": 0.8536, "step": 363900 }, { "epoch": 5.07, "learning_rate": 4.654286072528177e-06, "loss": 0.7937, "step": 364000 }, { "epoch": 5.07, "learning_rate": 4.647320246868861e-06, "loss": 0.8394, "step": 364100 }, { "epoch": 5.07, "learning_rate": 4.640354421209546e-06, "loss": 0.8356, "step": 364200 }, { "epoch": 5.08, "learning_rate": 4.633388595550231e-06, "loss": 0.8217, "step": 364300 }, { "epoch": 5.08, "learning_rate": 4.626422769890915e-06, "loss": 0.8573, "step": 364400 }, { "epoch": 5.08, "learning_rate": 4.619456944231599e-06, "loss": 0.8214, "step": 364500 }, { "epoch": 5.08, "learning_rate": 4.612491118572284e-06, "loss": 0.8246, "step": 364600 }, { "epoch": 5.08, "learning_rate": 4.6055252929129694e-06, "loss": 0.8251, "step": 364700 }, { "epoch": 5.08, "learning_rate": 4.598559467253654e-06, "loss": 0.84, "step": 364800 }, { "epoch": 5.08, "learning_rate": 4.591593641594339e-06, "loss": 0.8238, "step": 364900 }, { "epoch": 5.09, "learning_rate": 4.5846278159350235e-06, "loss": 0.8194, "step": 365000 }, { "epoch": 5.09, "learning_rate": 4.577661990275707e-06, "loss": 0.8537, "step": 365100 }, { "epoch": 5.09, "learning_rate": 4.570696164616392e-06, "loss": 0.8201, "step": 365200 }, { "epoch": 5.09, "learning_rate": 4.563730338957077e-06, "loss": 0.8323, "step": 365300 }, { "epoch": 5.09, "learning_rate": 4.556764513297761e-06, "loss": 0.8201, "step": 365400 }, { "epoch": 5.09, "learning_rate": 4.549798687638446e-06, "loss": 0.8394, "step": 365500 }, { "epoch": 5.09, "learning_rate": 4.54283286197913e-06, "loss": 0.8441, "step": 365600 }, { "epoch": 5.09, "learning_rate": 4.5358670363198146e-06, "loss": 0.8349, "step": 365700 }, { "epoch": 5.1, "learning_rate": 4.528901210660499e-06, "loss": 0.8408, "step": 365800 }, { "epoch": 5.1, "learning_rate": 4.521935385001185e-06, "loss": 0.8681, "step": 365900 }, { "epoch": 5.1, "learning_rate": 4.5149695593418695e-06, "loss": 0.8023, "step": 366000 }, { "epoch": 5.1, "learning_rate": 4.508003733682554e-06, "loss": 0.8192, "step": 366100 }, { "epoch": 5.1, "learning_rate": 4.501037908023238e-06, "loss": 0.842, "step": 366200 }, { "epoch": 5.1, "learning_rate": 4.494072082363923e-06, "loss": 0.8262, "step": 366300 }, { "epoch": 5.1, "learning_rate": 4.487106256704607e-06, "loss": 0.8162, "step": 366400 }, { "epoch": 5.11, "learning_rate": 4.480140431045292e-06, "loss": 0.8301, "step": 366500 }, { "epoch": 5.11, "learning_rate": 4.473174605385977e-06, "loss": 0.8343, "step": 366600 }, { "epoch": 5.11, "learning_rate": 4.466208779726661e-06, "loss": 0.8363, "step": 366700 }, { "epoch": 5.11, "learning_rate": 4.4593126123239395e-06, "loss": 0.8299, "step": 366800 }, { "epoch": 5.11, "learning_rate": 4.452346786664623e-06, "loss": 0.8213, "step": 366900 }, { "epoch": 5.11, "learning_rate": 4.445380961005308e-06, "loss": 0.8165, "step": 367000 }, { "epoch": 5.11, "learning_rate": 4.438415135345993e-06, "loss": 0.8285, "step": 367100 }, { "epoch": 5.12, "learning_rate": 4.431449309686677e-06, "loss": 0.8347, "step": 367200 }, { "epoch": 5.12, "learning_rate": 4.424483484027362e-06, "loss": 0.8282, "step": 367300 }, { "epoch": 5.12, "learning_rate": 4.417517658368047e-06, "loss": 0.8539, "step": 367400 }, { "epoch": 5.12, "learning_rate": 4.410551832708731e-06, "loss": 0.8361, "step": 367500 }, { "epoch": 5.12, "learning_rate": 4.403586007049415e-06, "loss": 0.8209, "step": 367600 }, { "epoch": 5.12, "learning_rate": 4.3966201813901e-06, "loss": 0.8303, "step": 367700 }, { "epoch": 5.12, "learning_rate": 4.389654355730785e-06, "loss": 0.8435, "step": 367800 }, { "epoch": 5.13, "learning_rate": 4.382688530071469e-06, "loss": 0.8139, "step": 367900 }, { "epoch": 5.13, "learning_rate": 4.375722704412155e-06, "loss": 0.8544, "step": 368000 }, { "epoch": 5.13, "learning_rate": 4.368756878752839e-06, "loss": 0.8244, "step": 368100 }, { "epoch": 5.13, "learning_rate": 4.361791053093523e-06, "loss": 0.8323, "step": 368200 }, { "epoch": 5.13, "learning_rate": 4.354825227434208e-06, "loss": 0.8592, "step": 368300 }, { "epoch": 5.13, "learning_rate": 4.347859401774893e-06, "loss": 0.8287, "step": 368400 }, { "epoch": 5.13, "learning_rate": 4.3408935761155774e-06, "loss": 0.8361, "step": 368500 }, { "epoch": 5.14, "learning_rate": 4.333927750456261e-06, "loss": 0.823, "step": 368600 }, { "epoch": 5.14, "learning_rate": 4.326961924796946e-06, "loss": 0.8323, "step": 368700 }, { "epoch": 5.14, "learning_rate": 4.319996099137631e-06, "loss": 0.8575, "step": 368800 }, { "epoch": 5.14, "learning_rate": 4.313030273478315e-06, "loss": 0.8333, "step": 368900 }, { "epoch": 5.14, "learning_rate": 4.306064447819e-06, "loss": 0.8535, "step": 369000 }, { "epoch": 5.14, "learning_rate": 4.299098622159685e-06, "loss": 0.8152, "step": 369100 }, { "epoch": 5.14, "learning_rate": 4.292132796500369e-06, "loss": 0.8207, "step": 369200 }, { "epoch": 5.14, "learning_rate": 4.285166970841054e-06, "loss": 0.8351, "step": 369300 }, { "epoch": 5.15, "learning_rate": 4.278201145181739e-06, "loss": 0.8194, "step": 369400 }, { "epoch": 5.15, "learning_rate": 4.2712353195224234e-06, "loss": 0.8532, "step": 369500 }, { "epoch": 5.15, "learning_rate": 4.264339152119701e-06, "loss": 0.8306, "step": 369600 }, { "epoch": 5.15, "learning_rate": 4.257373326460385e-06, "loss": 0.8295, "step": 369700 }, { "epoch": 5.15, "learning_rate": 4.25040750080107e-06, "loss": 0.8504, "step": 369800 }, { "epoch": 5.15, "learning_rate": 4.243441675141754e-06, "loss": 0.8283, "step": 369900 }, { "epoch": 5.15, "learning_rate": 4.2364758494824394e-06, "loss": 0.8485, "step": 370000 }, { "epoch": 5.16, "learning_rate": 4.229510023823124e-06, "loss": 0.81, "step": 370100 }, { "epoch": 5.16, "learning_rate": 4.222544198163809e-06, "loss": 0.8437, "step": 370200 }, { "epoch": 5.16, "learning_rate": 4.2155783725044935e-06, "loss": 0.8365, "step": 370300 }, { "epoch": 5.16, "learning_rate": 4.208612546845178e-06, "loss": 0.8538, "step": 370400 }, { "epoch": 5.16, "learning_rate": 4.201646721185862e-06, "loss": 0.8265, "step": 370500 }, { "epoch": 5.16, "learning_rate": 4.194680895526547e-06, "loss": 0.8176, "step": 370600 }, { "epoch": 5.16, "learning_rate": 4.187715069867231e-06, "loss": 0.8338, "step": 370700 }, { "epoch": 5.17, "learning_rate": 4.180749244207916e-06, "loss": 0.8237, "step": 370800 }, { "epoch": 5.17, "learning_rate": 4.173783418548601e-06, "loss": 0.8213, "step": 370900 }, { "epoch": 5.17, "learning_rate": 4.166817592889285e-06, "loss": 0.8419, "step": 371000 }, { "epoch": 5.17, "learning_rate": 4.159851767229969e-06, "loss": 0.8052, "step": 371100 }, { "epoch": 5.17, "learning_rate": 4.152885941570655e-06, "loss": 0.7705, "step": 371200 }, { "epoch": 5.17, "learning_rate": 4.1459201159113395e-06, "loss": 0.8226, "step": 371300 }, { "epoch": 5.17, "learning_rate": 4.138954290252024e-06, "loss": 0.8284, "step": 371400 }, { "epoch": 5.18, "learning_rate": 4.131988464592709e-06, "loss": 0.836, "step": 371500 }, { "epoch": 5.18, "learning_rate": 4.125022638933393e-06, "loss": 0.8067, "step": 371600 }, { "epoch": 5.18, "learning_rate": 4.118056813274077e-06, "loss": 0.8206, "step": 371700 }, { "epoch": 5.18, "learning_rate": 4.111090987614762e-06, "loss": 0.8185, "step": 371800 }, { "epoch": 5.18, "learning_rate": 4.104125161955447e-06, "loss": 0.8344, "step": 371900 }, { "epoch": 5.18, "learning_rate": 4.097159336296131e-06, "loss": 0.8392, "step": 372000 }, { "epoch": 5.18, "learning_rate": 4.090193510636816e-06, "loss": 0.8224, "step": 372100 }, { "epoch": 5.19, "learning_rate": 4.0832276849775e-06, "loss": 0.8247, "step": 372200 }, { "epoch": 5.19, "learning_rate": 4.076261859318185e-06, "loss": 0.8541, "step": 372300 }, { "epoch": 5.19, "learning_rate": 4.069296033658869e-06, "loss": 0.8669, "step": 372400 }, { "epoch": 5.19, "learning_rate": 4.062330207999555e-06, "loss": 0.8228, "step": 372500 }, { "epoch": 5.19, "learning_rate": 4.0553643823402395e-06, "loss": 0.8359, "step": 372600 }, { "epoch": 5.19, "learning_rate": 4.048398556680923e-06, "loss": 0.8489, "step": 372700 }, { "epoch": 5.19, "learning_rate": 4.041432731021608e-06, "loss": 0.8537, "step": 372800 }, { "epoch": 5.2, "learning_rate": 4.034536563618885e-06, "loss": 0.8625, "step": 372900 }, { "epoch": 5.2, "learning_rate": 4.02757073795957e-06, "loss": 0.8186, "step": 373000 }, { "epoch": 5.2, "learning_rate": 4.020604912300255e-06, "loss": 0.8274, "step": 373100 }, { "epoch": 5.2, "learning_rate": 4.013708744897533e-06, "loss": 0.8246, "step": 373200 }, { "epoch": 5.2, "learning_rate": 4.0067429192382175e-06, "loss": 0.8173, "step": 373300 }, { "epoch": 5.2, "learning_rate": 3.999777093578902e-06, "loss": 0.8404, "step": 373400 }, { "epoch": 5.2, "learning_rate": 3.992811267919587e-06, "loss": 0.8254, "step": 373500 }, { "epoch": 5.2, "learning_rate": 3.985845442260271e-06, "loss": 0.8032, "step": 373600 }, { "epoch": 5.21, "learning_rate": 3.978879616600955e-06, "loss": 0.81, "step": 373700 }, { "epoch": 5.21, "learning_rate": 3.97191379094164e-06, "loss": 0.8396, "step": 373800 }, { "epoch": 5.21, "learning_rate": 3.964947965282325e-06, "loss": 0.8314, "step": 373900 }, { "epoch": 5.21, "learning_rate": 3.957982139623009e-06, "loss": 0.8444, "step": 374000 }, { "epoch": 5.21, "learning_rate": 3.951016313963695e-06, "loss": 0.8479, "step": 374100 }, { "epoch": 5.21, "learning_rate": 3.944050488304379e-06, "loss": 0.8289, "step": 374200 }, { "epoch": 5.21, "learning_rate": 3.9370846626450635e-06, "loss": 0.8545, "step": 374300 }, { "epoch": 5.22, "learning_rate": 3.930118836985748e-06, "loss": 0.8515, "step": 374400 }, { "epoch": 5.22, "learning_rate": 3.923153011326433e-06, "loss": 0.8247, "step": 374500 }, { "epoch": 5.22, "learning_rate": 3.9161871856671175e-06, "loss": 0.8546, "step": 374600 }, { "epoch": 5.22, "learning_rate": 3.909221360007802e-06, "loss": 0.8536, "step": 374700 }, { "epoch": 5.22, "learning_rate": 3.902255534348486e-06, "loss": 0.8446, "step": 374800 }, { "epoch": 5.22, "learning_rate": 3.895289708689171e-06, "loss": 0.8714, "step": 374900 }, { "epoch": 5.22, "learning_rate": 3.888323883029855e-06, "loss": 0.8566, "step": 375000 }, { "epoch": 5.23, "learning_rate": 3.88135805737054e-06, "loss": 0.8568, "step": 375100 }, { "epoch": 5.23, "learning_rate": 3.874392231711225e-06, "loss": 0.8445, "step": 375200 }, { "epoch": 5.23, "learning_rate": 3.8674264060519094e-06, "loss": 0.8545, "step": 375300 }, { "epoch": 5.23, "learning_rate": 3.860460580392594e-06, "loss": 0.8354, "step": 375400 }, { "epoch": 5.23, "learning_rate": 3.853494754733279e-06, "loss": 0.8702, "step": 375500 }, { "epoch": 5.23, "learning_rate": 3.8465289290739635e-06, "loss": 0.83, "step": 375600 }, { "epoch": 5.23, "learning_rate": 3.839563103414648e-06, "loss": 0.8258, "step": 375700 }, { "epoch": 5.24, "learning_rate": 3.832597277755333e-06, "loss": 0.8523, "step": 375800 }, { "epoch": 5.24, "learning_rate": 3.825631452096017e-06, "loss": 0.8535, "step": 375900 }, { "epoch": 5.24, "learning_rate": 3.818665626436701e-06, "loss": 0.8125, "step": 376000 }, { "epoch": 5.24, "learning_rate": 3.811699800777386e-06, "loss": 0.8218, "step": 376100 }, { "epoch": 5.24, "learning_rate": 3.804733975118071e-06, "loss": 0.8384, "step": 376200 }, { "epoch": 5.24, "learning_rate": 3.797768149458756e-06, "loss": 0.8437, "step": 376300 }, { "epoch": 5.24, "learning_rate": 3.7908023237994397e-06, "loss": 0.8364, "step": 376400 }, { "epoch": 5.25, "learning_rate": 3.7838364981401244e-06, "loss": 0.8538, "step": 376500 }, { "epoch": 5.25, "learning_rate": 3.776870672480809e-06, "loss": 0.8263, "step": 376600 }, { "epoch": 5.25, "learning_rate": 3.7699048468214937e-06, "loss": 0.8514, "step": 376700 }, { "epoch": 5.25, "learning_rate": 3.7629390211621784e-06, "loss": 0.8167, "step": 376800 }, { "epoch": 5.25, "learning_rate": 3.7559731955028635e-06, "loss": 0.8557, "step": 376900 }, { "epoch": 5.25, "learning_rate": 3.749007369843548e-06, "loss": 0.8237, "step": 377000 }, { "epoch": 5.25, "learning_rate": 3.742041544184232e-06, "loss": 0.8251, "step": 377100 }, { "epoch": 5.26, "learning_rate": 3.7350757185249167e-06, "loss": 0.8271, "step": 377200 }, { "epoch": 5.26, "learning_rate": 3.7281098928656014e-06, "loss": 0.8176, "step": 377300 }, { "epoch": 5.26, "learning_rate": 3.7211440672062857e-06, "loss": 0.8048, "step": 377400 }, { "epoch": 5.26, "learning_rate": 3.7141782415469708e-06, "loss": 0.8213, "step": 377500 }, { "epoch": 5.26, "learning_rate": 3.7072124158876555e-06, "loss": 0.8204, "step": 377600 }, { "epoch": 5.26, "learning_rate": 3.7002465902283397e-06, "loss": 0.8593, "step": 377700 }, { "epoch": 5.26, "learning_rate": 3.6932807645690244e-06, "loss": 0.8453, "step": 377800 }, { "epoch": 5.26, "learning_rate": 3.686314938909709e-06, "loss": 0.852, "step": 377900 }, { "epoch": 5.27, "learning_rate": 3.6793491132503934e-06, "loss": 0.8347, "step": 378000 }, { "epoch": 5.27, "learning_rate": 3.6723832875910785e-06, "loss": 0.8226, "step": 378100 }, { "epoch": 5.27, "learning_rate": 3.665417461931763e-06, "loss": 0.8266, "step": 378200 }, { "epoch": 5.27, "learning_rate": 3.6584516362724474e-06, "loss": 0.8069, "step": 378300 }, { "epoch": 5.27, "learning_rate": 3.651555468869725e-06, "loss": 0.8233, "step": 378400 }, { "epoch": 5.27, "learning_rate": 3.64458964321041e-06, "loss": 0.8328, "step": 378500 }, { "epoch": 5.27, "learning_rate": 3.6376238175510945e-06, "loss": 0.8203, "step": 378600 }, { "epoch": 5.28, "learning_rate": 3.6306579918917787e-06, "loss": 0.8252, "step": 378700 }, { "epoch": 5.28, "learning_rate": 3.6236921662324634e-06, "loss": 0.831, "step": 378800 }, { "epoch": 5.28, "learning_rate": 3.6167263405731485e-06, "loss": 0.8212, "step": 378900 }, { "epoch": 5.28, "learning_rate": 3.6097605149138328e-06, "loss": 0.8431, "step": 379000 }, { "epoch": 5.28, "learning_rate": 3.6027946892545175e-06, "loss": 0.81, "step": 379100 }, { "epoch": 5.28, "learning_rate": 3.595828863595202e-06, "loss": 0.8325, "step": 379200 }, { "epoch": 5.28, "learning_rate": 3.5888630379358864e-06, "loss": 0.8471, "step": 379300 }, { "epoch": 5.29, "learning_rate": 3.581897212276571e-06, "loss": 0.8566, "step": 379400 }, { "epoch": 5.29, "learning_rate": 3.574931386617256e-06, "loss": 0.8169, "step": 379500 }, { "epoch": 5.29, "learning_rate": 3.5679655609579405e-06, "loss": 0.8231, "step": 379600 }, { "epoch": 5.29, "learning_rate": 3.560999735298625e-06, "loss": 0.8377, "step": 379700 }, { "epoch": 5.29, "learning_rate": 3.5540339096393094e-06, "loss": 0.8441, "step": 379800 }, { "epoch": 5.29, "learning_rate": 3.547068083979994e-06, "loss": 0.834, "step": 379900 }, { "epoch": 5.29, "learning_rate": 3.5401022583206788e-06, "loss": 0.8248, "step": 380000 }, { "epoch": 5.3, "learning_rate": 3.5331364326613634e-06, "loss": 0.8165, "step": 380100 }, { "epoch": 5.3, "learning_rate": 3.526170607002048e-06, "loss": 0.8424, "step": 380200 }, { "epoch": 5.3, "learning_rate": 3.519204781342733e-06, "loss": 0.8317, "step": 380300 }, { "epoch": 5.3, "learning_rate": 3.512238955683417e-06, "loss": 0.8206, "step": 380400 }, { "epoch": 5.3, "learning_rate": 3.505342788280695e-06, "loss": 0.8332, "step": 380500 }, { "epoch": 5.3, "learning_rate": 3.4983769626213795e-06, "loss": 0.8391, "step": 380600 }, { "epoch": 5.3, "learning_rate": 3.491411136962064e-06, "loss": 0.8209, "step": 380700 }, { "epoch": 5.31, "learning_rate": 3.484445311302749e-06, "loss": 0.8131, "step": 380800 }, { "epoch": 5.31, "learning_rate": 3.4774794856434335e-06, "loss": 0.8374, "step": 380900 }, { "epoch": 5.31, "learning_rate": 3.470513659984118e-06, "loss": 0.8362, "step": 381000 }, { "epoch": 5.31, "learning_rate": 3.4635478343248025e-06, "loss": 0.8236, "step": 381100 }, { "epoch": 5.31, "learning_rate": 3.456582008665487e-06, "loss": 0.8184, "step": 381200 }, { "epoch": 5.31, "learning_rate": 3.449616183006172e-06, "loss": 0.8451, "step": 381300 }, { "epoch": 5.31, "learning_rate": 3.442650357346856e-06, "loss": 0.821, "step": 381400 }, { "epoch": 5.31, "learning_rate": 3.435684531687541e-06, "loss": 0.8402, "step": 381500 }, { "epoch": 5.32, "learning_rate": 3.428718706028226e-06, "loss": 0.8643, "step": 381600 }, { "epoch": 5.32, "learning_rate": 3.42175288036891e-06, "loss": 0.8172, "step": 381700 }, { "epoch": 5.32, "learning_rate": 3.414787054709595e-06, "loss": 0.8328, "step": 381800 }, { "epoch": 5.32, "learning_rate": 3.4078212290502795e-06, "loss": 0.8191, "step": 381900 }, { "epoch": 5.32, "learning_rate": 3.4008554033909638e-06, "loss": 0.8503, "step": 382000 }, { "epoch": 5.32, "learning_rate": 3.393889577731649e-06, "loss": 0.8623, "step": 382100 }, { "epoch": 5.32, "learning_rate": 3.3869237520723335e-06, "loss": 0.8358, "step": 382200 }, { "epoch": 5.33, "learning_rate": 3.379957926413018e-06, "loss": 0.8478, "step": 382300 }, { "epoch": 5.33, "learning_rate": 3.3729921007537025e-06, "loss": 0.825, "step": 382400 }, { "epoch": 5.33, "learning_rate": 3.3660262750943867e-06, "loss": 0.8273, "step": 382500 }, { "epoch": 5.33, "learning_rate": 3.3590604494350714e-06, "loss": 0.8264, "step": 382600 }, { "epoch": 5.33, "learning_rate": 3.352094623775756e-06, "loss": 0.8397, "step": 382700 }, { "epoch": 5.33, "learning_rate": 3.345128798116441e-06, "loss": 0.8351, "step": 382800 }, { "epoch": 5.33, "learning_rate": 3.3381629724571255e-06, "loss": 0.8359, "step": 382900 }, { "epoch": 5.34, "learning_rate": 3.33119714679781e-06, "loss": 0.8243, "step": 383000 }, { "epoch": 5.34, "learning_rate": 3.3242313211384944e-06, "loss": 0.8176, "step": 383100 }, { "epoch": 5.34, "learning_rate": 3.317265495479179e-06, "loss": 0.8401, "step": 383200 }, { "epoch": 5.34, "learning_rate": 3.310299669819864e-06, "loss": 0.825, "step": 383300 }, { "epoch": 5.34, "learning_rate": 3.3033338441605485e-06, "loss": 0.8245, "step": 383400 }, { "epoch": 5.34, "learning_rate": 3.296368018501233e-06, "loss": 0.8646, "step": 383500 }, { "epoch": 5.34, "learning_rate": 3.289402192841918e-06, "loss": 0.8621, "step": 383600 }, { "epoch": 5.35, "learning_rate": 3.282436367182602e-06, "loss": 0.83, "step": 383700 }, { "epoch": 5.35, "learning_rate": 3.2754705415232868e-06, "loss": 0.8519, "step": 383800 }, { "epoch": 5.35, "learning_rate": 3.268504715863971e-06, "loss": 0.8233, "step": 383900 }, { "epoch": 5.35, "learning_rate": 3.261538890204656e-06, "loss": 0.8262, "step": 384000 }, { "epoch": 5.35, "learning_rate": 3.254573064545341e-06, "loss": 0.8491, "step": 384100 }, { "epoch": 5.35, "learning_rate": 3.2476768971426185e-06, "loss": 0.8227, "step": 384200 }, { "epoch": 5.35, "learning_rate": 3.240780729739896e-06, "loss": 0.8404, "step": 384300 }, { "epoch": 5.36, "learning_rate": 3.233814904080581e-06, "loss": 0.8342, "step": 384400 }, { "epoch": 5.36, "learning_rate": 3.2268490784212656e-06, "loss": 0.8264, "step": 384500 }, { "epoch": 5.36, "learning_rate": 3.21988325276195e-06, "loss": 0.8257, "step": 384600 }, { "epoch": 5.36, "learning_rate": 3.2129174271026345e-06, "loss": 0.8442, "step": 384700 }, { "epoch": 5.36, "learning_rate": 3.2059516014433192e-06, "loss": 0.8405, "step": 384800 }, { "epoch": 5.36, "learning_rate": 3.1989857757840035e-06, "loss": 0.8456, "step": 384900 }, { "epoch": 5.36, "learning_rate": 3.1920199501246886e-06, "loss": 0.8348, "step": 385000 }, { "epoch": 5.37, "learning_rate": 3.1850541244653733e-06, "loss": 0.8129, "step": 385100 }, { "epoch": 5.37, "learning_rate": 3.1780882988060575e-06, "loss": 0.847, "step": 385200 }, { "epoch": 5.37, "learning_rate": 3.1711224731467422e-06, "loss": 0.8226, "step": 385300 }, { "epoch": 5.37, "learning_rate": 3.1641566474874265e-06, "loss": 0.8348, "step": 385400 }, { "epoch": 5.37, "learning_rate": 3.157190821828111e-06, "loss": 0.821, "step": 385500 }, { "epoch": 5.37, "learning_rate": 3.1502249961687963e-06, "loss": 0.8228, "step": 385600 }, { "epoch": 5.37, "learning_rate": 3.1432591705094805e-06, "loss": 0.8163, "step": 385700 }, { "epoch": 5.37, "learning_rate": 3.136293344850165e-06, "loss": 0.8451, "step": 385800 }, { "epoch": 5.38, "learning_rate": 3.12932751919085e-06, "loss": 0.8191, "step": 385900 }, { "epoch": 5.38, "learning_rate": 3.122361693531534e-06, "loss": 0.8147, "step": 386000 }, { "epoch": 5.38, "learning_rate": 3.115395867872219e-06, "loss": 0.8197, "step": 386100 }, { "epoch": 5.38, "learning_rate": 3.108430042212904e-06, "loss": 0.8634, "step": 386200 }, { "epoch": 5.38, "learning_rate": 3.101464216553588e-06, "loss": 0.8023, "step": 386300 }, { "epoch": 5.38, "learning_rate": 3.094498390894273e-06, "loss": 0.8237, "step": 386400 }, { "epoch": 5.38, "learning_rate": 3.0875325652349576e-06, "loss": 0.8171, "step": 386500 }, { "epoch": 5.39, "learning_rate": 3.080566739575642e-06, "loss": 0.798, "step": 386600 }, { "epoch": 5.39, "learning_rate": 3.0736009139163265e-06, "loss": 0.8277, "step": 386700 }, { "epoch": 5.39, "learning_rate": 3.066635088257011e-06, "loss": 0.8288, "step": 386800 }, { "epoch": 5.39, "learning_rate": 3.059669262597696e-06, "loss": 0.8021, "step": 386900 }, { "epoch": 5.39, "learning_rate": 3.0527034369383806e-06, "loss": 0.8283, "step": 387000 }, { "epoch": 5.39, "learning_rate": 3.045737611279065e-06, "loss": 0.8425, "step": 387100 }, { "epoch": 5.39, "learning_rate": 3.0387717856197495e-06, "loss": 0.818, "step": 387200 }, { "epoch": 5.4, "learning_rate": 3.031805959960434e-06, "loss": 0.8403, "step": 387300 }, { "epoch": 5.4, "learning_rate": 3.024840134301119e-06, "loss": 0.8149, "step": 387400 }, { "epoch": 5.4, "learning_rate": 3.0178743086418036e-06, "loss": 0.8069, "step": 387500 }, { "epoch": 5.4, "learning_rate": 3.0109084829824882e-06, "loss": 0.8554, "step": 387600 }, { "epoch": 5.4, "learning_rate": 3.0039426573231725e-06, "loss": 0.8302, "step": 387700 }, { "epoch": 5.4, "learning_rate": 2.996976831663857e-06, "loss": 0.8193, "step": 387800 }, { "epoch": 5.4, "learning_rate": 2.990011006004542e-06, "loss": 0.8342, "step": 387900 }, { "epoch": 5.41, "learning_rate": 2.983045180345226e-06, "loss": 0.8531, "step": 388000 }, { "epoch": 5.41, "learning_rate": 2.9760793546859112e-06, "loss": 0.8356, "step": 388100 }, { "epoch": 5.41, "learning_rate": 2.9691135290265955e-06, "loss": 0.8063, "step": 388200 }, { "epoch": 5.41, "learning_rate": 2.96214770336728e-06, "loss": 0.8303, "step": 388300 }, { "epoch": 5.41, "learning_rate": 2.955181877707965e-06, "loss": 0.8202, "step": 388400 }, { "epoch": 5.41, "learning_rate": 2.948216052048649e-06, "loss": 0.8353, "step": 388500 }, { "epoch": 5.41, "learning_rate": 2.941250226389334e-06, "loss": 0.838, "step": 388600 }, { "epoch": 5.42, "learning_rate": 2.934284400730019e-06, "loss": 0.8324, "step": 388700 }, { "epoch": 5.42, "learning_rate": 2.927318575070703e-06, "loss": 0.8324, "step": 388800 }, { "epoch": 5.42, "learning_rate": 2.920352749411388e-06, "loss": 0.8674, "step": 388900 }, { "epoch": 5.42, "learning_rate": 2.9133869237520725e-06, "loss": 0.8197, "step": 389000 }, { "epoch": 5.42, "learning_rate": 2.906421098092757e-06, "loss": 0.8349, "step": 389100 }, { "epoch": 5.42, "learning_rate": 2.8994552724334415e-06, "loss": 0.8212, "step": 389200 }, { "epoch": 5.42, "learning_rate": 2.8924894467741266e-06, "loss": 0.8134, "step": 389300 }, { "epoch": 5.42, "learning_rate": 2.885523621114811e-06, "loss": 0.8445, "step": 389400 }, { "epoch": 5.43, "learning_rate": 2.8785577954554955e-06, "loss": 0.8518, "step": 389500 }, { "epoch": 5.43, "learning_rate": 2.8716616280527732e-06, "loss": 0.842, "step": 389600 }, { "epoch": 5.43, "learning_rate": 2.864695802393458e-06, "loss": 0.819, "step": 389700 }, { "epoch": 5.43, "learning_rate": 2.857729976734142e-06, "loss": 0.8206, "step": 389800 }, { "epoch": 5.43, "learning_rate": 2.850764151074827e-06, "loss": 0.8133, "step": 389900 }, { "epoch": 5.43, "learning_rate": 2.8437983254155115e-06, "loss": 0.8705, "step": 390000 }, { "epoch": 5.43, "learning_rate": 2.8368324997561962e-06, "loss": 0.7895, "step": 390100 }, { "epoch": 5.44, "learning_rate": 2.829866674096881e-06, "loss": 0.8369, "step": 390200 }, { "epoch": 5.44, "learning_rate": 2.8229008484375656e-06, "loss": 0.8482, "step": 390300 }, { "epoch": 5.44, "learning_rate": 2.81593502277825e-06, "loss": 0.839, "step": 390400 }, { "epoch": 5.44, "learning_rate": 2.8089691971189345e-06, "loss": 0.8442, "step": 390500 }, { "epoch": 5.44, "learning_rate": 2.802003371459619e-06, "loss": 0.8403, "step": 390600 }, { "epoch": 5.44, "learning_rate": 2.795037545800304e-06, "loss": 0.8377, "step": 390700 }, { "epoch": 5.44, "learning_rate": 2.7880717201409886e-06, "loss": 0.7882, "step": 390800 }, { "epoch": 5.45, "learning_rate": 2.781105894481673e-06, "loss": 0.8355, "step": 390900 }, { "epoch": 5.45, "learning_rate": 2.7741400688223575e-06, "loss": 0.8392, "step": 391000 }, { "epoch": 5.45, "learning_rate": 2.767174243163042e-06, "loss": 0.8438, "step": 391100 }, { "epoch": 5.45, "learning_rate": 2.7602084175037265e-06, "loss": 0.8294, "step": 391200 }, { "epoch": 5.45, "learning_rate": 2.7532425918444116e-06, "loss": 0.8563, "step": 391300 }, { "epoch": 5.45, "learning_rate": 2.7462767661850963e-06, "loss": 0.7958, "step": 391400 }, { "epoch": 5.45, "learning_rate": 2.7393109405257805e-06, "loss": 0.8421, "step": 391500 }, { "epoch": 5.46, "learning_rate": 2.732345114866465e-06, "loss": 0.8167, "step": 391600 }, { "epoch": 5.46, "learning_rate": 2.72537928920715e-06, "loss": 0.815, "step": 391700 }, { "epoch": 5.46, "learning_rate": 2.718413463547834e-06, "loss": 0.8459, "step": 391800 }, { "epoch": 5.46, "learning_rate": 2.7114476378885192e-06, "loss": 0.83, "step": 391900 }, { "epoch": 5.46, "learning_rate": 2.704481812229204e-06, "loss": 0.8363, "step": 392000 }, { "epoch": 5.46, "learning_rate": 2.697515986569888e-06, "loss": 0.8393, "step": 392100 }, { "epoch": 5.46, "learning_rate": 2.690550160910573e-06, "loss": 0.8446, "step": 392200 }, { "epoch": 5.47, "learning_rate": 2.6836539935078506e-06, "loss": 0.8503, "step": 392300 }, { "epoch": 5.47, "learning_rate": 2.6766881678485353e-06, "loss": 0.858, "step": 392400 }, { "epoch": 5.47, "learning_rate": 2.6697223421892195e-06, "loss": 0.8208, "step": 392500 }, { "epoch": 5.47, "learning_rate": 2.662756516529904e-06, "loss": 0.8277, "step": 392600 }, { "epoch": 5.47, "learning_rate": 2.6557906908705893e-06, "loss": 0.8217, "step": 392700 }, { "epoch": 5.47, "learning_rate": 2.6488248652112736e-06, "loss": 0.7853, "step": 392800 }, { "epoch": 5.47, "learning_rate": 2.6418590395519582e-06, "loss": 0.8538, "step": 392900 }, { "epoch": 5.48, "learning_rate": 2.634893213892643e-06, "loss": 0.8455, "step": 393000 }, { "epoch": 5.48, "learning_rate": 2.627927388233327e-06, "loss": 0.8299, "step": 393100 }, { "epoch": 5.48, "learning_rate": 2.620961562574012e-06, "loss": 0.8385, "step": 393200 }, { "epoch": 5.48, "learning_rate": 2.6139957369146966e-06, "loss": 0.8374, "step": 393300 }, { "epoch": 5.48, "learning_rate": 2.6070299112553812e-06, "loss": 0.818, "step": 393400 }, { "epoch": 5.48, "learning_rate": 2.600064085596066e-06, "loss": 0.8294, "step": 393500 }, { "epoch": 5.48, "learning_rate": 2.59309825993675e-06, "loss": 0.8212, "step": 393600 }, { "epoch": 5.48, "learning_rate": 2.586132434277435e-06, "loss": 0.8319, "step": 393700 }, { "epoch": 5.49, "learning_rate": 2.5791666086181196e-06, "loss": 0.8353, "step": 393800 }, { "epoch": 5.49, "learning_rate": 2.572200782958804e-06, "loss": 0.8453, "step": 393900 }, { "epoch": 5.49, "learning_rate": 2.565234957299489e-06, "loss": 0.8301, "step": 394000 }, { "epoch": 5.49, "learning_rate": 2.5582691316401736e-06, "loss": 0.8026, "step": 394100 }, { "epoch": 5.49, "learning_rate": 2.551303305980858e-06, "loss": 0.8317, "step": 394200 }, { "epoch": 5.49, "learning_rate": 2.5443374803215425e-06, "loss": 0.8567, "step": 394300 }, { "epoch": 5.49, "learning_rate": 2.5373716546622272e-06, "loss": 0.842, "step": 394400 }, { "epoch": 5.5, "learning_rate": 2.5304058290029115e-06, "loss": 0.8337, "step": 394500 }, { "epoch": 5.5, "learning_rate": 2.5234400033435966e-06, "loss": 0.8284, "step": 394600 }, { "epoch": 5.5, "learning_rate": 2.5164741776842813e-06, "loss": 0.8391, "step": 394700 }, { "epoch": 5.5, "learning_rate": 2.5095083520249655e-06, "loss": 0.8143, "step": 394800 }, { "epoch": 5.5, "learning_rate": 2.5025425263656502e-06, "loss": 0.8268, "step": 394900 }, { "epoch": 5.5, "learning_rate": 2.4955767007063345e-06, "loss": 0.8075, "step": 395000 }, { "epoch": 5.5, "learning_rate": 2.488610875047019e-06, "loss": 0.8218, "step": 395100 }, { "epoch": 5.51, "learning_rate": 2.4816450493877043e-06, "loss": 0.8452, "step": 395200 }, { "epoch": 5.51, "learning_rate": 2.4746792237283885e-06, "loss": 0.8349, "step": 395300 }, { "epoch": 5.51, "learning_rate": 2.467713398069073e-06, "loss": 0.8594, "step": 395400 }, { "epoch": 5.51, "learning_rate": 2.460747572409758e-06, "loss": 0.8357, "step": 395500 }, { "epoch": 5.51, "learning_rate": 2.453781746750442e-06, "loss": 0.8482, "step": 395600 }, { "epoch": 5.51, "learning_rate": 2.446815921091127e-06, "loss": 0.8132, "step": 395700 }, { "epoch": 5.51, "learning_rate": 2.439850095431812e-06, "loss": 0.8626, "step": 395800 }, { "epoch": 5.52, "learning_rate": 2.432884269772496e-06, "loss": 0.8244, "step": 395900 }, { "epoch": 5.52, "learning_rate": 2.425918444113181e-06, "loss": 0.8427, "step": 396000 }, { "epoch": 5.52, "learning_rate": 2.4189526184538656e-06, "loss": 0.8401, "step": 396100 }, { "epoch": 5.52, "learning_rate": 2.4120564510511433e-06, "loss": 0.8222, "step": 396200 }, { "epoch": 5.52, "learning_rate": 2.4050906253918275e-06, "loss": 0.845, "step": 396300 }, { "epoch": 5.52, "learning_rate": 2.3981247997325122e-06, "loss": 0.818, "step": 396400 }, { "epoch": 5.52, "learning_rate": 2.391158974073197e-06, "loss": 0.8338, "step": 396500 }, { "epoch": 5.53, "learning_rate": 2.3841931484138816e-06, "loss": 0.8357, "step": 396600 }, { "epoch": 5.53, "learning_rate": 2.3772273227545663e-06, "loss": 0.8255, "step": 396700 }, { "epoch": 5.53, "learning_rate": 2.370261497095251e-06, "loss": 0.8069, "step": 396800 }, { "epoch": 5.53, "learning_rate": 2.363295671435935e-06, "loss": 0.8002, "step": 396900 }, { "epoch": 5.53, "learning_rate": 2.35632984577662e-06, "loss": 0.8583, "step": 397000 }, { "epoch": 5.53, "learning_rate": 2.3493640201173046e-06, "loss": 0.8307, "step": 397100 }, { "epoch": 5.53, "learning_rate": 2.3423981944579893e-06, "loss": 0.8362, "step": 397200 }, { "epoch": 5.54, "learning_rate": 2.335432368798674e-06, "loss": 0.8156, "step": 397300 }, { "epoch": 5.54, "learning_rate": 2.3284665431393586e-06, "loss": 0.8103, "step": 397400 }, { "epoch": 5.54, "learning_rate": 2.321500717480043e-06, "loss": 0.8506, "step": 397500 }, { "epoch": 5.54, "learning_rate": 2.3145348918207276e-06, "loss": 0.8415, "step": 397600 }, { "epoch": 5.54, "learning_rate": 2.307569066161412e-06, "loss": 0.8264, "step": 397700 }, { "epoch": 5.54, "learning_rate": 2.300603240502097e-06, "loss": 0.8058, "step": 397800 }, { "epoch": 5.54, "learning_rate": 2.2936374148427816e-06, "loss": 0.8303, "step": 397900 }, { "epoch": 5.54, "learning_rate": 2.286671589183466e-06, "loss": 0.8256, "step": 398000 }, { "epoch": 5.55, "learning_rate": 2.2797057635241506e-06, "loss": 0.8057, "step": 398100 }, { "epoch": 5.55, "learning_rate": 2.2727399378648352e-06, "loss": 0.8375, "step": 398200 }, { "epoch": 5.55, "learning_rate": 2.2657741122055195e-06, "loss": 0.8279, "step": 398300 }, { "epoch": 5.55, "learning_rate": 2.258808286546204e-06, "loss": 0.8181, "step": 398400 }, { "epoch": 5.55, "learning_rate": 2.2518424608868893e-06, "loss": 0.8052, "step": 398500 }, { "epoch": 5.55, "learning_rate": 2.2448766352275736e-06, "loss": 0.8453, "step": 398600 }, { "epoch": 5.55, "learning_rate": 2.2379108095682582e-06, "loss": 0.8587, "step": 398700 }, { "epoch": 5.56, "learning_rate": 2.230944983908943e-06, "loss": 0.8429, "step": 398800 }, { "epoch": 5.56, "learning_rate": 2.223979158249627e-06, "loss": 0.8207, "step": 398900 }, { "epoch": 5.56, "learning_rate": 2.217082990846905e-06, "loss": 0.8515, "step": 399000 }, { "epoch": 5.56, "learning_rate": 2.2101171651875896e-06, "loss": 0.8403, "step": 399100 }, { "epoch": 5.56, "learning_rate": 2.2031513395282742e-06, "loss": 0.8369, "step": 399200 }, { "epoch": 5.56, "learning_rate": 2.196185513868959e-06, "loss": 0.841, "step": 399300 }, { "epoch": 5.56, "learning_rate": 2.1892196882096436e-06, "loss": 0.8074, "step": 399400 }, { "epoch": 5.57, "learning_rate": 2.1822538625503283e-06, "loss": 0.7971, "step": 399500 }, { "epoch": 5.57, "learning_rate": 2.1752880368910126e-06, "loss": 0.8242, "step": 399600 }, { "epoch": 5.57, "learning_rate": 2.1683222112316972e-06, "loss": 0.8233, "step": 399700 }, { "epoch": 5.57, "learning_rate": 2.161356385572382e-06, "loss": 0.8398, "step": 399800 }, { "epoch": 5.57, "learning_rate": 2.1543905599130666e-06, "loss": 0.8336, "step": 399900 }, { "epoch": 5.57, "learning_rate": 2.1474247342537513e-06, "loss": 0.8367, "step": 400000 }, { "epoch": 5.57, "learning_rate": 2.140458908594436e-06, "loss": 0.8471, "step": 400100 }, { "epoch": 5.58, "learning_rate": 2.1334930829351202e-06, "loss": 0.8388, "step": 400200 }, { "epoch": 5.58, "learning_rate": 2.126527257275805e-06, "loss": 0.8394, "step": 400300 }, { "epoch": 5.58, "learning_rate": 2.119561431616489e-06, "loss": 0.8561, "step": 400400 }, { "epoch": 5.58, "learning_rate": 2.1125956059571743e-06, "loss": 0.8279, "step": 400500 }, { "epoch": 5.58, "learning_rate": 2.105629780297859e-06, "loss": 0.818, "step": 400600 }, { "epoch": 5.58, "learning_rate": 2.0986639546385432e-06, "loss": 0.8218, "step": 400700 }, { "epoch": 5.58, "learning_rate": 2.091698128979228e-06, "loss": 0.831, "step": 400800 }, { "epoch": 5.59, "learning_rate": 2.0847323033199126e-06, "loss": 0.8279, "step": 400900 }, { "epoch": 5.59, "learning_rate": 2.077766477660597e-06, "loss": 0.8396, "step": 401000 }, { "epoch": 5.59, "learning_rate": 2.070800652001282e-06, "loss": 0.8516, "step": 401100 }, { "epoch": 5.59, "learning_rate": 2.0638348263419666e-06, "loss": 0.8415, "step": 401200 }, { "epoch": 5.59, "learning_rate": 2.056869000682651e-06, "loss": 0.8537, "step": 401300 }, { "epoch": 5.59, "learning_rate": 2.0499031750233356e-06, "loss": 0.8165, "step": 401400 }, { "epoch": 5.59, "learning_rate": 2.0429373493640203e-06, "loss": 0.8216, "step": 401500 }, { "epoch": 5.59, "learning_rate": 2.0359715237047045e-06, "loss": 0.8369, "step": 401600 }, { "epoch": 5.6, "learning_rate": 2.0290056980453896e-06, "loss": 0.8306, "step": 401700 }, { "epoch": 5.6, "learning_rate": 2.022039872386074e-06, "loss": 0.8369, "step": 401800 }, { "epoch": 5.6, "learning_rate": 2.0150740467267586e-06, "loss": 0.8312, "step": 401900 }, { "epoch": 5.6, "learning_rate": 2.0081082210674433e-06, "loss": 0.8204, "step": 402000 }, { "epoch": 5.6, "learning_rate": 2.0011423954081275e-06, "loss": 0.8472, "step": 402100 }, { "epoch": 5.6, "learning_rate": 1.994176569748812e-06, "loss": 0.8314, "step": 402200 }, { "epoch": 5.6, "learning_rate": 1.9872107440894973e-06, "loss": 0.8275, "step": 402300 }, { "epoch": 5.61, "learning_rate": 1.9802449184301816e-06, "loss": 0.8527, "step": 402400 }, { "epoch": 5.61, "learning_rate": 1.9733487510274597e-06, "loss": 0.8428, "step": 402500 }, { "epoch": 5.61, "learning_rate": 1.966382925368144e-06, "loss": 0.8339, "step": 402600 }, { "epoch": 5.61, "learning_rate": 1.9594170997088286e-06, "loss": 0.8574, "step": 402700 }, { "epoch": 5.61, "learning_rate": 1.9524512740495133e-06, "loss": 0.8299, "step": 402800 }, { "epoch": 5.61, "learning_rate": 1.9454854483901976e-06, "loss": 0.8344, "step": 402900 }, { "epoch": 5.61, "learning_rate": 1.9385196227308823e-06, "loss": 0.7946, "step": 403000 }, { "epoch": 5.62, "learning_rate": 1.931553797071567e-06, "loss": 0.806, "step": 403100 }, { "epoch": 5.62, "learning_rate": 1.9245879714122516e-06, "loss": 0.8634, "step": 403200 }, { "epoch": 5.62, "learning_rate": 1.9176221457529363e-06, "loss": 0.8452, "step": 403300 }, { "epoch": 5.62, "learning_rate": 1.9106563200936206e-06, "loss": 0.8458, "step": 403400 }, { "epoch": 5.62, "learning_rate": 1.9036904944343053e-06, "loss": 0.8422, "step": 403500 }, { "epoch": 5.62, "learning_rate": 1.8967246687749901e-06, "loss": 0.8609, "step": 403600 }, { "epoch": 5.62, "learning_rate": 1.8897588431156744e-06, "loss": 0.8105, "step": 403700 }, { "epoch": 5.63, "learning_rate": 1.882793017456359e-06, "loss": 0.8159, "step": 403800 }, { "epoch": 5.63, "learning_rate": 1.875827191797044e-06, "loss": 0.8604, "step": 403900 }, { "epoch": 5.63, "learning_rate": 1.8688613661377285e-06, "loss": 0.8309, "step": 404000 }, { "epoch": 5.63, "learning_rate": 1.861895540478413e-06, "loss": 0.8026, "step": 404100 }, { "epoch": 5.63, "learning_rate": 1.8549297148190976e-06, "loss": 0.8271, "step": 404200 }, { "epoch": 5.63, "learning_rate": 1.847963889159782e-06, "loss": 0.837, "step": 404300 }, { "epoch": 5.63, "learning_rate": 1.8409980635004668e-06, "loss": 0.824, "step": 404400 }, { "epoch": 5.64, "learning_rate": 1.8340322378411514e-06, "loss": 0.8557, "step": 404500 }, { "epoch": 5.64, "learning_rate": 1.827066412181836e-06, "loss": 0.8109, "step": 404600 }, { "epoch": 5.64, "learning_rate": 1.8201005865225206e-06, "loss": 0.8213, "step": 404700 }, { "epoch": 5.64, "learning_rate": 1.8131347608632053e-06, "loss": 0.8316, "step": 404800 }, { "epoch": 5.64, "learning_rate": 1.8061689352038898e-06, "loss": 0.8072, "step": 404900 }, { "epoch": 5.64, "learning_rate": 1.7992031095445742e-06, "loss": 0.8013, "step": 405000 }, { "epoch": 5.64, "learning_rate": 1.7923069421418521e-06, "loss": 0.8226, "step": 405100 }, { "epoch": 5.65, "learning_rate": 1.7853411164825366e-06, "loss": 0.8322, "step": 405200 }, { "epoch": 5.65, "learning_rate": 1.7783752908232215e-06, "loss": 0.8296, "step": 405300 }, { "epoch": 5.65, "learning_rate": 1.771409465163906e-06, "loss": 0.8525, "step": 405400 }, { "epoch": 5.65, "learning_rate": 1.7644436395045905e-06, "loss": 0.8294, "step": 405500 }, { "epoch": 5.65, "learning_rate": 1.7574778138452751e-06, "loss": 0.8088, "step": 405600 }, { "epoch": 5.65, "learning_rate": 1.7505119881859598e-06, "loss": 0.8109, "step": 405700 }, { "epoch": 5.65, "learning_rate": 1.7435461625266443e-06, "loss": 0.836, "step": 405800 }, { "epoch": 5.65, "learning_rate": 1.7365803368673288e-06, "loss": 0.8353, "step": 405900 }, { "epoch": 5.66, "learning_rate": 1.7296145112080137e-06, "loss": 0.8345, "step": 406000 }, { "epoch": 5.66, "learning_rate": 1.7226486855486981e-06, "loss": 0.8032, "step": 406100 }, { "epoch": 5.66, "learning_rate": 1.7156828598893826e-06, "loss": 0.8448, "step": 406200 }, { "epoch": 5.66, "learning_rate": 1.7087170342300673e-06, "loss": 0.8559, "step": 406300 }, { "epoch": 5.66, "learning_rate": 1.701751208570752e-06, "loss": 0.8094, "step": 406400 }, { "epoch": 5.66, "learning_rate": 1.6947853829114364e-06, "loss": 0.8585, "step": 406500 }, { "epoch": 5.66, "learning_rate": 1.6878195572521211e-06, "loss": 0.8252, "step": 406600 }, { "epoch": 5.67, "learning_rate": 1.6808537315928058e-06, "loss": 0.7963, "step": 406700 }, { "epoch": 5.67, "learning_rate": 1.6738879059334903e-06, "loss": 0.8484, "step": 406800 }, { "epoch": 5.67, "learning_rate": 1.666922080274175e-06, "loss": 0.8171, "step": 406900 }, { "epoch": 5.67, "learning_rate": 1.6599562546148594e-06, "loss": 0.8206, "step": 407000 }, { "epoch": 5.67, "learning_rate": 1.6529904289555441e-06, "loss": 0.8143, "step": 407100 }, { "epoch": 5.67, "learning_rate": 1.6460942615528218e-06, "loss": 0.8433, "step": 407200 }, { "epoch": 5.67, "learning_rate": 1.6391284358935065e-06, "loss": 0.8295, "step": 407300 }, { "epoch": 5.68, "learning_rate": 1.6321626102341912e-06, "loss": 0.8231, "step": 407400 }, { "epoch": 5.68, "learning_rate": 1.6251967845748757e-06, "loss": 0.8251, "step": 407500 }, { "epoch": 5.68, "learning_rate": 1.6182309589155601e-06, "loss": 0.8477, "step": 407600 }, { "epoch": 5.68, "learning_rate": 1.611265133256245e-06, "loss": 0.8329, "step": 407700 }, { "epoch": 5.68, "learning_rate": 1.6042993075969295e-06, "loss": 0.8176, "step": 407800 }, { "epoch": 5.68, "learning_rate": 1.597333481937614e-06, "loss": 0.8167, "step": 407900 }, { "epoch": 5.68, "learning_rate": 1.5903676562782989e-06, "loss": 0.8161, "step": 408000 }, { "epoch": 5.69, "learning_rate": 1.5834018306189833e-06, "loss": 0.805, "step": 408100 }, { "epoch": 5.69, "learning_rate": 1.5764360049596678e-06, "loss": 0.8254, "step": 408200 }, { "epoch": 5.69, "learning_rate": 1.5694701793003525e-06, "loss": 0.847, "step": 408300 }, { "epoch": 5.69, "learning_rate": 1.5625043536410372e-06, "loss": 0.7958, "step": 408400 }, { "epoch": 5.69, "learning_rate": 1.5555385279817216e-06, "loss": 0.8541, "step": 408500 }, { "epoch": 5.69, "learning_rate": 1.5485727023224063e-06, "loss": 0.8399, "step": 408600 }, { "epoch": 5.69, "learning_rate": 1.541606876663091e-06, "loss": 0.8413, "step": 408700 }, { "epoch": 5.7, "learning_rate": 1.5346410510037755e-06, "loss": 0.8071, "step": 408800 }, { "epoch": 5.7, "learning_rate": 1.5276752253444602e-06, "loss": 0.8483, "step": 408900 }, { "epoch": 5.7, "learning_rate": 1.5207093996851446e-06, "loss": 0.827, "step": 409000 }, { "epoch": 5.7, "learning_rate": 1.5137435740258293e-06, "loss": 0.8573, "step": 409100 }, { "epoch": 5.7, "learning_rate": 1.506777748366514e-06, "loss": 0.8162, "step": 409200 }, { "epoch": 5.7, "learning_rate": 1.4998119227071985e-06, "loss": 0.8183, "step": 409300 }, { "epoch": 5.7, "learning_rate": 1.4928460970478832e-06, "loss": 0.847, "step": 409400 }, { "epoch": 5.71, "learning_rate": 1.4858802713885678e-06, "loss": 0.8509, "step": 409500 }, { "epoch": 5.71, "learning_rate": 1.4789144457292523e-06, "loss": 0.8433, "step": 409600 }, { "epoch": 5.71, "learning_rate": 1.4719486200699368e-06, "loss": 0.8343, "step": 409700 }, { "epoch": 5.71, "learning_rate": 1.4649827944106217e-06, "loss": 0.8438, "step": 409800 }, { "epoch": 5.71, "learning_rate": 1.4580169687513061e-06, "loss": 0.8591, "step": 409900 }, { "epoch": 5.71, "learning_rate": 1.4510511430919906e-06, "loss": 0.8236, "step": 410000 }, { "epoch": 5.71, "learning_rate": 1.4440853174326755e-06, "loss": 0.8381, "step": 410100 }, { "epoch": 5.71, "learning_rate": 1.43711949177336e-06, "loss": 0.8261, "step": 410200 }, { "epoch": 5.72, "learning_rate": 1.4301536661140445e-06, "loss": 0.8465, "step": 410300 }, { "epoch": 5.72, "learning_rate": 1.4231878404547291e-06, "loss": 0.8374, "step": 410400 }, { "epoch": 5.72, "learning_rate": 1.4162220147954138e-06, "loss": 0.8262, "step": 410500 }, { "epoch": 5.72, "learning_rate": 1.4092561891360983e-06, "loss": 0.8437, "step": 410600 }, { "epoch": 5.72, "learning_rate": 1.402290363476783e-06, "loss": 0.801, "step": 410700 }, { "epoch": 5.72, "learning_rate": 1.3953245378174677e-06, "loss": 0.8187, "step": 410800 }, { "epoch": 5.72, "learning_rate": 1.3884283704147454e-06, "loss": 0.827, "step": 410900 }, { "epoch": 5.73, "learning_rate": 1.3814625447554298e-06, "loss": 0.816, "step": 411000 }, { "epoch": 5.73, "learning_rate": 1.3744967190961145e-06, "loss": 0.8482, "step": 411100 }, { "epoch": 5.73, "learning_rate": 1.367530893436799e-06, "loss": 0.8296, "step": 411200 }, { "epoch": 5.73, "learning_rate": 1.3605650677774837e-06, "loss": 0.8425, "step": 411300 }, { "epoch": 5.73, "learning_rate": 1.3535992421181683e-06, "loss": 0.8138, "step": 411400 }, { "epoch": 5.73, "learning_rate": 1.3466334164588528e-06, "loss": 0.8248, "step": 411500 }, { "epoch": 5.73, "learning_rate": 1.3396675907995375e-06, "loss": 0.8417, "step": 411600 }, { "epoch": 5.74, "learning_rate": 1.332701765140222e-06, "loss": 0.845, "step": 411700 }, { "epoch": 5.74, "learning_rate": 1.3257359394809067e-06, "loss": 0.8463, "step": 411800 }, { "epoch": 5.74, "learning_rate": 1.3187701138215913e-06, "loss": 0.8482, "step": 411900 }, { "epoch": 5.74, "learning_rate": 1.3118042881622758e-06, "loss": 0.8133, "step": 412000 }, { "epoch": 5.74, "learning_rate": 1.3048384625029605e-06, "loss": 0.8457, "step": 412100 }, { "epoch": 5.74, "learning_rate": 1.2978726368436452e-06, "loss": 0.7992, "step": 412200 }, { "epoch": 5.74, "learning_rate": 1.2909068111843297e-06, "loss": 0.8254, "step": 412300 }, { "epoch": 5.75, "learning_rate": 1.2839409855250141e-06, "loss": 0.8398, "step": 412400 }, { "epoch": 5.75, "learning_rate": 1.276975159865699e-06, "loss": 0.8129, "step": 412500 }, { "epoch": 5.75, "learning_rate": 1.2700093342063835e-06, "loss": 0.8211, "step": 412600 }, { "epoch": 5.75, "learning_rate": 1.263043508547068e-06, "loss": 0.8203, "step": 412700 }, { "epoch": 5.75, "learning_rate": 1.2560776828877529e-06, "loss": 0.8585, "step": 412800 }, { "epoch": 5.75, "learning_rate": 1.2491815154850303e-06, "loss": 0.8181, "step": 412900 }, { "epoch": 5.75, "learning_rate": 1.242215689825715e-06, "loss": 0.8559, "step": 413000 }, { "epoch": 5.76, "learning_rate": 1.2352498641663997e-06, "loss": 0.8108, "step": 413100 }, { "epoch": 5.76, "learning_rate": 1.2282840385070842e-06, "loss": 0.8364, "step": 413200 }, { "epoch": 5.76, "learning_rate": 1.2213182128477689e-06, "loss": 0.8198, "step": 413300 }, { "epoch": 5.76, "learning_rate": 1.2143523871884535e-06, "loss": 0.8501, "step": 413400 }, { "epoch": 5.76, "learning_rate": 1.207386561529138e-06, "loss": 0.8246, "step": 413500 }, { "epoch": 5.76, "learning_rate": 1.2004207358698227e-06, "loss": 0.79, "step": 413600 }, { "epoch": 5.76, "learning_rate": 1.1934549102105074e-06, "loss": 0.8225, "step": 413700 }, { "epoch": 5.76, "learning_rate": 1.1864890845511919e-06, "loss": 0.8366, "step": 413800 }, { "epoch": 5.77, "learning_rate": 1.1795232588918765e-06, "loss": 0.8177, "step": 413900 }, { "epoch": 5.77, "learning_rate": 1.172557433232561e-06, "loss": 0.8278, "step": 414000 }, { "epoch": 5.77, "learning_rate": 1.1655916075732457e-06, "loss": 0.8216, "step": 414100 }, { "epoch": 5.77, "learning_rate": 1.1586257819139304e-06, "loss": 0.7972, "step": 414200 }, { "epoch": 5.77, "learning_rate": 1.1516599562546149e-06, "loss": 0.8423, "step": 414300 }, { "epoch": 5.77, "learning_rate": 1.1446941305952995e-06, "loss": 0.8451, "step": 414400 }, { "epoch": 5.77, "learning_rate": 1.1377283049359842e-06, "loss": 0.8252, "step": 414500 }, { "epoch": 5.78, "learning_rate": 1.1307624792766687e-06, "loss": 0.8351, "step": 414600 }, { "epoch": 5.78, "learning_rate": 1.1237966536173532e-06, "loss": 0.854, "step": 414700 }, { "epoch": 5.78, "learning_rate": 1.116830827958038e-06, "loss": 0.8025, "step": 414800 }, { "epoch": 5.78, "learning_rate": 1.1098650022987225e-06, "loss": 0.8122, "step": 414900 }, { "epoch": 5.78, "learning_rate": 1.102899176639407e-06, "loss": 0.8385, "step": 415000 }, { "epoch": 5.78, "learning_rate": 1.096003009236685e-06, "loss": 0.8497, "step": 415100 }, { "epoch": 5.78, "learning_rate": 1.0890371835773694e-06, "loss": 0.8524, "step": 415200 }, { "epoch": 5.79, "learning_rate": 1.082071357918054e-06, "loss": 0.8413, "step": 415300 }, { "epoch": 5.79, "learning_rate": 1.0751055322587387e-06, "loss": 0.8408, "step": 415400 }, { "epoch": 5.79, "learning_rate": 1.0681397065994232e-06, "loss": 0.8346, "step": 415500 }, { "epoch": 5.79, "learning_rate": 1.061173880940108e-06, "loss": 0.8443, "step": 415600 }, { "epoch": 5.79, "learning_rate": 1.0542080552807926e-06, "loss": 0.8039, "step": 415700 }, { "epoch": 5.79, "learning_rate": 1.047242229621477e-06, "loss": 0.8309, "step": 415800 }, { "epoch": 5.79, "learning_rate": 1.0402764039621617e-06, "loss": 0.8151, "step": 415900 }, { "epoch": 5.8, "learning_rate": 1.0333105783028462e-06, "loss": 0.8235, "step": 416000 }, { "epoch": 5.8, "learning_rate": 1.026344752643531e-06, "loss": 0.8586, "step": 416100 }, { "epoch": 5.8, "learning_rate": 1.0193789269842156e-06, "loss": 0.8187, "step": 416200 }, { "epoch": 5.8, "learning_rate": 1.0124131013249e-06, "loss": 0.8205, "step": 416300 }, { "epoch": 5.8, "learning_rate": 1.0054472756655847e-06, "loss": 0.8414, "step": 416400 }, { "epoch": 5.8, "learning_rate": 9.984814500062692e-07, "loss": 0.8385, "step": 416500 }, { "epoch": 5.8, "learning_rate": 9.915156243469539e-07, "loss": 0.8484, "step": 416600 }, { "epoch": 5.81, "learning_rate": 9.845497986876384e-07, "loss": 0.8357, "step": 416700 }, { "epoch": 5.81, "learning_rate": 9.77583973028323e-07, "loss": 0.8065, "step": 416800 }, { "epoch": 5.81, "learning_rate": 9.706181473690077e-07, "loss": 0.8406, "step": 416900 }, { "epoch": 5.81, "learning_rate": 9.636523217096922e-07, "loss": 0.8033, "step": 417000 }, { "epoch": 5.81, "learning_rate": 9.566864960503769e-07, "loss": 0.7802, "step": 417100 }, { "epoch": 5.81, "learning_rate": 9.497206703910616e-07, "loss": 0.8161, "step": 417200 }, { "epoch": 5.81, "learning_rate": 9.42754844731746e-07, "loss": 0.8294, "step": 417300 }, { "epoch": 5.82, "learning_rate": 9.357890190724307e-07, "loss": 0.8428, "step": 417400 }, { "epoch": 5.82, "learning_rate": 9.288231934131153e-07, "loss": 0.8358, "step": 417500 }, { "epoch": 5.82, "learning_rate": 9.218573677537999e-07, "loss": 0.8485, "step": 417600 }, { "epoch": 5.82, "learning_rate": 9.148915420944845e-07, "loss": 0.8188, "step": 417700 }, { "epoch": 5.82, "learning_rate": 9.07925716435169e-07, "loss": 0.8022, "step": 417800 }, { "epoch": 5.82, "learning_rate": 9.009598907758537e-07, "loss": 0.8418, "step": 417900 }, { "epoch": 5.82, "learning_rate": 8.939940651165383e-07, "loss": 0.8339, "step": 418000 }, { "epoch": 5.82, "learning_rate": 8.870282394572229e-07, "loss": 0.8511, "step": 418100 }, { "epoch": 5.83, "learning_rate": 8.800624137979074e-07, "loss": 0.8283, "step": 418200 }, { "epoch": 5.83, "learning_rate": 8.730965881385921e-07, "loss": 0.7943, "step": 418300 }, { "epoch": 5.83, "learning_rate": 8.661307624792766e-07, "loss": 0.8262, "step": 418400 }, { "epoch": 5.83, "learning_rate": 8.591649368199613e-07, "loss": 0.8273, "step": 418500 }, { "epoch": 5.83, "learning_rate": 8.52268769417239e-07, "loss": 0.8275, "step": 418600 }, { "epoch": 5.83, "learning_rate": 8.453029437579237e-07, "loss": 0.8275, "step": 418700 }, { "epoch": 5.83, "learning_rate": 8.383371180986082e-07, "loss": 0.857, "step": 418800 }, { "epoch": 5.84, "learning_rate": 8.313712924392928e-07, "loss": 0.8198, "step": 418900 }, { "epoch": 5.84, "learning_rate": 8.244054667799775e-07, "loss": 0.8198, "step": 419000 }, { "epoch": 5.84, "learning_rate": 8.174396411206621e-07, "loss": 0.8533, "step": 419100 }, { "epoch": 5.84, "learning_rate": 8.104738154613467e-07, "loss": 0.8134, "step": 419200 }, { "epoch": 5.84, "learning_rate": 8.035079898020312e-07, "loss": 0.837, "step": 419300 }, { "epoch": 5.84, "learning_rate": 7.965421641427159e-07, "loss": 0.8556, "step": 419400 }, { "epoch": 5.84, "learning_rate": 7.895763384834004e-07, "loss": 0.8463, "step": 419500 }, { "epoch": 5.85, "learning_rate": 7.826105128240851e-07, "loss": 0.856, "step": 419600 }, { "epoch": 5.85, "learning_rate": 7.756446871647698e-07, "loss": 0.8344, "step": 419700 }, { "epoch": 5.85, "learning_rate": 7.686788615054542e-07, "loss": 0.8052, "step": 419800 }, { "epoch": 5.85, "learning_rate": 7.617130358461389e-07, "loss": 0.8433, "step": 419900 }, { "epoch": 5.85, "learning_rate": 7.547472101868235e-07, "loss": 0.8475, "step": 420000 }, { "epoch": 5.85, "learning_rate": 7.477813845275081e-07, "loss": 0.8058, "step": 420100 }, { "epoch": 5.85, "learning_rate": 7.408155588681926e-07, "loss": 0.8244, "step": 420200 }, { "epoch": 5.86, "learning_rate": 7.338497332088772e-07, "loss": 0.8162, "step": 420300 }, { "epoch": 5.86, "learning_rate": 7.268839075495619e-07, "loss": 0.8354, "step": 420400 }, { "epoch": 5.86, "learning_rate": 7.199180818902465e-07, "loss": 0.8327, "step": 420500 }, { "epoch": 5.86, "learning_rate": 7.129522562309311e-07, "loss": 0.8383, "step": 420600 }, { "epoch": 5.86, "learning_rate": 7.059864305716156e-07, "loss": 0.8278, "step": 420700 }, { "epoch": 5.86, "learning_rate": 6.990206049123003e-07, "loss": 0.8357, "step": 420800 }, { "epoch": 5.86, "learning_rate": 6.920547792529848e-07, "loss": 0.8503, "step": 420900 }, { "epoch": 5.87, "learning_rate": 6.850889535936695e-07, "loss": 0.8103, "step": 421000 }, { "epoch": 5.87, "learning_rate": 6.781231279343542e-07, "loss": 0.8206, "step": 421100 }, { "epoch": 5.87, "learning_rate": 6.711573022750386e-07, "loss": 0.8256, "step": 421200 }, { "epoch": 5.87, "learning_rate": 6.641914766157233e-07, "loss": 0.8429, "step": 421300 }, { "epoch": 5.87, "learning_rate": 6.572256509564079e-07, "loss": 0.8381, "step": 421400 }, { "epoch": 5.87, "learning_rate": 6.502598252970925e-07, "loss": 0.8227, "step": 421500 }, { "epoch": 5.87, "learning_rate": 6.43293999637777e-07, "loss": 0.8342, "step": 421600 }, { "epoch": 5.87, "learning_rate": 6.363281739784617e-07, "loss": 0.8107, "step": 421700 }, { "epoch": 5.88, "learning_rate": 6.293623483191463e-07, "loss": 0.8456, "step": 421800 }, { "epoch": 5.88, "learning_rate": 6.223965226598309e-07, "loss": 0.8257, "step": 421900 }, { "epoch": 5.88, "learning_rate": 6.154306970005156e-07, "loss": 0.8303, "step": 422000 }, { "epoch": 5.88, "learning_rate": 6.084648713412e-07, "loss": 0.8303, "step": 422100 }, { "epoch": 5.88, "learning_rate": 6.014990456818847e-07, "loss": 0.8193, "step": 422200 }, { "epoch": 5.88, "learning_rate": 5.946028782791624e-07, "loss": 0.8381, "step": 422300 }, { "epoch": 5.88, "learning_rate": 5.876370526198471e-07, "loss": 0.8263, "step": 422400 }, { "epoch": 5.89, "learning_rate": 5.806712269605317e-07, "loss": 0.8478, "step": 422500 }, { "epoch": 5.89, "learning_rate": 5.737054013012163e-07, "loss": 0.8227, "step": 422600 }, { "epoch": 5.89, "learning_rate": 5.667395756419008e-07, "loss": 0.8371, "step": 422700 }, { "epoch": 5.89, "learning_rate": 5.597737499825854e-07, "loss": 0.8067, "step": 422800 }, { "epoch": 5.89, "learning_rate": 5.5280792432327e-07, "loss": 0.8135, "step": 422900 }, { "epoch": 5.89, "learning_rate": 5.458420986639547e-07, "loss": 0.8414, "step": 423000 }, { "epoch": 5.89, "learning_rate": 5.388762730046393e-07, "loss": 0.8406, "step": 423100 }, { "epoch": 5.9, "learning_rate": 5.319104473453238e-07, "loss": 0.8134, "step": 423200 }, { "epoch": 5.9, "learning_rate": 5.249446216860085e-07, "loss": 0.8446, "step": 423300 }, { "epoch": 5.9, "learning_rate": 5.17978796026693e-07, "loss": 0.8345, "step": 423400 }, { "epoch": 5.9, "learning_rate": 5.110129703673777e-07, "loss": 0.8047, "step": 423500 }, { "epoch": 5.9, "learning_rate": 5.040471447080622e-07, "loss": 0.8492, "step": 423600 }, { "epoch": 5.9, "learning_rate": 4.970813190487468e-07, "loss": 0.8023, "step": 423700 }, { "epoch": 5.9, "learning_rate": 4.901154933894315e-07, "loss": 0.8161, "step": 423800 }, { "epoch": 5.91, "learning_rate": 4.831496677301161e-07, "loss": 0.8424, "step": 423900 }, { "epoch": 5.91, "learning_rate": 4.761838420708007e-07, "loss": 0.836, "step": 424000 }, { "epoch": 5.91, "learning_rate": 4.6921801641148524e-07, "loss": 0.8129, "step": 424100 }, { "epoch": 5.91, "learning_rate": 4.6225219075216987e-07, "loss": 0.8521, "step": 424200 }, { "epoch": 5.91, "learning_rate": 4.552863650928545e-07, "loss": 0.8312, "step": 424300 }, { "epoch": 5.91, "learning_rate": 4.483205394335391e-07, "loss": 0.8064, "step": 424400 }, { "epoch": 5.91, "learning_rate": 4.4135471377422365e-07, "loss": 0.8389, "step": 424500 }, { "epoch": 5.92, "learning_rate": 4.3438888811490823e-07, "loss": 0.8164, "step": 424600 }, { "epoch": 5.92, "learning_rate": 4.2742306245559286e-07, "loss": 0.8236, "step": 424700 }, { "epoch": 5.92, "learning_rate": 4.204572367962775e-07, "loss": 0.8056, "step": 424800 }, { "epoch": 5.92, "learning_rate": 4.1349141113696207e-07, "loss": 0.8357, "step": 424900 }, { "epoch": 5.92, "learning_rate": 4.065255854776467e-07, "loss": 0.8209, "step": 425000 }, { "epoch": 5.92, "learning_rate": 3.995597598183313e-07, "loss": 0.8222, "step": 425100 }, { "epoch": 5.92, "learning_rate": 3.9259393415901585e-07, "loss": 0.8028, "step": 425200 }, { "epoch": 5.93, "learning_rate": 3.856281084997005e-07, "loss": 0.8125, "step": 425300 }, { "epoch": 5.93, "learning_rate": 3.7873194109697824e-07, "loss": 0.8536, "step": 425400 }, { "epoch": 5.93, "learning_rate": 3.7176611543766287e-07, "loss": 0.8474, "step": 425500 }, { "epoch": 5.93, "learning_rate": 3.6480028977834744e-07, "loss": 0.8179, "step": 425600 }, { "epoch": 5.93, "learning_rate": 3.57834464119032e-07, "loss": 0.8267, "step": 425700 }, { "epoch": 5.93, "learning_rate": 3.5086863845971665e-07, "loss": 0.8247, "step": 425800 }, { "epoch": 5.93, "learning_rate": 3.4390281280040123e-07, "loss": 0.8374, "step": 425900 }, { "epoch": 5.93, "learning_rate": 3.3693698714108586e-07, "loss": 0.7893, "step": 426000 }, { "epoch": 5.94, "learning_rate": 3.2997116148177044e-07, "loss": 0.8325, "step": 426100 }, { "epoch": 5.94, "learning_rate": 3.2300533582245507e-07, "loss": 0.8137, "step": 426200 }, { "epoch": 5.94, "learning_rate": 3.1603951016313964e-07, "loss": 0.8398, "step": 426300 }, { "epoch": 5.94, "learning_rate": 3.090736845038242e-07, "loss": 0.8326, "step": 426400 }, { "epoch": 5.94, "learning_rate": 3.0210785884450885e-07, "loss": 0.8341, "step": 426500 }, { "epoch": 5.94, "learning_rate": 2.9514203318519343e-07, "loss": 0.8363, "step": 426600 }, { "epoch": 5.94, "learning_rate": 2.8817620752587806e-07, "loss": 0.825, "step": 426700 }, { "epoch": 5.95, "learning_rate": 2.812103818665627e-07, "loss": 0.8365, "step": 426800 }, { "epoch": 5.95, "learning_rate": 2.7424455620724727e-07, "loss": 0.8254, "step": 426900 }, { "epoch": 5.95, "learning_rate": 2.6727873054793184e-07, "loss": 0.8007, "step": 427000 }, { "epoch": 5.95, "learning_rate": 2.603129048886165e-07, "loss": 0.8189, "step": 427100 }, { "epoch": 5.95, "learning_rate": 2.5334707922930105e-07, "loss": 0.8243, "step": 427200 }, { "epoch": 5.95, "learning_rate": 2.4638125356998563e-07, "loss": 0.8188, "step": 427300 }, { "epoch": 5.95, "learning_rate": 2.3941542791067026e-07, "loss": 0.8351, "step": 427400 }, { "epoch": 5.96, "learning_rate": 2.32519260507948e-07, "loss": 0.8359, "step": 427500 }, { "epoch": 5.96, "learning_rate": 2.2555343484863262e-07, "loss": 0.8314, "step": 427600 }, { "epoch": 5.96, "learning_rate": 2.1858760918931722e-07, "loss": 0.802, "step": 427700 }, { "epoch": 5.96, "learning_rate": 2.1162178353000182e-07, "loss": 0.8333, "step": 427800 }, { "epoch": 5.96, "learning_rate": 2.0465595787068643e-07, "loss": 0.8329, "step": 427900 }, { "epoch": 5.96, "learning_rate": 1.97690132211371e-07, "loss": 0.805, "step": 428000 }, { "epoch": 5.96, "learning_rate": 1.907243065520556e-07, "loss": 0.8559, "step": 428100 }, { "epoch": 5.97, "learning_rate": 1.8375848089274024e-07, "loss": 0.836, "step": 428200 }, { "epoch": 5.97, "learning_rate": 1.7679265523342482e-07, "loss": 0.8504, "step": 428300 }, { "epoch": 5.97, "learning_rate": 1.6982682957410942e-07, "loss": 0.8513, "step": 428400 }, { "epoch": 5.97, "learning_rate": 1.6286100391479402e-07, "loss": 0.8228, "step": 428500 }, { "epoch": 5.97, "learning_rate": 1.5589517825547863e-07, "loss": 0.8337, "step": 428600 }, { "epoch": 5.97, "learning_rate": 1.4892935259616323e-07, "loss": 0.8333, "step": 428700 }, { "epoch": 5.97, "learning_rate": 1.419635269368478e-07, "loss": 0.816, "step": 428800 }, { "epoch": 5.98, "learning_rate": 1.3499770127753244e-07, "loss": 0.8424, "step": 428900 }, { "epoch": 5.98, "learning_rate": 1.2803187561821702e-07, "loss": 0.8313, "step": 429000 }, { "epoch": 5.98, "learning_rate": 1.2106604995890162e-07, "loss": 0.8185, "step": 429100 }, { "epoch": 5.98, "learning_rate": 1.1410022429958622e-07, "loss": 0.811, "step": 429200 }, { "epoch": 5.98, "learning_rate": 1.0713439864027084e-07, "loss": 0.8382, "step": 429300 }, { "epoch": 5.98, "learning_rate": 1.0016857298095543e-07, "loss": 0.8313, "step": 429400 }, { "epoch": 5.98, "learning_rate": 9.320274732164004e-08, "loss": 0.8064, "step": 429500 }, { "epoch": 5.99, "learning_rate": 8.623692166232464e-08, "loss": 0.8072, "step": 429600 }, { "epoch": 5.99, "learning_rate": 7.927109600300923e-08, "loss": 0.8101, "step": 429700 }, { "epoch": 5.99, "learning_rate": 7.230527034369385e-08, "loss": 0.8358, "step": 429800 }, { "epoch": 5.99, "learning_rate": 6.533944468437844e-08, "loss": 0.8318, "step": 429900 }, { "epoch": 5.99, "learning_rate": 5.837361902506304e-08, "loss": 0.8202, "step": 430000 }, { "epoch": 5.99, "learning_rate": 5.1407793365747645e-08, "loss": 0.8055, "step": 430100 }, { "epoch": 5.99, "learning_rate": 4.444196770643224e-08, "loss": 0.8096, "step": 430200 }, { "epoch": 5.99, "learning_rate": 3.7476142047116846e-08, "loss": 0.8211, "step": 430300 }, { "epoch": 6.0, "learning_rate": 3.051031638780144e-08, "loss": 0.8008, "step": 430400 }, { "epoch": 6.0, "learning_rate": 2.3544490728486047e-08, "loss": 0.8318, "step": 430500 }, { "epoch": 6.0, "learning_rate": 1.657866506917065e-08, "loss": 0.8358, "step": 430600 }, { "epoch": 6.0, "eval_gen_len": 20.0, "eval_loss": 1.199650764465332, "eval_rouge1": 12.5153, "eval_rouge2": 3.778, "eval_rougeL": 12.0382, "eval_rougeLsum": 12.1332, "eval_runtime": 1506.4096, "eval_samples_per_second": 8.874, "eval_steps_per_second": 2.219, "step": 430674 }, { "epoch": 6.0, "step": 430674, "total_flos": 4.0669778872979374e+18, "train_loss": 1.0701684290170863, "train_runtime": 175298.0292, "train_samples_per_second": 9.827, "train_steps_per_second": 2.457 } ], "max_steps": 430674, "num_train_epochs": 6, "total_flos": 4.0669778872979374e+18, "trial_name": null, "trial_params": null }