{ "best_metric": null, "best_model_checkpoint": null, "epoch": 6.0, "global_step": 1254162, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.1, "learning_rate": 1.0000000000000002e-06, "loss": 11.2242, "step": 500 }, { "epoch": 0.2, "learning_rate": 2.0000000000000003e-06, "loss": 10.4452, "step": 1000 }, { "epoch": 0.3, "learning_rate": 3e-06, "loss": 10.0268, "step": 1500 }, { "epoch": 0.4, "learning_rate": 4.000000000000001e-06, "loss": 9.5717, "step": 2000 }, { "epoch": 0.5, "learning_rate": 5e-06, "loss": 9.1643, "step": 2500 }, { "epoch": 0.6, "learning_rate": 6e-06, "loss": 8.8893, "step": 3000 }, { "epoch": 0.7, "learning_rate": 7e-06, "loss": 8.7069, "step": 3500 }, { "epoch": 0.8, "learning_rate": 8.000000000000001e-06, "loss": 8.5915, "step": 4000 }, { "epoch": 0.9, "learning_rate": 9e-06, "loss": 8.5111, "step": 4500 }, { "epoch": 1.0, "learning_rate": 1e-05, "loss": 8.4616, "step": 5000 }, { "epoch": 1.1, "learning_rate": 9.666844349680172e-06, "loss": 8.4, "step": 5500 }, { "epoch": 1.2, "learning_rate": 9.333688699360341e-06, "loss": 8.3691, "step": 6000 }, { "epoch": 1.3, "learning_rate": 9.000533049040512e-06, "loss": 8.3255, "step": 6500 }, { "epoch": 1.4, "learning_rate": 8.667377398720683e-06, "loss": 8.2952, "step": 7000 }, { "epoch": 1.5, "learning_rate": 8.334221748400854e-06, "loss": 8.272, "step": 7500 }, { "epoch": 1.6, "learning_rate": 8.001066098081024e-06, "loss": 8.2567, "step": 8000 }, { "epoch": 1.7, "learning_rate": 7.667910447761195e-06, "loss": 8.2145, "step": 8500 }, { "epoch": 1.8, "learning_rate": 7.334754797441366e-06, "loss": 8.2102, "step": 9000 }, { "epoch": 1.9, "learning_rate": 7.001599147121536e-06, "loss": 8.172, "step": 9500 }, { "epoch": 2.0, "learning_rate": 6.668443496801706e-06, "loss": 8.1503, "step": 10000 }, { "epoch": 2.1, "learning_rate": 6.3352878464818765e-06, "loss": 8.1389, "step": 10500 }, { "epoch": 2.2, "learning_rate": 6.002132196162047e-06, "loss": 8.13, "step": 11000 }, { "epoch": 2.3, "learning_rate": 5.668976545842218e-06, "loss": 8.1228, "step": 11500 }, { "epoch": 2.4, "learning_rate": 5.335820895522389e-06, "loss": 8.1019, "step": 12000 }, { "epoch": 2.5, "learning_rate": 5.002665245202559e-06, "loss": 8.0855, "step": 12500 }, { "epoch": 2.6, "learning_rate": 4.669509594882729e-06, "loss": 8.0685, "step": 13000 }, { "epoch": 2.7, "learning_rate": 4.3363539445629e-06, "loss": 8.0593, "step": 13500 }, { "epoch": 2.8, "learning_rate": 4.0031982942430705e-06, "loss": 8.0628, "step": 14000 }, { "epoch": 2.9, "learning_rate": 3.670042643923241e-06, "loss": 8.051, "step": 14500 }, { "epoch": 3.0, "learning_rate": 3.336886993603412e-06, "loss": 8.0213, "step": 15000 }, { "epoch": 3.1, "learning_rate": 3.0037313432835824e-06, "loss": 8.0247, "step": 15500 }, { "epoch": 3.2, "learning_rate": 2.6705756929637526e-06, "loss": 8.0126, "step": 16000 }, { "epoch": 3.3, "learning_rate": 2.3374200426439233e-06, "loss": 8.0008, "step": 16500 }, { "epoch": 3.4, "learning_rate": 2.004264392324094e-06, "loss": 7.9928, "step": 17000 }, { "epoch": 3.5, "learning_rate": 1.6711087420042645e-06, "loss": 8.0083, "step": 17500 }, { "epoch": 3.6, "learning_rate": 1.3379530916844352e-06, "loss": 8.0019, "step": 18000 }, { "epoch": 3.7, "learning_rate": 1.0047974413646058e-06, "loss": 7.9937, "step": 18500 }, { "epoch": 3.8, "learning_rate": 6.716417910447762e-07, "loss": 7.9982, "step": 19000 }, { "epoch": 3.9, "learning_rate": 3.3848614072494674e-07, "loss": 7.998, "step": 19500 }, { "epoch": 4.0, "learning_rate": 5.330490405117271e-09, "loss": 7.9901, "step": 20000 }, { "epoch": 4.0, "step": 20008, "total_flos": 109772945935441920, "train_runtime": 13759.1079, "train_samples_per_second": 1.454 }, { "epoch": 0.09, "learning_rate": 9.84e-07, "loss": 8.0645, "step": 20500 }, { "epoch": 0.09, "learning_rate": 1.984e-06, "loss": 8.0449, "step": 21000 }, { "epoch": 0.09, "learning_rate": 2.984e-06, "loss": 8.0578, "step": 21500 }, { "epoch": 0.1, "learning_rate": 3.984e-06, "loss": 8.0419, "step": 22000 }, { "epoch": 0.1, "learning_rate": 4.984000000000001e-06, "loss": 8.0566, "step": 22500 }, { "epoch": 0.1, "learning_rate": 5.984000000000001e-06, "loss": 8.0375, "step": 23000 }, { "epoch": 0.1, "learning_rate": 6.984e-06, "loss": 8.035, "step": 23500 }, { "epoch": 0.11, "learning_rate": 7.984e-06, "loss": 8.0378, "step": 24000 }, { "epoch": 0.11, "learning_rate": 8.984e-06, "loss": 8.0114, "step": 24500 }, { "epoch": 0.11, "learning_rate": 9.984e-06, "loss": 8.0101, "step": 25000 }, { "epoch": 0.11, "learning_rate": 9.994587673095457e-06, "loss": 7.998, "step": 25500 }, { "epoch": 0.11, "learning_rate": 9.989087340875389e-06, "loss": 7.9806, "step": 26000 }, { "epoch": 0.12, "learning_rate": 9.983587008655323e-06, "loss": 7.9583, "step": 26500 }, { "epoch": 0.12, "learning_rate": 9.978086676435259e-06, "loss": 7.9416, "step": 27000 }, { "epoch": 0.12, "learning_rate": 9.972586344215191e-06, "loss": 7.933, "step": 27500 }, { "epoch": 0.12, "learning_rate": 9.967086011995125e-06, "loss": 7.9444, "step": 28000 }, { "epoch": 0.12, "learning_rate": 9.961585679775059e-06, "loss": 7.9236, "step": 28500 }, { "epoch": 0.13, "learning_rate": 9.956085347554993e-06, "loss": 7.8992, "step": 29000 }, { "epoch": 0.13, "learning_rate": 9.950585015334927e-06, "loss": 7.9036, "step": 29500 }, { "epoch": 0.13, "learning_rate": 9.94508468311486e-06, "loss": 7.8702, "step": 30000 }, { "epoch": 0.13, "learning_rate": 9.939584350894796e-06, "loss": 7.897, "step": 30500 }, { "epoch": 0.14, "learning_rate": 9.934084018674728e-06, "loss": 7.8663, "step": 31000 }, { "epoch": 0.14, "learning_rate": 9.928583686454662e-06, "loss": 7.8587, "step": 31500 }, { "epoch": 0.14, "learning_rate": 9.923083354234596e-06, "loss": 7.8413, "step": 32000 }, { "epoch": 0.14, "learning_rate": 9.91758302201453e-06, "loss": 7.8537, "step": 32500 }, { "epoch": 0.14, "learning_rate": 9.912082689794464e-06, "loss": 7.8288, "step": 33000 }, { "epoch": 0.15, "learning_rate": 9.906582357574398e-06, "loss": 7.8265, "step": 33500 }, { "epoch": 0.15, "learning_rate": 9.901082025354332e-06, "loss": 7.828, "step": 34000 }, { "epoch": 0.15, "learning_rate": 9.895581693134266e-06, "loss": 7.8077, "step": 34500 }, { "epoch": 0.15, "learning_rate": 9.8900813609142e-06, "loss": 7.8088, "step": 35000 }, { "epoch": 0.16, "learning_rate": 9.884581028694134e-06, "loss": 7.7975, "step": 35500 }, { "epoch": 0.16, "learning_rate": 9.879080696474068e-06, "loss": 7.7805, "step": 36000 }, { "epoch": 0.16, "learning_rate": 9.873580364254002e-06, "loss": 7.7865, "step": 36500 }, { "epoch": 0.16, "learning_rate": 9.868080032033936e-06, "loss": 7.7577, "step": 37000 }, { "epoch": 0.16, "learning_rate": 9.86257969981387e-06, "loss": 7.7753, "step": 37500 }, { "epoch": 0.17, "learning_rate": 9.857079367593803e-06, "loss": 7.7512, "step": 38000 }, { "epoch": 0.17, "learning_rate": 9.851579035373737e-06, "loss": 7.7535, "step": 38500 }, { "epoch": 0.17, "learning_rate": 9.846078703153671e-06, "loss": 7.762, "step": 39000 }, { "epoch": 0.17, "learning_rate": 9.840578370933605e-06, "loss": 7.7484, "step": 39500 }, { "epoch": 0.18, "learning_rate": 9.835078038713539e-06, "loss": 7.7356, "step": 40000 }, { "epoch": 0.18, "learning_rate": 9.829577706493473e-06, "loss": 7.7407, "step": 40500 }, { "epoch": 0.18, "learning_rate": 9.824077374273407e-06, "loss": 7.7426, "step": 41000 }, { "epoch": 0.18, "learning_rate": 9.818577042053341e-06, "loss": 7.7142, "step": 41500 }, { "epoch": 0.18, "learning_rate": 9.813076709833275e-06, "loss": 7.7211, "step": 42000 }, { "epoch": 0.19, "learning_rate": 9.807576377613209e-06, "loss": 7.7111, "step": 42500 }, { "epoch": 0.19, "learning_rate": 9.802076045393143e-06, "loss": 7.7011, "step": 43000 }, { "epoch": 0.19, "learning_rate": 9.796575713173077e-06, "loss": 7.7001, "step": 43500 }, { "epoch": 0.19, "learning_rate": 9.79107538095301e-06, "loss": 7.6904, "step": 44000 }, { "epoch": 0.19, "learning_rate": 9.785575048732944e-06, "loss": 7.6927, "step": 44500 }, { "epoch": 0.2, "learning_rate": 9.780074716512878e-06, "loss": 7.697, "step": 45000 }, { "epoch": 0.2, "learning_rate": 9.774574384292812e-06, "loss": 7.6826, "step": 45500 }, { "epoch": 0.2, "learning_rate": 9.769074052072746e-06, "loss": 7.6948, "step": 46000 }, { "epoch": 0.2, "learning_rate": 9.76357371985268e-06, "loss": 7.6657, "step": 46500 }, { "epoch": 0.21, "learning_rate": 9.758073387632614e-06, "loss": 7.6656, "step": 47000 }, { "epoch": 0.21, "learning_rate": 9.752573055412548e-06, "loss": 7.6701, "step": 47500 }, { "epoch": 0.21, "learning_rate": 9.74707272319248e-06, "loss": 7.6587, "step": 48000 }, { "epoch": 0.21, "learning_rate": 9.741572390972416e-06, "loss": 7.6808, "step": 48500 }, { "epoch": 0.21, "learning_rate": 9.73607205875235e-06, "loss": 7.6528, "step": 49000 }, { "epoch": 0.22, "learning_rate": 9.730571726532284e-06, "loss": 7.6578, "step": 49500 }, { "epoch": 0.22, "learning_rate": 9.725071394312218e-06, "loss": 7.6404, "step": 50000 }, { "epoch": 0.22, "learning_rate": 9.719571062092152e-06, "loss": 7.6389, "step": 50500 }, { "epoch": 0.22, "learning_rate": 9.714070729872086e-06, "loss": 7.6365, "step": 51000 }, { "epoch": 0.23, "learning_rate": 9.708570397652018e-06, "loss": 7.6291, "step": 51500 }, { "epoch": 0.23, "learning_rate": 9.703070065431953e-06, "loss": 7.6347, "step": 52000 }, { "epoch": 0.23, "learning_rate": 9.697569733211887e-06, "loss": 7.6025, "step": 52500 }, { "epoch": 0.23, "learning_rate": 9.69206940099182e-06, "loss": 7.6127, "step": 53000 }, { "epoch": 0.23, "learning_rate": 9.686569068771755e-06, "loss": 7.6166, "step": 53500 }, { "epoch": 0.24, "learning_rate": 9.681068736551689e-06, "loss": 7.6208, "step": 54000 }, { "epoch": 0.24, "learning_rate": 9.675568404331623e-06, "loss": 7.6046, "step": 54500 }, { "epoch": 0.24, "learning_rate": 9.670068072111555e-06, "loss": 7.6301, "step": 55000 }, { "epoch": 0.24, "learning_rate": 9.664567739891491e-06, "loss": 7.6091, "step": 55500 }, { "epoch": 0.25, "learning_rate": 9.659067407671425e-06, "loss": 7.5908, "step": 56000 }, { "epoch": 0.25, "learning_rate": 9.653567075451357e-06, "loss": 7.6034, "step": 56500 }, { "epoch": 0.25, "learning_rate": 9.648066743231293e-06, "loss": 7.6109, "step": 57000 }, { "epoch": 0.25, "learning_rate": 9.642566411011227e-06, "loss": 7.5909, "step": 57500 }, { "epoch": 0.25, "learning_rate": 9.637066078791159e-06, "loss": 7.5836, "step": 58000 }, { "epoch": 0.26, "learning_rate": 9.631565746571093e-06, "loss": 7.5926, "step": 58500 }, { "epoch": 0.26, "learning_rate": 9.626065414351028e-06, "loss": 7.5807, "step": 59000 }, { "epoch": 0.26, "learning_rate": 9.620565082130962e-06, "loss": 7.5775, "step": 59500 }, { "epoch": 0.26, "learning_rate": 9.615064749910895e-06, "loss": 7.5696, "step": 60000 }, { "epoch": 0.26, "learning_rate": 9.60956441769083e-06, "loss": 7.5757, "step": 60500 }, { "epoch": 0.27, "learning_rate": 9.604064085470764e-06, "loss": 7.5625, "step": 61000 }, { "epoch": 0.27, "learning_rate": 9.598563753250696e-06, "loss": 7.5729, "step": 61500 }, { "epoch": 0.27, "learning_rate": 9.59306342103063e-06, "loss": 7.5756, "step": 62000 }, { "epoch": 0.27, "learning_rate": 9.587563088810566e-06, "loss": 7.5439, "step": 62500 }, { "epoch": 0.28, "learning_rate": 9.582062756590498e-06, "loss": 7.5754, "step": 63000 }, { "epoch": 0.28, "learning_rate": 9.576562424370432e-06, "loss": 7.5438, "step": 63500 }, { "epoch": 0.28, "learning_rate": 9.571062092150368e-06, "loss": 7.5685, "step": 64000 }, { "epoch": 0.28, "learning_rate": 9.565561759930302e-06, "loss": 7.5329, "step": 64500 }, { "epoch": 0.28, "learning_rate": 9.560061427710234e-06, "loss": 7.5443, "step": 65000 }, { "epoch": 0.29, "learning_rate": 9.554561095490168e-06, "loss": 7.5507, "step": 65500 }, { "epoch": 0.29, "learning_rate": 9.549060763270103e-06, "loss": 7.5399, "step": 66000 }, { "epoch": 0.29, "learning_rate": 9.543560431050036e-06, "loss": 7.5313, "step": 66500 }, { "epoch": 0.29, "learning_rate": 9.53806009882997e-06, "loss": 7.5484, "step": 67000 }, { "epoch": 0.3, "learning_rate": 9.532559766609905e-06, "loss": 7.5436, "step": 67500 }, { "epoch": 0.3, "learning_rate": 9.527059434389837e-06, "loss": 7.5258, "step": 68000 }, { "epoch": 0.3, "learning_rate": 9.521559102169771e-06, "loss": 7.5261, "step": 68500 }, { "epoch": 0.3, "learning_rate": 9.516058769949705e-06, "loss": 7.5312, "step": 69000 }, { "epoch": 0.3, "learning_rate": 9.51055843772964e-06, "loss": 7.5295, "step": 69500 }, { "epoch": 0.31, "learning_rate": 9.505058105509573e-06, "loss": 7.5164, "step": 70000 }, { "epoch": 0.31, "learning_rate": 9.499557773289507e-06, "loss": 7.5121, "step": 70500 }, { "epoch": 0.31, "learning_rate": 9.494057441069441e-06, "loss": 7.5284, "step": 71000 }, { "epoch": 0.31, "learning_rate": 9.488557108849375e-06, "loss": 7.5277, "step": 71500 }, { "epoch": 0.32, "learning_rate": 9.483056776629309e-06, "loss": 7.5442, "step": 72000 }, { "epoch": 0.32, "learning_rate": 9.477556444409243e-06, "loss": 7.5118, "step": 72500 }, { "epoch": 0.32, "learning_rate": 9.472056112189177e-06, "loss": 7.5094, "step": 73000 }, { "epoch": 0.32, "learning_rate": 9.46655577996911e-06, "loss": 7.49, "step": 73500 }, { "epoch": 0.32, "learning_rate": 9.461055447749045e-06, "loss": 7.5142, "step": 74000 }, { "epoch": 0.33, "learning_rate": 9.455555115528978e-06, "loss": 7.4973, "step": 74500 }, { "epoch": 0.33, "learning_rate": 9.450054783308912e-06, "loss": 7.5101, "step": 75000 }, { "epoch": 0.33, "learning_rate": 9.444554451088846e-06, "loss": 7.5007, "step": 75500 }, { "epoch": 0.33, "learning_rate": 9.43905411886878e-06, "loss": 7.5064, "step": 76000 }, { "epoch": 0.33, "learning_rate": 9.433553786648714e-06, "loss": 7.5037, "step": 76500 }, { "epoch": 0.34, "learning_rate": 9.428053454428648e-06, "loss": 7.4904, "step": 77000 }, { "epoch": 0.34, "learning_rate": 9.422553122208582e-06, "loss": 7.5029, "step": 77500 }, { "epoch": 0.34, "learning_rate": 9.417052789988516e-06, "loss": 7.4924, "step": 78000 }, { "epoch": 0.34, "learning_rate": 9.41155245776845e-06, "loss": 7.4783, "step": 78500 }, { "epoch": 0.35, "learning_rate": 9.406052125548384e-06, "loss": 7.4917, "step": 79000 }, { "epoch": 0.35, "learning_rate": 9.400551793328318e-06, "loss": 7.4844, "step": 79500 }, { "epoch": 0.35, "learning_rate": 9.395051461108252e-06, "loss": 7.4634, "step": 80000 }, { "epoch": 0.35, "learning_rate": 9.389551128888186e-06, "loss": 7.4885, "step": 80500 }, { "epoch": 0.35, "learning_rate": 9.38405079666812e-06, "loss": 7.4879, "step": 81000 }, { "epoch": 0.36, "learning_rate": 9.378550464448053e-06, "loss": 7.4821, "step": 81500 }, { "epoch": 0.36, "learning_rate": 9.373050132227987e-06, "loss": 7.4794, "step": 82000 }, { "epoch": 0.36, "learning_rate": 9.367549800007921e-06, "loss": 7.4733, "step": 82500 }, { "epoch": 0.36, "learning_rate": 9.362049467787855e-06, "loss": 7.4652, "step": 83000 }, { "epoch": 0.37, "learning_rate": 9.35654913556779e-06, "loss": 7.4794, "step": 83500 }, { "epoch": 0.37, "learning_rate": 9.351048803347723e-06, "loss": 7.4743, "step": 84000 }, { "epoch": 0.37, "learning_rate": 9.345548471127657e-06, "loss": 7.4543, "step": 84500 }, { "epoch": 0.37, "learning_rate": 9.340048138907591e-06, "loss": 7.4596, "step": 85000 }, { "epoch": 0.37, "learning_rate": 9.334547806687525e-06, "loss": 7.4688, "step": 85500 }, { "epoch": 0.38, "learning_rate": 9.329047474467459e-06, "loss": 7.4658, "step": 86000 }, { "epoch": 0.38, "learning_rate": 9.323547142247393e-06, "loss": 7.4542, "step": 86500 }, { "epoch": 0.38, "learning_rate": 9.318046810027327e-06, "loss": 7.4514, "step": 87000 }, { "epoch": 0.38, "learning_rate": 9.31254647780726e-06, "loss": 7.4532, "step": 87500 }, { "epoch": 0.39, "learning_rate": 9.307046145587195e-06, "loss": 7.4519, "step": 88000 }, { "epoch": 0.39, "learning_rate": 9.301545813367129e-06, "loss": 7.456, "step": 88500 }, { "epoch": 0.39, "learning_rate": 9.296045481147062e-06, "loss": 7.4514, "step": 89000 }, { "epoch": 0.39, "learning_rate": 9.290545148926996e-06, "loss": 7.4514, "step": 89500 }, { "epoch": 0.39, "learning_rate": 9.28504481670693e-06, "loss": 7.4499, "step": 90000 }, { "epoch": 0.4, "learning_rate": 9.279544484486864e-06, "loss": 7.4462, "step": 90500 }, { "epoch": 0.4, "learning_rate": 9.274044152266798e-06, "loss": 7.4492, "step": 91000 }, { "epoch": 0.4, "learning_rate": 9.268543820046732e-06, "loss": 7.4339, "step": 91500 }, { "epoch": 0.4, "learning_rate": 9.263043487826664e-06, "loss": 7.4386, "step": 92000 }, { "epoch": 0.4, "learning_rate": 9.2575431556066e-06, "loss": 7.43, "step": 92500 }, { "epoch": 0.41, "learning_rate": 9.252042823386534e-06, "loss": 7.422, "step": 93000 }, { "epoch": 0.41, "learning_rate": 9.246542491166468e-06, "loss": 7.4264, "step": 93500 }, { "epoch": 0.41, "learning_rate": 9.241042158946402e-06, "loss": 7.4333, "step": 94000 }, { "epoch": 0.41, "learning_rate": 9.235541826726336e-06, "loss": 7.4206, "step": 94500 }, { "epoch": 0.42, "learning_rate": 9.23004149450627e-06, "loss": 7.4295, "step": 95000 }, { "epoch": 0.42, "learning_rate": 9.224541162286202e-06, "loss": 7.4215, "step": 95500 }, { "epoch": 0.42, "learning_rate": 9.219040830066137e-06, "loss": 7.4242, "step": 96000 }, { "epoch": 0.42, "learning_rate": 9.213540497846071e-06, "loss": 7.4249, "step": 96500 }, { "epoch": 0.42, "learning_rate": 9.208040165626004e-06, "loss": 7.417, "step": 97000 }, { "epoch": 0.43, "learning_rate": 9.202539833405938e-06, "loss": 7.4293, "step": 97500 }, { "epoch": 0.43, "learning_rate": 9.197039501185873e-06, "loss": 7.4275, "step": 98000 }, { "epoch": 0.43, "learning_rate": 9.191539168965805e-06, "loss": 7.4298, "step": 98500 }, { "epoch": 0.43, "learning_rate": 9.18603883674574e-06, "loss": 7.4207, "step": 99000 }, { "epoch": 0.44, "learning_rate": 9.180538504525675e-06, "loss": 7.4235, "step": 99500 }, { "epoch": 0.44, "learning_rate": 9.175038172305609e-06, "loss": 7.4082, "step": 100000 }, { "epoch": 0.44, "learning_rate": 9.169537840085541e-06, "loss": 7.4255, "step": 100500 }, { "epoch": 0.44, "learning_rate": 9.164037507865475e-06, "loss": 7.4198, "step": 101000 }, { "epoch": 0.44, "learning_rate": 9.15853717564541e-06, "loss": 7.4257, "step": 101500 }, { "epoch": 0.45, "learning_rate": 9.153036843425343e-06, "loss": 7.4116, "step": 102000 }, { "epoch": 0.45, "learning_rate": 9.147536511205277e-06, "loss": 7.411, "step": 102500 }, { "epoch": 0.45, "learning_rate": 9.142036178985212e-06, "loss": 7.3992, "step": 103000 }, { "epoch": 0.45, "learning_rate": 9.136535846765145e-06, "loss": 7.4208, "step": 103500 }, { "epoch": 0.46, "learning_rate": 9.131035514545079e-06, "loss": 7.3927, "step": 104000 }, { "epoch": 0.46, "learning_rate": 9.125535182325013e-06, "loss": 7.3963, "step": 104500 }, { "epoch": 0.46, "learning_rate": 9.120034850104948e-06, "loss": 7.3989, "step": 105000 }, { "epoch": 0.46, "learning_rate": 9.11453451788488e-06, "loss": 7.3897, "step": 105500 }, { "epoch": 0.46, "learning_rate": 9.109034185664814e-06, "loss": 7.3833, "step": 106000 }, { "epoch": 0.47, "learning_rate": 9.10353385344475e-06, "loss": 7.3838, "step": 106500 }, { "epoch": 0.47, "learning_rate": 9.098033521224682e-06, "loss": 7.3721, "step": 107000 }, { "epoch": 0.47, "learning_rate": 9.092533189004616e-06, "loss": 7.3441, "step": 107500 }, { "epoch": 0.47, "learning_rate": 9.08703285678455e-06, "loss": 7.3448, "step": 108000 }, { "epoch": 0.47, "learning_rate": 9.081532524564484e-06, "loss": 7.3308, "step": 108500 }, { "epoch": 0.48, "learning_rate": 9.076032192344418e-06, "loss": 7.3492, "step": 109000 }, { "epoch": 0.48, "learning_rate": 9.070531860124352e-06, "loss": 7.3236, "step": 109500 }, { "epoch": 0.48, "learning_rate": 9.065031527904287e-06, "loss": 7.3321, "step": 110000 }, { "epoch": 0.48, "learning_rate": 9.05953119568422e-06, "loss": 7.329, "step": 110500 }, { "epoch": 0.49, "learning_rate": 9.054030863464154e-06, "loss": 7.3076, "step": 111000 }, { "epoch": 0.49, "learning_rate": 9.048530531244088e-06, "loss": 7.2991, "step": 111500 }, { "epoch": 0.49, "learning_rate": 9.043030199024021e-06, "loss": 7.2965, "step": 112000 }, { "epoch": 0.49, "learning_rate": 9.037529866803955e-06, "loss": 7.2772, "step": 112500 }, { "epoch": 0.49, "learning_rate": 9.03202953458389e-06, "loss": 7.2735, "step": 113000 }, { "epoch": 0.5, "learning_rate": 9.026529202363823e-06, "loss": 7.2715, "step": 113500 }, { "epoch": 0.5, "learning_rate": 9.021028870143757e-06, "loss": 7.2794, "step": 114000 }, { "epoch": 0.5, "learning_rate": 9.015528537923691e-06, "loss": 7.2733, "step": 114500 }, { "epoch": 0.5, "learning_rate": 9.010028205703625e-06, "loss": 7.2622, "step": 115000 }, { "epoch": 0.51, "learning_rate": 9.004527873483559e-06, "loss": 7.2666, "step": 115500 }, { "epoch": 0.51, "learning_rate": 8.999027541263493e-06, "loss": 7.2396, "step": 116000 }, { "epoch": 0.51, "learning_rate": 8.993527209043427e-06, "loss": 7.2583, "step": 116500 }, { "epoch": 0.51, "learning_rate": 8.98802687682336e-06, "loss": 7.2522, "step": 117000 }, { "epoch": 0.51, "learning_rate": 8.982526544603295e-06, "loss": 7.2489, "step": 117500 }, { "epoch": 0.52, "learning_rate": 8.977026212383229e-06, "loss": 7.2362, "step": 118000 }, { "epoch": 0.52, "learning_rate": 8.971525880163163e-06, "loss": 7.2387, "step": 118500 }, { "epoch": 0.52, "learning_rate": 8.966025547943096e-06, "loss": 7.2258, "step": 119000 }, { "epoch": 0.52, "learning_rate": 8.96052521572303e-06, "loss": 7.2387, "step": 119500 }, { "epoch": 0.53, "learning_rate": 8.955024883502964e-06, "loss": 7.2345, "step": 120000 }, { "epoch": 0.53, "learning_rate": 8.949524551282898e-06, "loss": 7.2113, "step": 120500 }, { "epoch": 0.53, "learning_rate": 8.944024219062832e-06, "loss": 7.2161, "step": 121000 }, { "epoch": 0.53, "learning_rate": 8.938523886842766e-06, "loss": 7.2371, "step": 121500 }, { "epoch": 0.53, "learning_rate": 8.9330235546227e-06, "loss": 7.2099, "step": 122000 }, { "epoch": 0.54, "learning_rate": 8.927523222402634e-06, "loss": 7.2023, "step": 122500 }, { "epoch": 0.54, "learning_rate": 8.922022890182568e-06, "loss": 7.1956, "step": 123000 }, { "epoch": 0.54, "learning_rate": 8.916522557962502e-06, "loss": 7.196, "step": 123500 }, { "epoch": 0.54, "learning_rate": 8.911022225742436e-06, "loss": 7.1946, "step": 124000 }, { "epoch": 0.54, "learning_rate": 8.90552189352237e-06, "loss": 7.1853, "step": 124500 }, { "epoch": 0.55, "learning_rate": 8.900021561302304e-06, "loss": 7.1787, "step": 125000 }, { "epoch": 0.55, "learning_rate": 8.894521229082238e-06, "loss": 7.1835, "step": 125500 }, { "epoch": 0.55, "learning_rate": 8.889020896862171e-06, "loss": 7.2015, "step": 126000 }, { "epoch": 0.55, "learning_rate": 8.883520564642105e-06, "loss": 7.1835, "step": 126500 }, { "epoch": 0.56, "learning_rate": 8.87802023242204e-06, "loss": 7.1753, "step": 127000 }, { "epoch": 0.56, "learning_rate": 8.872519900201972e-06, "loss": 7.1731, "step": 127500 }, { "epoch": 0.56, "learning_rate": 8.867019567981907e-06, "loss": 7.1458, "step": 128000 }, { "epoch": 0.56, "learning_rate": 8.861519235761841e-06, "loss": 7.1542, "step": 128500 }, { "epoch": 0.56, "learning_rate": 8.856018903541775e-06, "loss": 7.1435, "step": 129000 }, { "epoch": 0.57, "learning_rate": 8.850518571321709e-06, "loss": 7.1592, "step": 129500 }, { "epoch": 0.57, "learning_rate": 8.845018239101643e-06, "loss": 7.1575, "step": 130000 }, { "epoch": 0.57, "learning_rate": 8.839517906881577e-06, "loss": 7.1637, "step": 130500 }, { "epoch": 0.57, "learning_rate": 8.834017574661509e-06, "loss": 7.1407, "step": 131000 }, { "epoch": 0.58, "learning_rate": 8.828517242441445e-06, "loss": 7.1479, "step": 131500 }, { "epoch": 0.58, "learning_rate": 8.823016910221379e-06, "loss": 7.1383, "step": 132000 }, { "epoch": 0.58, "learning_rate": 8.817516578001311e-06, "loss": 7.1254, "step": 132500 }, { "epoch": 0.58, "learning_rate": 8.812016245781246e-06, "loss": 7.1358, "step": 133000 }, { "epoch": 0.58, "learning_rate": 8.80651591356118e-06, "loss": 7.1213, "step": 133500 }, { "epoch": 0.59, "learning_rate": 8.801015581341114e-06, "loss": 7.1027, "step": 134000 }, { "epoch": 0.59, "learning_rate": 8.795515249121047e-06, "loss": 7.1114, "step": 134500 }, { "epoch": 0.59, "learning_rate": 8.790014916900982e-06, "loss": 7.12, "step": 135000 }, { "epoch": 0.59, "learning_rate": 8.784514584680916e-06, "loss": 7.1124, "step": 135500 }, { "epoch": 0.6, "learning_rate": 8.779014252460848e-06, "loss": 7.1064, "step": 136000 }, { "epoch": 0.6, "learning_rate": 8.773513920240784e-06, "loss": 7.116, "step": 136500 }, { "epoch": 0.6, "learning_rate": 8.768013588020718e-06, "loss": 7.092, "step": 137000 }, { "epoch": 0.6, "learning_rate": 8.76251325580065e-06, "loss": 7.0904, "step": 137500 }, { "epoch": 0.6, "learning_rate": 8.757012923580584e-06, "loss": 7.0863, "step": 138000 }, { "epoch": 0.61, "learning_rate": 8.75151259136052e-06, "loss": 7.0805, "step": 138500 }, { "epoch": 0.61, "learning_rate": 8.746012259140454e-06, "loss": 7.0823, "step": 139000 }, { "epoch": 0.61, "learning_rate": 8.740511926920386e-06, "loss": 7.0707, "step": 139500 }, { "epoch": 0.61, "learning_rate": 8.735011594700321e-06, "loss": 7.0688, "step": 140000 }, { "epoch": 0.61, "learning_rate": 8.729511262480255e-06, "loss": 7.0627, "step": 140500 }, { "epoch": 0.62, "learning_rate": 8.724010930260188e-06, "loss": 7.0688, "step": 141000 }, { "epoch": 0.62, "learning_rate": 8.718510598040122e-06, "loss": 7.0622, "step": 141500 }, { "epoch": 0.62, "learning_rate": 8.713010265820057e-06, "loss": 7.0455, "step": 142000 }, { "epoch": 0.62, "learning_rate": 8.70750993359999e-06, "loss": 7.0413, "step": 142500 }, { "epoch": 0.63, "learning_rate": 8.702009601379923e-06, "loss": 7.0441, "step": 143000 }, { "epoch": 0.63, "learning_rate": 8.696509269159859e-06, "loss": 7.0476, "step": 143500 }, { "epoch": 0.63, "learning_rate": 8.691008936939793e-06, "loss": 7.0418, "step": 144000 }, { "epoch": 0.63, "learning_rate": 8.685508604719725e-06, "loss": 7.0182, "step": 144500 }, { "epoch": 0.63, "learning_rate": 8.680008272499659e-06, "loss": 7.0226, "step": 145000 }, { "epoch": 0.64, "learning_rate": 8.674507940279595e-06, "loss": 7.014, "step": 145500 }, { "epoch": 0.64, "learning_rate": 8.669007608059527e-06, "loss": 7.0208, "step": 146000 }, { "epoch": 0.64, "learning_rate": 8.663507275839461e-06, "loss": 7.0294, "step": 146500 }, { "epoch": 0.64, "learning_rate": 8.658006943619396e-06, "loss": 7.0099, "step": 147000 }, { "epoch": 0.65, "learning_rate": 8.652506611399329e-06, "loss": 7.0196, "step": 147500 }, { "epoch": 0.65, "learning_rate": 8.647006279179263e-06, "loss": 7.005, "step": 148000 }, { "epoch": 0.65, "learning_rate": 8.641505946959197e-06, "loss": 6.9846, "step": 148500 }, { "epoch": 0.65, "learning_rate": 8.636005614739132e-06, "loss": 6.9799, "step": 149000 }, { "epoch": 0.65, "learning_rate": 8.630505282519064e-06, "loss": 6.9991, "step": 149500 }, { "epoch": 0.66, "learning_rate": 8.625004950298998e-06, "loss": 6.9868, "step": 150000 }, { "epoch": 0.66, "learning_rate": 8.619504618078932e-06, "loss": 6.9632, "step": 150500 }, { "epoch": 0.66, "learning_rate": 8.614004285858866e-06, "loss": 6.9719, "step": 151000 }, { "epoch": 0.66, "learning_rate": 8.6085039536388e-06, "loss": 6.9815, "step": 151500 }, { "epoch": 0.67, "learning_rate": 8.603003621418734e-06, "loss": 6.9849, "step": 152000 }, { "epoch": 0.67, "learning_rate": 8.597503289198668e-06, "loss": 6.9632, "step": 152500 }, { "epoch": 0.67, "learning_rate": 8.592002956978602e-06, "loss": 6.9641, "step": 153000 }, { "epoch": 0.67, "learning_rate": 8.586502624758536e-06, "loss": 6.9708, "step": 153500 }, { "epoch": 0.67, "learning_rate": 8.58100229253847e-06, "loss": 6.9457, "step": 154000 }, { "epoch": 0.68, "learning_rate": 8.575501960318404e-06, "loss": 6.9505, "step": 154500 }, { "epoch": 0.68, "learning_rate": 8.570001628098338e-06, "loss": 6.9442, "step": 155000 }, { "epoch": 0.68, "learning_rate": 8.564501295878272e-06, "loss": 6.9395, "step": 155500 }, { "epoch": 0.68, "learning_rate": 8.559000963658206e-06, "loss": 6.9363, "step": 156000 }, { "epoch": 0.68, "learning_rate": 8.55350063143814e-06, "loss": 6.9424, "step": 156500 }, { "epoch": 0.69, "learning_rate": 8.548000299218073e-06, "loss": 6.9474, "step": 157000 }, { "epoch": 0.69, "learning_rate": 8.542499966998007e-06, "loss": 6.9172, "step": 157500 }, { "epoch": 0.69, "learning_rate": 8.536999634777941e-06, "loss": 6.9108, "step": 158000 }, { "epoch": 0.69, "learning_rate": 8.531499302557875e-06, "loss": 6.8984, "step": 158500 }, { "epoch": 0.7, "learning_rate": 8.525998970337809e-06, "loss": 6.9112, "step": 159000 }, { "epoch": 0.7, "learning_rate": 8.520498638117743e-06, "loss": 6.9091, "step": 159500 }, { "epoch": 0.7, "learning_rate": 8.514998305897677e-06, "loss": 6.918, "step": 160000 }, { "epoch": 0.7, "learning_rate": 8.509497973677611e-06, "loss": 6.8966, "step": 160500 }, { "epoch": 0.7, "learning_rate": 8.503997641457545e-06, "loss": 6.8854, "step": 161000 }, { "epoch": 0.71, "learning_rate": 8.498497309237479e-06, "loss": 6.8797, "step": 161500 }, { "epoch": 0.71, "learning_rate": 8.492996977017413e-06, "loss": 6.8944, "step": 162000 }, { "epoch": 0.71, "learning_rate": 8.487496644797347e-06, "loss": 6.8815, "step": 162500 }, { "epoch": 0.71, "learning_rate": 8.48199631257728e-06, "loss": 6.871, "step": 163000 }, { "epoch": 0.72, "learning_rate": 8.476495980357214e-06, "loss": 6.8839, "step": 163500 }, { "epoch": 0.72, "learning_rate": 8.470995648137148e-06, "loss": 6.844, "step": 164000 }, { "epoch": 0.72, "learning_rate": 8.465495315917082e-06, "loss": 6.8761, "step": 164500 }, { "epoch": 0.72, "learning_rate": 8.459994983697016e-06, "loss": 6.8561, "step": 165000 }, { "epoch": 0.72, "learning_rate": 8.45449465147695e-06, "loss": 6.8393, "step": 165500 }, { "epoch": 0.73, "learning_rate": 8.448994319256884e-06, "loss": 6.8438, "step": 166000 }, { "epoch": 0.73, "learning_rate": 8.443493987036818e-06, "loss": 6.8317, "step": 166500 }, { "epoch": 0.73, "learning_rate": 8.437993654816752e-06, "loss": 6.8367, "step": 167000 }, { "epoch": 0.73, "learning_rate": 8.432493322596686e-06, "loss": 6.8271, "step": 167500 }, { "epoch": 0.74, "learning_rate": 8.42699299037662e-06, "loss": 6.817, "step": 168000 }, { "epoch": 0.74, "learning_rate": 8.421492658156554e-06, "loss": 6.8253, "step": 168500 }, { "epoch": 0.74, "learning_rate": 8.415992325936488e-06, "loss": 6.8266, "step": 169000 }, { "epoch": 0.74, "learning_rate": 8.410491993716422e-06, "loss": 6.8241, "step": 169500 }, { "epoch": 0.74, "learning_rate": 8.404991661496356e-06, "loss": 6.7968, "step": 170000 }, { "epoch": 0.75, "learning_rate": 8.39949132927629e-06, "loss": 6.8109, "step": 170500 }, { "epoch": 0.75, "learning_rate": 8.393990997056223e-06, "loss": 6.7955, "step": 171000 }, { "epoch": 0.75, "learning_rate": 8.388490664836156e-06, "loss": 6.7989, "step": 171500 }, { "epoch": 0.75, "learning_rate": 8.382990332616091e-06, "loss": 6.7945, "step": 172000 }, { "epoch": 0.75, "learning_rate": 8.377490000396025e-06, "loss": 6.7752, "step": 172500 }, { "epoch": 0.76, "learning_rate": 8.371989668175959e-06, "loss": 6.7703, "step": 173000 }, { "epoch": 0.76, "learning_rate": 8.366489335955893e-06, "loss": 6.7813, "step": 173500 }, { "epoch": 0.76, "learning_rate": 8.360989003735827e-06, "loss": 6.769, "step": 174000 }, { "epoch": 0.76, "learning_rate": 8.355488671515761e-06, "loss": 6.7649, "step": 174500 }, { "epoch": 0.77, "learning_rate": 8.349988339295693e-06, "loss": 6.7667, "step": 175000 }, { "epoch": 0.77, "learning_rate": 8.344488007075629e-06, "loss": 6.7566, "step": 175500 }, { "epoch": 0.77, "learning_rate": 8.338987674855563e-06, "loss": 6.7585, "step": 176000 }, { "epoch": 0.77, "learning_rate": 8.333487342635495e-06, "loss": 6.7401, "step": 176500 }, { "epoch": 0.77, "learning_rate": 8.327987010415429e-06, "loss": 6.7236, "step": 177000 }, { "epoch": 0.78, "learning_rate": 8.322486678195364e-06, "loss": 6.732, "step": 177500 }, { "epoch": 0.78, "learning_rate": 8.316986345975298e-06, "loss": 6.7241, "step": 178000 }, { "epoch": 0.78, "learning_rate": 8.31148601375523e-06, "loss": 6.7206, "step": 178500 }, { "epoch": 0.78, "learning_rate": 8.305985681535166e-06, "loss": 6.7128, "step": 179000 }, { "epoch": 0.79, "learning_rate": 8.3004853493151e-06, "loss": 6.7026, "step": 179500 }, { "epoch": 0.79, "learning_rate": 8.294985017095032e-06, "loss": 6.7112, "step": 180000 }, { "epoch": 0.79, "learning_rate": 8.289484684874966e-06, "loss": 6.7032, "step": 180500 }, { "epoch": 0.79, "learning_rate": 8.283984352654902e-06, "loss": 6.6976, "step": 181000 }, { "epoch": 0.79, "learning_rate": 8.278484020434834e-06, "loss": 6.6837, "step": 181500 }, { "epoch": 0.8, "learning_rate": 8.272983688214768e-06, "loss": 6.6787, "step": 182000 }, { "epoch": 0.8, "learning_rate": 8.267483355994704e-06, "loss": 6.6798, "step": 182500 }, { "epoch": 0.8, "learning_rate": 8.261983023774636e-06, "loss": 6.6519, "step": 183000 }, { "epoch": 0.8, "learning_rate": 8.25648269155457e-06, "loss": 6.646, "step": 183500 }, { "epoch": 0.81, "learning_rate": 8.250982359334504e-06, "loss": 6.6308, "step": 184000 }, { "epoch": 0.81, "learning_rate": 8.24548202711444e-06, "loss": 6.6452, "step": 184500 }, { "epoch": 0.81, "learning_rate": 8.239981694894372e-06, "loss": 6.6276, "step": 185000 }, { "epoch": 0.81, "learning_rate": 8.234481362674306e-06, "loss": 6.6282, "step": 185500 }, { "epoch": 0.81, "learning_rate": 8.228981030454241e-06, "loss": 6.6154, "step": 186000 }, { "epoch": 0.82, "learning_rate": 8.223480698234173e-06, "loss": 6.6159, "step": 186500 }, { "epoch": 0.82, "learning_rate": 8.217980366014107e-06, "loss": 6.6133, "step": 187000 }, { "epoch": 0.82, "learning_rate": 8.212480033794041e-06, "loss": 6.6075, "step": 187500 }, { "epoch": 0.82, "learning_rate": 8.206979701573975e-06, "loss": 6.5915, "step": 188000 }, { "epoch": 0.82, "learning_rate": 8.20147936935391e-06, "loss": 6.5642, "step": 188500 }, { "epoch": 0.83, "learning_rate": 8.195979037133843e-06, "loss": 6.5548, "step": 189000 }, { "epoch": 0.83, "learning_rate": 8.190478704913779e-06, "loss": 6.5503, "step": 189500 }, { "epoch": 0.83, "learning_rate": 8.184978372693711e-06, "loss": 6.5392, "step": 190000 }, { "epoch": 0.83, "learning_rate": 8.179478040473645e-06, "loss": 6.5297, "step": 190500 }, { "epoch": 0.84, "learning_rate": 8.173977708253579e-06, "loss": 6.5266, "step": 191000 }, { "epoch": 0.84, "learning_rate": 8.168477376033513e-06, "loss": 6.5272, "step": 191500 }, { "epoch": 0.84, "learning_rate": 8.162977043813447e-06, "loss": 6.5002, "step": 192000 }, { "epoch": 0.84, "learning_rate": 8.15747671159338e-06, "loss": 6.4881, "step": 192500 }, { "epoch": 0.84, "learning_rate": 8.151976379373315e-06, "loss": 6.4877, "step": 193000 }, { "epoch": 0.85, "learning_rate": 8.146476047153248e-06, "loss": 6.4956, "step": 193500 }, { "epoch": 0.85, "learning_rate": 8.140975714933182e-06, "loss": 6.4729, "step": 194000 }, { "epoch": 0.85, "learning_rate": 8.135475382713116e-06, "loss": 6.4722, "step": 194500 }, { "epoch": 0.85, "learning_rate": 8.12997505049305e-06, "loss": 6.4524, "step": 195000 }, { "epoch": 0.86, "learning_rate": 8.124474718272984e-06, "loss": 6.4595, "step": 195500 }, { "epoch": 0.86, "learning_rate": 8.118974386052918e-06, "loss": 6.4414, "step": 196000 }, { "epoch": 0.86, "learning_rate": 8.113474053832852e-06, "loss": 6.4218, "step": 196500 }, { "epoch": 0.86, "learning_rate": 8.107973721612786e-06, "loss": 6.4155, "step": 197000 }, { "epoch": 0.86, "learning_rate": 8.10247338939272e-06, "loss": 6.4034, "step": 197500 }, { "epoch": 0.87, "learning_rate": 8.096973057172654e-06, "loss": 6.4217, "step": 198000 }, { "epoch": 0.87, "learning_rate": 8.091472724952588e-06, "loss": 6.3908, "step": 198500 }, { "epoch": 0.87, "learning_rate": 8.085972392732522e-06, "loss": 6.3917, "step": 199000 }, { "epoch": 0.87, "learning_rate": 8.080472060512456e-06, "loss": 6.3961, "step": 199500 }, { "epoch": 0.88, "learning_rate": 8.07497172829239e-06, "loss": 6.3781, "step": 200000 }, { "epoch": 0.88, "learning_rate": 8.069471396072323e-06, "loss": 6.3586, "step": 200500 }, { "epoch": 0.88, "learning_rate": 8.063971063852257e-06, "loss": 6.3485, "step": 201000 }, { "epoch": 0.88, "learning_rate": 8.058470731632191e-06, "loss": 6.3428, "step": 201500 }, { "epoch": 0.88, "learning_rate": 8.052970399412125e-06, "loss": 6.34, "step": 202000 }, { "epoch": 0.89, "learning_rate": 8.04747006719206e-06, "loss": 6.3431, "step": 202500 }, { "epoch": 0.89, "learning_rate": 8.041969734971993e-06, "loss": 6.3253, "step": 203000 }, { "epoch": 0.89, "learning_rate": 8.036469402751927e-06, "loss": 6.3391, "step": 203500 }, { "epoch": 0.89, "learning_rate": 8.030969070531861e-06, "loss": 6.3018, "step": 204000 }, { "epoch": 0.89, "learning_rate": 8.025468738311795e-06, "loss": 6.296, "step": 204500 }, { "epoch": 0.9, "learning_rate": 8.019968406091729e-06, "loss": 6.3219, "step": 205000 }, { "epoch": 0.9, "learning_rate": 8.014468073871663e-06, "loss": 6.2961, "step": 205500 }, { "epoch": 0.9, "learning_rate": 8.008967741651597e-06, "loss": 6.2849, "step": 206000 }, { "epoch": 0.9, "learning_rate": 8.00346740943153e-06, "loss": 6.3006, "step": 206500 }, { "epoch": 0.91, "learning_rate": 7.997967077211465e-06, "loss": 6.268, "step": 207000 }, { "epoch": 0.91, "learning_rate": 7.992466744991398e-06, "loss": 6.2677, "step": 207500 }, { "epoch": 0.91, "learning_rate": 7.986966412771332e-06, "loss": 6.2703, "step": 208000 }, { "epoch": 0.91, "learning_rate": 7.981466080551266e-06, "loss": 6.2545, "step": 208500 }, { "epoch": 0.91, "learning_rate": 7.9759657483312e-06, "loss": 6.251, "step": 209000 }, { "epoch": 0.92, "learning_rate": 7.970465416111134e-06, "loss": 6.245, "step": 209500 }, { "epoch": 0.92, "learning_rate": 7.964965083891068e-06, "loss": 6.2186, "step": 210000 }, { "epoch": 0.92, "learning_rate": 7.959464751671e-06, "loss": 6.2041, "step": 210500 }, { "epoch": 0.92, "learning_rate": 7.953964419450936e-06, "loss": 6.203, "step": 211000 }, { "epoch": 0.93, "learning_rate": 7.94846408723087e-06, "loss": 6.2025, "step": 211500 }, { "epoch": 0.93, "learning_rate": 7.942963755010802e-06, "loss": 6.2099, "step": 212000 }, { "epoch": 0.93, "learning_rate": 7.937463422790738e-06, "loss": 6.1776, "step": 212500 }, { "epoch": 0.93, "learning_rate": 7.931963090570672e-06, "loss": 6.1721, "step": 213000 }, { "epoch": 0.93, "learning_rate": 7.926462758350606e-06, "loss": 6.1524, "step": 213500 }, { "epoch": 0.94, "learning_rate": 7.920962426130538e-06, "loss": 6.1663, "step": 214000 }, { "epoch": 0.94, "learning_rate": 7.915462093910473e-06, "loss": 6.1561, "step": 214500 }, { "epoch": 0.94, "learning_rate": 7.909961761690407e-06, "loss": 6.1268, "step": 215000 }, { "epoch": 0.94, "learning_rate": 7.90446142947034e-06, "loss": 6.1254, "step": 215500 }, { "epoch": 0.95, "learning_rate": 7.898961097250275e-06, "loss": 6.1022, "step": 216000 }, { "epoch": 0.95, "learning_rate": 7.89346076503021e-06, "loss": 6.1058, "step": 216500 }, { "epoch": 0.95, "learning_rate": 7.887960432810141e-06, "loss": 6.0854, "step": 217000 }, { "epoch": 0.95, "learning_rate": 7.882460100590075e-06, "loss": 6.0492, "step": 217500 }, { "epoch": 0.95, "learning_rate": 7.876959768370011e-06, "loss": 6.0561, "step": 218000 }, { "epoch": 0.96, "learning_rate": 7.871459436149945e-06, "loss": 6.0556, "step": 218500 }, { "epoch": 0.96, "learning_rate": 7.865959103929877e-06, "loss": 6.0144, "step": 219000 }, { "epoch": 0.96, "learning_rate": 7.860458771709813e-06, "loss": 5.9812, "step": 219500 }, { "epoch": 0.96, "learning_rate": 7.854958439489747e-06, "loss": 5.9995, "step": 220000 }, { "epoch": 0.96, "learning_rate": 7.849458107269679e-06, "loss": 5.963, "step": 220500 }, { "epoch": 0.97, "learning_rate": 7.843957775049613e-06, "loss": 5.963, "step": 221000 }, { "epoch": 0.97, "learning_rate": 7.838457442829548e-06, "loss": 5.9367, "step": 221500 }, { "epoch": 0.97, "learning_rate": 7.83295711060948e-06, "loss": 5.9004, "step": 222000 }, { "epoch": 0.97, "learning_rate": 7.827456778389415e-06, "loss": 5.8854, "step": 222500 }, { "epoch": 0.98, "learning_rate": 7.82195644616935e-06, "loss": 5.8579, "step": 223000 }, { "epoch": 0.98, "learning_rate": 7.816456113949284e-06, "loss": 5.8545, "step": 223500 }, { "epoch": 0.98, "learning_rate": 7.810955781729216e-06, "loss": 5.8517, "step": 224000 }, { "epoch": 0.98, "learning_rate": 7.80545544950915e-06, "loss": 5.8351, "step": 224500 }, { "epoch": 0.98, "learning_rate": 7.799955117289086e-06, "loss": 5.821, "step": 225000 }, { "epoch": 0.99, "learning_rate": 7.794454785069018e-06, "loss": 5.7983, "step": 225500 }, { "epoch": 0.99, "learning_rate": 7.788954452848952e-06, "loss": 5.7687, "step": 226000 }, { "epoch": 0.99, "learning_rate": 7.783454120628888e-06, "loss": 5.7821, "step": 226500 }, { "epoch": 0.99, "learning_rate": 7.77795378840882e-06, "loss": 5.7513, "step": 227000 }, { "epoch": 1.0, "learning_rate": 7.772453456188754e-06, "loss": 5.7364, "step": 227500 }, { "epoch": 1.0, "learning_rate": 7.766953123968688e-06, "loss": 5.7413, "step": 228000 }, { "epoch": 1.0, "learning_rate": 7.761452791748623e-06, "loss": 5.7348, "step": 228500 }, { "epoch": 1.0, "learning_rate": 7.755952459528556e-06, "loss": 5.6842, "step": 229000 }, { "epoch": 1.0, "learning_rate": 7.75045212730849e-06, "loss": 5.696, "step": 229500 }, { "epoch": 1.01, "learning_rate": 7.744951795088424e-06, "loss": 5.6709, "step": 230000 }, { "epoch": 1.01, "learning_rate": 7.739451462868358e-06, "loss": 5.6695, "step": 230500 }, { "epoch": 1.01, "learning_rate": 7.733951130648291e-06, "loss": 5.672, "step": 231000 }, { "epoch": 1.01, "learning_rate": 7.728450798428225e-06, "loss": 5.6584, "step": 231500 }, { "epoch": 1.02, "learning_rate": 7.72295046620816e-06, "loss": 5.6375, "step": 232000 }, { "epoch": 1.02, "learning_rate": 7.717450133988093e-06, "loss": 5.629, "step": 232500 }, { "epoch": 1.02, "learning_rate": 7.711949801768027e-06, "loss": 5.5977, "step": 233000 }, { "epoch": 1.02, "learning_rate": 7.706449469547961e-06, "loss": 5.6079, "step": 233500 }, { "epoch": 1.02, "learning_rate": 7.700949137327895e-06, "loss": 5.5887, "step": 234000 }, { "epoch": 1.03, "learning_rate": 7.695448805107829e-06, "loss": 5.5741, "step": 234500 }, { "epoch": 1.03, "learning_rate": 7.689948472887763e-06, "loss": 5.5803, "step": 235000 }, { "epoch": 1.03, "learning_rate": 7.684448140667697e-06, "loss": 5.583, "step": 235500 }, { "epoch": 1.03, "learning_rate": 7.67894780844763e-06, "loss": 5.5438, "step": 236000 }, { "epoch": 1.03, "learning_rate": 7.673447476227565e-06, "loss": 5.5482, "step": 236500 }, { "epoch": 1.04, "learning_rate": 7.667947144007499e-06, "loss": 5.5311, "step": 237000 }, { "epoch": 1.04, "learning_rate": 7.662446811787433e-06, "loss": 5.5251, "step": 237500 }, { "epoch": 1.04, "learning_rate": 7.656946479567366e-06, "loss": 5.4892, "step": 238000 }, { "epoch": 1.04, "learning_rate": 7.6514461473473e-06, "loss": 5.5004, "step": 238500 }, { "epoch": 1.05, "learning_rate": 7.645945815127234e-06, "loss": 5.4966, "step": 239000 }, { "epoch": 1.05, "learning_rate": 7.640445482907168e-06, "loss": 5.4785, "step": 239500 }, { "epoch": 1.05, "learning_rate": 7.634945150687102e-06, "loss": 5.466, "step": 240000 }, { "epoch": 1.05, "learning_rate": 7.629444818467036e-06, "loss": 5.4547, "step": 240500 }, { "epoch": 1.05, "learning_rate": 7.62394448624697e-06, "loss": 5.4556, "step": 241000 }, { "epoch": 1.06, "learning_rate": 7.618444154026904e-06, "loss": 5.4539, "step": 241500 }, { "epoch": 1.06, "learning_rate": 7.612943821806837e-06, "loss": 5.4244, "step": 242000 }, { "epoch": 1.06, "learning_rate": 7.607443489586772e-06, "loss": 5.4353, "step": 242500 }, { "epoch": 1.06, "learning_rate": 7.601943157366706e-06, "loss": 5.4283, "step": 243000 }, { "epoch": 1.07, "learning_rate": 7.59644282514664e-06, "loss": 5.4089, "step": 243500 }, { "epoch": 1.07, "learning_rate": 7.590942492926573e-06, "loss": 5.4123, "step": 244000 }, { "epoch": 1.07, "learning_rate": 7.5854421607065075e-06, "loss": 5.3944, "step": 244500 }, { "epoch": 1.07, "learning_rate": 7.5799418284864414e-06, "loss": 5.3855, "step": 245000 }, { "epoch": 1.07, "learning_rate": 7.5744414962663745e-06, "loss": 5.3802, "step": 245500 }, { "epoch": 1.08, "learning_rate": 7.568941164046309e-06, "loss": 5.3804, "step": 246000 }, { "epoch": 1.08, "learning_rate": 7.563440831826243e-06, "loss": 5.3597, "step": 246500 }, { "epoch": 1.08, "learning_rate": 7.557940499606176e-06, "loss": 5.3678, "step": 247000 }, { "epoch": 1.08, "learning_rate": 7.55244016738611e-06, "loss": 5.3537, "step": 247500 }, { "epoch": 1.09, "learning_rate": 7.546939835166045e-06, "loss": 5.3499, "step": 248000 }, { "epoch": 1.09, "learning_rate": 7.541439502945979e-06, "loss": 5.3228, "step": 248500 }, { "epoch": 1.09, "learning_rate": 7.535939170725912e-06, "loss": 5.3057, "step": 249000 }, { "epoch": 1.09, "learning_rate": 7.530438838505847e-06, "loss": 5.3239, "step": 249500 }, { "epoch": 1.09, "learning_rate": 7.524938506285781e-06, "loss": 5.3015, "step": 250000 }, { "epoch": 1.1, "learning_rate": 7.519438174065714e-06, "loss": 5.307, "step": 250500 }, { "epoch": 1.1, "learning_rate": 7.513937841845648e-06, "loss": 5.2976, "step": 251000 }, { "epoch": 1.1, "learning_rate": 7.5084375096255825e-06, "loss": 5.2911, "step": 251500 }, { "epoch": 1.1, "learning_rate": 7.502937177405516e-06, "loss": 5.2935, "step": 252000 }, { "epoch": 1.1, "learning_rate": 7.4974368451854495e-06, "loss": 5.2853, "step": 252500 }, { "epoch": 1.11, "learning_rate": 7.491936512965384e-06, "loss": 5.269, "step": 253000 }, { "epoch": 1.11, "learning_rate": 7.486436180745318e-06, "loss": 5.2803, "step": 253500 }, { "epoch": 1.11, "learning_rate": 7.480935848525251e-06, "loss": 5.2618, "step": 254000 }, { "epoch": 1.11, "learning_rate": 7.475435516305185e-06, "loss": 5.2511, "step": 254500 }, { "epoch": 1.12, "learning_rate": 7.46993518408512e-06, "loss": 5.2339, "step": 255000 }, { "epoch": 1.12, "learning_rate": 7.464434851865053e-06, "loss": 5.2311, "step": 255500 }, { "epoch": 1.12, "learning_rate": 7.458934519644987e-06, "loss": 5.2512, "step": 256000 }, { "epoch": 1.12, "learning_rate": 7.45343418742492e-06, "loss": 5.1977, "step": 256500 }, { "epoch": 1.12, "learning_rate": 7.447933855204855e-06, "loss": 5.23, "step": 257000 }, { "epoch": 1.13, "learning_rate": 7.442433522984789e-06, "loss": 5.1894, "step": 257500 }, { "epoch": 1.13, "learning_rate": 7.436933190764723e-06, "loss": 5.2134, "step": 258000 }, { "epoch": 1.13, "learning_rate": 7.4314328585446575e-06, "loss": 5.1867, "step": 258500 }, { "epoch": 1.13, "learning_rate": 7.425932526324591e-06, "loss": 5.2069, "step": 259000 }, { "epoch": 1.14, "learning_rate": 7.4204321941045245e-06, "loss": 5.1914, "step": 259500 }, { "epoch": 1.14, "learning_rate": 7.414931861884458e-06, "loss": 5.1679, "step": 260000 }, { "epoch": 1.14, "learning_rate": 7.409431529664392e-06, "loss": 5.1823, "step": 260500 }, { "epoch": 1.14, "learning_rate": 7.403931197444326e-06, "loss": 5.1772, "step": 261000 }, { "epoch": 1.14, "learning_rate": 7.398430865224259e-06, "loss": 5.1581, "step": 261500 }, { "epoch": 1.15, "learning_rate": 7.392930533004194e-06, "loss": 5.153, "step": 262000 }, { "epoch": 1.15, "learning_rate": 7.387430200784128e-06, "loss": 5.1411, "step": 262500 }, { "epoch": 1.15, "learning_rate": 7.381929868564062e-06, "loss": 5.1442, "step": 263000 }, { "epoch": 1.15, "learning_rate": 7.376429536343995e-06, "loss": 5.1217, "step": 263500 }, { "epoch": 1.16, "learning_rate": 7.37092920412393e-06, "loss": 5.1317, "step": 264000 }, { "epoch": 1.16, "learning_rate": 7.365428871903864e-06, "loss": 5.1199, "step": 264500 }, { "epoch": 1.16, "learning_rate": 7.359928539683797e-06, "loss": 5.1086, "step": 265000 }, { "epoch": 1.16, "learning_rate": 7.354428207463732e-06, "loss": 5.1164, "step": 265500 }, { "epoch": 1.16, "learning_rate": 7.348927875243666e-06, "loss": 5.0998, "step": 266000 }, { "epoch": 1.17, "learning_rate": 7.343427543023599e-06, "loss": 5.0962, "step": 266500 }, { "epoch": 1.17, "learning_rate": 7.337927210803533e-06, "loss": 5.0752, "step": 267000 }, { "epoch": 1.17, "learning_rate": 7.332426878583467e-06, "loss": 5.0783, "step": 267500 }, { "epoch": 1.17, "learning_rate": 7.326926546363401e-06, "loss": 5.0755, "step": 268000 }, { "epoch": 1.18, "learning_rate": 7.321426214143334e-06, "loss": 5.0896, "step": 268500 }, { "epoch": 1.18, "learning_rate": 7.315925881923269e-06, "loss": 5.0868, "step": 269000 }, { "epoch": 1.18, "learning_rate": 7.310425549703203e-06, "loss": 5.0485, "step": 269500 }, { "epoch": 1.18, "learning_rate": 7.304925217483136e-06, "loss": 5.0642, "step": 270000 }, { "epoch": 1.18, "learning_rate": 7.29942488526307e-06, "loss": 5.0527, "step": 270500 }, { "epoch": 1.19, "learning_rate": 7.293924553043005e-06, "loss": 5.0557, "step": 271000 }, { "epoch": 1.19, "learning_rate": 7.288424220822938e-06, "loss": 5.0394, "step": 271500 }, { "epoch": 1.19, "learning_rate": 7.282923888602872e-06, "loss": 5.0433, "step": 272000 }, { "epoch": 1.19, "learning_rate": 7.277423556382807e-06, "loss": 5.0491, "step": 272500 }, { "epoch": 1.19, "learning_rate": 7.271923224162741e-06, "loss": 5.0097, "step": 273000 }, { "epoch": 1.2, "learning_rate": 7.266422891942674e-06, "loss": 5.0197, "step": 273500 }, { "epoch": 1.2, "learning_rate": 7.260922559722608e-06, "loss": 5.0195, "step": 274000 }, { "epoch": 1.2, "learning_rate": 7.255422227502542e-06, "loss": 5.0133, "step": 274500 }, { "epoch": 1.2, "learning_rate": 7.2499218952824755e-06, "loss": 5.0104, "step": 275000 }, { "epoch": 1.21, "learning_rate": 7.244421563062409e-06, "loss": 5.0117, "step": 275500 }, { "epoch": 1.21, "learning_rate": 7.238921230842344e-06, "loss": 4.9886, "step": 276000 }, { "epoch": 1.21, "learning_rate": 7.233420898622277e-06, "loss": 4.9858, "step": 276500 }, { "epoch": 1.21, "learning_rate": 7.227920566402211e-06, "loss": 4.9469, "step": 277000 }, { "epoch": 1.21, "learning_rate": 7.222420234182145e-06, "loss": 4.9754, "step": 277500 }, { "epoch": 1.22, "learning_rate": 7.21691990196208e-06, "loss": 4.9809, "step": 278000 }, { "epoch": 1.22, "learning_rate": 7.211419569742013e-06, "loss": 4.9723, "step": 278500 }, { "epoch": 1.22, "learning_rate": 7.205919237521947e-06, "loss": 4.9464, "step": 279000 }, { "epoch": 1.22, "learning_rate": 7.200418905301882e-06, "loss": 4.9646, "step": 279500 }, { "epoch": 1.23, "learning_rate": 7.194918573081815e-06, "loss": 4.9474, "step": 280000 }, { "epoch": 1.23, "learning_rate": 7.189418240861749e-06, "loss": 4.9361, "step": 280500 }, { "epoch": 1.23, "learning_rate": 7.183917908641682e-06, "loss": 4.9507, "step": 281000 }, { "epoch": 1.23, "learning_rate": 7.1784175764216166e-06, "loss": 4.9352, "step": 281500 }, { "epoch": 1.23, "learning_rate": 7.1729172442015505e-06, "loss": 4.9218, "step": 282000 }, { "epoch": 1.24, "learning_rate": 7.167416911981484e-06, "loss": 4.9262, "step": 282500 }, { "epoch": 1.24, "learning_rate": 7.1619165797614175e-06, "loss": 4.927, "step": 283000 }, { "epoch": 1.24, "learning_rate": 7.156416247541352e-06, "loss": 4.9142, "step": 283500 }, { "epoch": 1.24, "learning_rate": 7.150915915321286e-06, "loss": 4.925, "step": 284000 }, { "epoch": 1.25, "learning_rate": 7.145415583101219e-06, "loss": 4.903, "step": 284500 }, { "epoch": 1.25, "learning_rate": 7.139915250881154e-06, "loss": 4.9167, "step": 285000 }, { "epoch": 1.25, "learning_rate": 7.134414918661088e-06, "loss": 4.8977, "step": 285500 }, { "epoch": 1.25, "learning_rate": 7.128914586441021e-06, "loss": 4.8975, "step": 286000 }, { "epoch": 1.25, "learning_rate": 7.123414254220955e-06, "loss": 4.9021, "step": 286500 }, { "epoch": 1.26, "learning_rate": 7.11791392200089e-06, "loss": 4.8819, "step": 287000 }, { "epoch": 1.26, "learning_rate": 7.112413589780824e-06, "loss": 4.8766, "step": 287500 }, { "epoch": 1.26, "learning_rate": 7.106913257560757e-06, "loss": 4.8668, "step": 288000 }, { "epoch": 1.26, "learning_rate": 7.1014129253406916e-06, "loss": 4.883, "step": 288500 }, { "epoch": 1.26, "learning_rate": 7.0959125931206255e-06, "loss": 4.8537, "step": 289000 }, { "epoch": 1.27, "learning_rate": 7.0904122609005586e-06, "loss": 4.8567, "step": 289500 }, { "epoch": 1.27, "learning_rate": 7.0849119286804925e-06, "loss": 4.862, "step": 290000 }, { "epoch": 1.27, "learning_rate": 7.079411596460427e-06, "loss": 4.8569, "step": 290500 }, { "epoch": 1.27, "learning_rate": 7.07391126424036e-06, "loss": 4.8463, "step": 291000 }, { "epoch": 1.28, "learning_rate": 7.068410932020294e-06, "loss": 4.8474, "step": 291500 }, { "epoch": 1.28, "learning_rate": 7.062910599800229e-06, "loss": 4.8524, "step": 292000 }, { "epoch": 1.28, "learning_rate": 7.057410267580163e-06, "loss": 4.8418, "step": 292500 }, { "epoch": 1.28, "learning_rate": 7.051909935360096e-06, "loss": 4.8377, "step": 293000 }, { "epoch": 1.28, "learning_rate": 7.04640960314003e-06, "loss": 4.8286, "step": 293500 }, { "epoch": 1.29, "learning_rate": 7.040909270919965e-06, "loss": 4.8111, "step": 294000 }, { "epoch": 1.29, "learning_rate": 7.035408938699898e-06, "loss": 4.827, "step": 294500 }, { "epoch": 1.29, "learning_rate": 7.029908606479832e-06, "loss": 4.8078, "step": 295000 }, { "epoch": 1.29, "learning_rate": 7.0244082742597666e-06, "loss": 4.7961, "step": 295500 }, { "epoch": 1.3, "learning_rate": 7.0189079420397e-06, "loss": 4.7923, "step": 296000 }, { "epoch": 1.3, "learning_rate": 7.0134076098196336e-06, "loss": 4.7849, "step": 296500 }, { "epoch": 1.3, "learning_rate": 7.0079072775995675e-06, "loss": 4.7861, "step": 297000 }, { "epoch": 1.3, "learning_rate": 7.0024069453795014e-06, "loss": 4.7816, "step": 297500 }, { "epoch": 1.3, "learning_rate": 6.996906613159435e-06, "loss": 4.7775, "step": 298000 }, { "epoch": 1.31, "learning_rate": 6.991406280939369e-06, "loss": 4.7626, "step": 298500 }, { "epoch": 1.31, "learning_rate": 6.985905948719304e-06, "loss": 4.7622, "step": 299000 }, { "epoch": 1.31, "learning_rate": 6.980405616499237e-06, "loss": 4.7791, "step": 299500 }, { "epoch": 1.31, "learning_rate": 6.974905284279171e-06, "loss": 4.7738, "step": 300000 }, { "epoch": 1.32, "learning_rate": 6.969404952059104e-06, "loss": 4.7706, "step": 300500 }, { "epoch": 1.32, "learning_rate": 6.963904619839039e-06, "loss": 4.7621, "step": 301000 }, { "epoch": 1.32, "learning_rate": 6.958404287618973e-06, "loss": 4.7464, "step": 301500 }, { "epoch": 1.32, "learning_rate": 6.952903955398907e-06, "loss": 4.7444, "step": 302000 }, { "epoch": 1.32, "learning_rate": 6.947403623178841e-06, "loss": 4.7462, "step": 302500 }, { "epoch": 1.33, "learning_rate": 6.941903290958775e-06, "loss": 4.743, "step": 303000 }, { "epoch": 1.33, "learning_rate": 6.9364029587387086e-06, "loss": 4.733, "step": 303500 }, { "epoch": 1.33, "learning_rate": 6.930902626518642e-06, "loss": 4.7404, "step": 304000 }, { "epoch": 1.33, "learning_rate": 6.9254022942985764e-06, "loss": 4.7168, "step": 304500 }, { "epoch": 1.33, "learning_rate": 6.91990196207851e-06, "loss": 4.725, "step": 305000 }, { "epoch": 1.34, "learning_rate": 6.9144016298584434e-06, "loss": 4.7023, "step": 305500 }, { "epoch": 1.34, "learning_rate": 6.908901297638378e-06, "loss": 4.724, "step": 306000 }, { "epoch": 1.34, "learning_rate": 6.903400965418312e-06, "loss": 4.6999, "step": 306500 }, { "epoch": 1.34, "learning_rate": 6.897900633198246e-06, "loss": 4.7117, "step": 307000 }, { "epoch": 1.35, "learning_rate": 6.892400300978179e-06, "loss": 4.6932, "step": 307500 }, { "epoch": 1.35, "learning_rate": 6.886899968758114e-06, "loss": 4.7022, "step": 308000 }, { "epoch": 1.35, "learning_rate": 6.881399636538048e-06, "loss": 4.6799, "step": 308500 }, { "epoch": 1.35, "learning_rate": 6.875899304317981e-06, "loss": 4.6824, "step": 309000 }, { "epoch": 1.35, "learning_rate": 6.870398972097915e-06, "loss": 4.6786, "step": 309500 }, { "epoch": 1.36, "learning_rate": 6.86489863987785e-06, "loss": 4.6678, "step": 310000 }, { "epoch": 1.36, "learning_rate": 6.859398307657783e-06, "loss": 4.6949, "step": 310500 }, { "epoch": 1.36, "learning_rate": 6.853897975437717e-06, "loss": 4.6843, "step": 311000 }, { "epoch": 1.36, "learning_rate": 6.8483976432176514e-06, "loss": 4.6726, "step": 311500 }, { "epoch": 1.37, "learning_rate": 6.8428973109975845e-06, "loss": 4.6734, "step": 312000 }, { "epoch": 1.37, "learning_rate": 6.8373969787775184e-06, "loss": 4.6867, "step": 312500 }, { "epoch": 1.37, "learning_rate": 6.831896646557452e-06, "loss": 4.6817, "step": 313000 }, { "epoch": 1.37, "learning_rate": 6.826396314337387e-06, "loss": 4.6662, "step": 313500 }, { "epoch": 1.37, "learning_rate": 6.82089598211732e-06, "loss": 4.6444, "step": 314000 }, { "epoch": 1.38, "learning_rate": 6.815395649897254e-06, "loss": 4.6522, "step": 314500 }, { "epoch": 1.38, "learning_rate": 6.809895317677189e-06, "loss": 4.6509, "step": 315000 }, { "epoch": 1.38, "learning_rate": 6.804394985457122e-06, "loss": 4.656, "step": 315500 }, { "epoch": 1.38, "learning_rate": 6.798894653237056e-06, "loss": 4.6498, "step": 316000 }, { "epoch": 1.39, "learning_rate": 6.79339432101699e-06, "loss": 4.6359, "step": 316500 }, { "epoch": 1.39, "learning_rate": 6.787893988796924e-06, "loss": 4.6461, "step": 317000 }, { "epoch": 1.39, "learning_rate": 6.782393656576858e-06, "loss": 4.6279, "step": 317500 }, { "epoch": 1.39, "learning_rate": 6.776893324356792e-06, "loss": 4.6282, "step": 318000 }, { "epoch": 1.39, "learning_rate": 6.7713929921367264e-06, "loss": 4.6195, "step": 318500 }, { "epoch": 1.4, "learning_rate": 6.7658926599166595e-06, "loss": 4.6142, "step": 319000 }, { "epoch": 1.4, "learning_rate": 6.7603923276965934e-06, "loss": 4.6072, "step": 319500 }, { "epoch": 1.4, "learning_rate": 6.7548919954765265e-06, "loss": 4.6094, "step": 320000 }, { "epoch": 1.4, "learning_rate": 6.749391663256461e-06, "loss": 4.5979, "step": 320500 }, { "epoch": 1.4, "learning_rate": 6.743891331036395e-06, "loss": 4.6058, "step": 321000 }, { "epoch": 1.41, "learning_rate": 6.738390998816329e-06, "loss": 4.6106, "step": 321500 }, { "epoch": 1.41, "learning_rate": 6.732890666596263e-06, "loss": 4.5854, "step": 322000 }, { "epoch": 1.41, "learning_rate": 6.727390334376197e-06, "loss": 4.5927, "step": 322500 }, { "epoch": 1.41, "learning_rate": 6.721890002156131e-06, "loss": 4.5851, "step": 323000 }, { "epoch": 1.42, "learning_rate": 6.716389669936064e-06, "loss": 4.6067, "step": 323500 }, { "epoch": 1.42, "learning_rate": 6.710889337715999e-06, "loss": 4.5957, "step": 324000 }, { "epoch": 1.42, "learning_rate": 6.705389005495933e-06, "loss": 4.5881, "step": 324500 }, { "epoch": 1.42, "learning_rate": 6.699888673275866e-06, "loss": 4.5913, "step": 325000 }, { "epoch": 1.42, "learning_rate": 6.694388341055801e-06, "loss": 4.5669, "step": 325500 }, { "epoch": 1.43, "learning_rate": 6.6888880088357345e-06, "loss": 4.5722, "step": 326000 }, { "epoch": 1.43, "learning_rate": 6.683387676615668e-06, "loss": 4.5706, "step": 326500 }, { "epoch": 1.43, "learning_rate": 6.6778873443956015e-06, "loss": 4.5651, "step": 327000 }, { "epoch": 1.43, "learning_rate": 6.672387012175536e-06, "loss": 4.5625, "step": 327500 }, { "epoch": 1.44, "learning_rate": 6.66688667995547e-06, "loss": 4.5714, "step": 328000 }, { "epoch": 1.44, "learning_rate": 6.661386347735403e-06, "loss": 4.5521, "step": 328500 }, { "epoch": 1.44, "learning_rate": 6.655886015515338e-06, "loss": 4.5595, "step": 329000 }, { "epoch": 1.44, "learning_rate": 6.650385683295272e-06, "loss": 4.5633, "step": 329500 }, { "epoch": 1.44, "learning_rate": 6.644885351075205e-06, "loss": 4.5584, "step": 330000 }, { "epoch": 1.45, "learning_rate": 6.639385018855139e-06, "loss": 4.5715, "step": 330500 }, { "epoch": 1.45, "learning_rate": 6.633884686635074e-06, "loss": 4.5522, "step": 331000 }, { "epoch": 1.45, "learning_rate": 6.628384354415007e-06, "loss": 4.5238, "step": 331500 }, { "epoch": 1.45, "learning_rate": 6.622884022194941e-06, "loss": 4.5435, "step": 332000 }, { "epoch": 1.46, "learning_rate": 6.617383689974876e-06, "loss": 4.5146, "step": 332500 }, { "epoch": 1.46, "learning_rate": 6.6118833577548095e-06, "loss": 4.5555, "step": 333000 }, { "epoch": 1.46, "learning_rate": 6.606383025534743e-06, "loss": 4.5224, "step": 333500 }, { "epoch": 1.46, "learning_rate": 6.6008826933146765e-06, "loss": 4.5254, "step": 334000 }, { "epoch": 1.46, "learning_rate": 6.595382361094611e-06, "loss": 4.5067, "step": 334500 }, { "epoch": 1.47, "learning_rate": 6.589882028874544e-06, "loss": 4.5127, "step": 335000 }, { "epoch": 1.47, "learning_rate": 6.584381696654478e-06, "loss": 4.5133, "step": 335500 }, { "epoch": 1.47, "learning_rate": 6.578881364434412e-06, "loss": 4.5143, "step": 336000 }, { "epoch": 1.47, "learning_rate": 6.573381032214346e-06, "loss": 4.4947, "step": 336500 }, { "epoch": 1.47, "learning_rate": 6.56788069999428e-06, "loss": 4.4982, "step": 337000 }, { "epoch": 1.48, "learning_rate": 6.562380367774214e-06, "loss": 4.5174, "step": 337500 }, { "epoch": 1.48, "learning_rate": 6.556880035554149e-06, "loss": 4.4794, "step": 338000 }, { "epoch": 1.48, "learning_rate": 6.551379703334082e-06, "loss": 4.5072, "step": 338500 }, { "epoch": 1.48, "learning_rate": 6.545879371114016e-06, "loss": 4.486, "step": 339000 }, { "epoch": 1.49, "learning_rate": 6.540379038893949e-06, "loss": 4.4656, "step": 339500 }, { "epoch": 1.49, "learning_rate": 6.534878706673884e-06, "loss": 4.5007, "step": 340000 }, { "epoch": 1.49, "learning_rate": 6.529378374453818e-06, "loss": 4.4965, "step": 340500 }, { "epoch": 1.49, "learning_rate": 6.523878042233751e-06, "loss": 4.4591, "step": 341000 }, { "epoch": 1.49, "learning_rate": 6.5183777100136855e-06, "loss": 4.4768, "step": 341500 }, { "epoch": 1.5, "learning_rate": 6.512877377793619e-06, "loss": 4.477, "step": 342000 }, { "epoch": 1.5, "learning_rate": 6.507377045573553e-06, "loss": 4.4734, "step": 342500 }, { "epoch": 1.5, "learning_rate": 6.501876713353486e-06, "loss": 4.4715, "step": 343000 }, { "epoch": 1.5, "learning_rate": 6.496376381133421e-06, "loss": 4.4744, "step": 343500 }, { "epoch": 1.51, "learning_rate": 6.490876048913355e-06, "loss": 4.4673, "step": 344000 }, { "epoch": 1.51, "learning_rate": 6.485375716693288e-06, "loss": 4.4569, "step": 344500 }, { "epoch": 1.51, "learning_rate": 6.479875384473223e-06, "loss": 4.4386, "step": 345000 }, { "epoch": 1.51, "learning_rate": 6.474375052253157e-06, "loss": 4.46, "step": 345500 }, { "epoch": 1.51, "learning_rate": 6.46887472003309e-06, "loss": 4.4314, "step": 346000 }, { "epoch": 1.52, "learning_rate": 6.463374387813024e-06, "loss": 4.4401, "step": 346500 }, { "epoch": 1.52, "learning_rate": 6.457874055592959e-06, "loss": 4.4454, "step": 347000 }, { "epoch": 1.52, "learning_rate": 6.452373723372893e-06, "loss": 4.4322, "step": 347500 }, { "epoch": 1.52, "learning_rate": 6.446873391152826e-06, "loss": 4.4496, "step": 348000 }, { "epoch": 1.53, "learning_rate": 6.4413730589327605e-06, "loss": 4.4398, "step": 348500 }, { "epoch": 1.53, "learning_rate": 6.435872726712694e-06, "loss": 4.4377, "step": 349000 }, { "epoch": 1.53, "learning_rate": 6.4303723944926275e-06, "loss": 4.4474, "step": 349500 }, { "epoch": 1.53, "learning_rate": 6.424872062272561e-06, "loss": 4.4255, "step": 350000 }, { "epoch": 1.53, "learning_rate": 6.419371730052496e-06, "loss": 4.4139, "step": 350500 }, { "epoch": 1.54, "learning_rate": 6.413871397832429e-06, "loss": 4.4063, "step": 351000 }, { "epoch": 1.54, "learning_rate": 6.408371065612363e-06, "loss": 4.4205, "step": 351500 }, { "epoch": 1.54, "learning_rate": 6.402870733392298e-06, "loss": 4.4158, "step": 352000 }, { "epoch": 1.54, "learning_rate": 6.397370401172232e-06, "loss": 4.4186, "step": 352500 }, { "epoch": 1.54, "learning_rate": 6.391870068952165e-06, "loss": 4.4078, "step": 353000 }, { "epoch": 1.55, "learning_rate": 6.386369736732099e-06, "loss": 4.3984, "step": 353500 }, { "epoch": 1.55, "learning_rate": 6.380869404512034e-06, "loss": 4.4146, "step": 354000 }, { "epoch": 1.55, "learning_rate": 6.375369072291967e-06, "loss": 4.4225, "step": 354500 }, { "epoch": 1.55, "learning_rate": 6.369868740071901e-06, "loss": 4.4101, "step": 355000 }, { "epoch": 1.56, "learning_rate": 6.3643684078518355e-06, "loss": 4.3992, "step": 355500 }, { "epoch": 1.56, "learning_rate": 6.3588680756317686e-06, "loss": 4.4008, "step": 356000 }, { "epoch": 1.56, "learning_rate": 6.3533677434117025e-06, "loss": 4.4077, "step": 356500 }, { "epoch": 1.56, "learning_rate": 6.347867411191636e-06, "loss": 4.3789, "step": 357000 }, { "epoch": 1.56, "learning_rate": 6.342367078971571e-06, "loss": 4.3914, "step": 357500 }, { "epoch": 1.57, "learning_rate": 6.336866746751504e-06, "loss": 4.3788, "step": 358000 }, { "epoch": 1.57, "learning_rate": 6.331366414531438e-06, "loss": 4.3638, "step": 358500 }, { "epoch": 1.57, "learning_rate": 6.325866082311373e-06, "loss": 4.3766, "step": 359000 }, { "epoch": 1.57, "learning_rate": 6.320365750091306e-06, "loss": 4.3919, "step": 359500 }, { "epoch": 1.58, "learning_rate": 6.31486541787124e-06, "loss": 4.3809, "step": 360000 }, { "epoch": 1.58, "learning_rate": 6.309365085651173e-06, "loss": 4.3673, "step": 360500 }, { "epoch": 1.58, "learning_rate": 6.303864753431108e-06, "loss": 4.3655, "step": 361000 }, { "epoch": 1.58, "learning_rate": 6.298364421211042e-06, "loss": 4.3688, "step": 361500 }, { "epoch": 1.58, "learning_rate": 6.292864088990976e-06, "loss": 4.3605, "step": 362000 }, { "epoch": 1.59, "learning_rate": 6.287363756770909e-06, "loss": 4.3698, "step": 362500 }, { "epoch": 1.59, "learning_rate": 6.2818634245508436e-06, "loss": 4.3801, "step": 363000 }, { "epoch": 1.59, "learning_rate": 6.2763630923307775e-06, "loss": 4.35, "step": 363500 }, { "epoch": 1.59, "learning_rate": 6.2708627601107106e-06, "loss": 4.3594, "step": 364000 }, { "epoch": 1.6, "learning_rate": 6.265362427890645e-06, "loss": 4.3559, "step": 364500 }, { "epoch": 1.6, "learning_rate": 6.259862095670579e-06, "loss": 4.3507, "step": 365000 }, { "epoch": 1.6, "learning_rate": 6.254361763450512e-06, "loss": 4.3555, "step": 365500 }, { "epoch": 1.6, "learning_rate": 6.248861431230446e-06, "loss": 4.3418, "step": 366000 }, { "epoch": 1.6, "learning_rate": 6.243361099010381e-06, "loss": 4.354, "step": 366500 }, { "epoch": 1.61, "learning_rate": 6.237860766790315e-06, "loss": 4.3442, "step": 367000 }, { "epoch": 1.61, "learning_rate": 6.232360434570248e-06, "loss": 4.3484, "step": 367500 }, { "epoch": 1.61, "learning_rate": 6.226860102350183e-06, "loss": 4.3402, "step": 368000 }, { "epoch": 1.61, "learning_rate": 6.221359770130117e-06, "loss": 4.3284, "step": 368500 }, { "epoch": 1.61, "learning_rate": 6.21585943791005e-06, "loss": 4.3331, "step": 369000 }, { "epoch": 1.62, "learning_rate": 6.210359105689984e-06, "loss": 4.3404, "step": 369500 }, { "epoch": 1.62, "learning_rate": 6.2048587734699186e-06, "loss": 4.3379, "step": 370000 }, { "epoch": 1.62, "learning_rate": 6.199358441249852e-06, "loss": 4.3199, "step": 370500 }, { "epoch": 1.62, "learning_rate": 6.1938581090297856e-06, "loss": 4.3336, "step": 371000 }, { "epoch": 1.63, "learning_rate": 6.18835777680972e-06, "loss": 4.3294, "step": 371500 }, { "epoch": 1.63, "learning_rate": 6.182857444589654e-06, "loss": 4.3154, "step": 372000 }, { "epoch": 1.63, "learning_rate": 6.177357112369587e-06, "loss": 4.3203, "step": 372500 }, { "epoch": 1.63, "learning_rate": 6.171856780149521e-06, "loss": 4.3223, "step": 373000 }, { "epoch": 1.63, "learning_rate": 6.166356447929456e-06, "loss": 4.3287, "step": 373500 }, { "epoch": 1.64, "learning_rate": 6.160856115709389e-06, "loss": 4.3136, "step": 374000 }, { "epoch": 1.64, "learning_rate": 6.155355783489323e-06, "loss": 4.3073, "step": 374500 }, { "epoch": 1.64, "learning_rate": 6.149855451269258e-06, "loss": 4.3099, "step": 375000 }, { "epoch": 1.64, "learning_rate": 6.144355119049191e-06, "loss": 4.3075, "step": 375500 }, { "epoch": 1.65, "learning_rate": 6.138854786829125e-06, "loss": 4.315, "step": 376000 }, { "epoch": 1.65, "learning_rate": 6.133354454609059e-06, "loss": 4.3127, "step": 376500 }, { "epoch": 1.65, "learning_rate": 6.127854122388993e-06, "loss": 4.2933, "step": 377000 }, { "epoch": 1.65, "learning_rate": 6.122353790168927e-06, "loss": 4.2924, "step": 377500 }, { "epoch": 1.65, "learning_rate": 6.116853457948861e-06, "loss": 4.2817, "step": 378000 }, { "epoch": 1.66, "learning_rate": 6.111353125728795e-06, "loss": 4.3183, "step": 378500 }, { "epoch": 1.66, "learning_rate": 6.1058527935087284e-06, "loss": 4.3031, "step": 379000 }, { "epoch": 1.66, "learning_rate": 6.100352461288662e-06, "loss": 4.2815, "step": 379500 }, { "epoch": 1.66, "learning_rate": 6.0948521290685954e-06, "loss": 4.2801, "step": 380000 }, { "epoch": 1.67, "learning_rate": 6.08935179684853e-06, "loss": 4.2856, "step": 380500 }, { "epoch": 1.67, "learning_rate": 6.083851464628464e-06, "loss": 4.2868, "step": 381000 }, { "epoch": 1.67, "learning_rate": 6.078351132408398e-06, "loss": 4.2638, "step": 381500 }, { "epoch": 1.67, "learning_rate": 6.072850800188332e-06, "loss": 4.2782, "step": 382000 }, { "epoch": 1.67, "learning_rate": 6.067350467968266e-06, "loss": 4.2894, "step": 382500 }, { "epoch": 1.68, "learning_rate": 6.0618501357482e-06, "loss": 4.256, "step": 383000 }, { "epoch": 1.68, "learning_rate": 6.056349803528133e-06, "loss": 4.2703, "step": 383500 }, { "epoch": 1.68, "learning_rate": 6.050849471308068e-06, "loss": 4.2549, "step": 384000 }, { "epoch": 1.68, "learning_rate": 6.045349139088002e-06, "loss": 4.2678, "step": 384500 }, { "epoch": 1.68, "learning_rate": 6.039848806867935e-06, "loss": 4.2549, "step": 385000 }, { "epoch": 1.69, "learning_rate": 6.0343484746478695e-06, "loss": 4.2614, "step": 385500 }, { "epoch": 1.69, "learning_rate": 6.0288481424278034e-06, "loss": 4.2745, "step": 386000 }, { "epoch": 1.69, "learning_rate": 6.023347810207737e-06, "loss": 4.2644, "step": 386500 }, { "epoch": 1.69, "learning_rate": 6.0178474779876705e-06, "loss": 4.2588, "step": 387000 }, { "epoch": 1.7, "learning_rate": 6.012347145767605e-06, "loss": 4.2519, "step": 387500 }, { "epoch": 1.7, "learning_rate": 6.006846813547539e-06, "loss": 4.2536, "step": 388000 }, { "epoch": 1.7, "learning_rate": 6.001346481327472e-06, "loss": 4.2446, "step": 388500 }, { "epoch": 1.7, "learning_rate": 5.995846149107406e-06, "loss": 4.2396, "step": 389000 }, { "epoch": 1.7, "learning_rate": 5.990345816887341e-06, "loss": 4.2419, "step": 389500 }, { "epoch": 1.71, "learning_rate": 5.984845484667274e-06, "loss": 4.2453, "step": 390000 }, { "epoch": 1.71, "learning_rate": 5.979345152447208e-06, "loss": 4.2507, "step": 390500 }, { "epoch": 1.71, "learning_rate": 5.973844820227143e-06, "loss": 4.2294, "step": 391000 }, { "epoch": 1.71, "learning_rate": 5.968344488007076e-06, "loss": 4.2284, "step": 391500 }, { "epoch": 1.72, "learning_rate": 5.96284415578701e-06, "loss": 4.231, "step": 392000 }, { "epoch": 1.72, "learning_rate": 5.957343823566944e-06, "loss": 4.2395, "step": 392500 }, { "epoch": 1.72, "learning_rate": 5.9518434913468784e-06, "loss": 4.2113, "step": 393000 }, { "epoch": 1.72, "learning_rate": 5.9463431591268115e-06, "loss": 4.2103, "step": 393500 }, { "epoch": 1.72, "learning_rate": 5.9408428269067455e-06, "loss": 4.2229, "step": 394000 }, { "epoch": 1.73, "learning_rate": 5.93534249468668e-06, "loss": 4.222, "step": 394500 }, { "epoch": 1.73, "learning_rate": 5.929842162466613e-06, "loss": 4.2092, "step": 395000 }, { "epoch": 1.73, "learning_rate": 5.924341830246547e-06, "loss": 4.2056, "step": 395500 }, { "epoch": 1.73, "learning_rate": 5.918841498026481e-06, "loss": 4.2295, "step": 396000 }, { "epoch": 1.74, "learning_rate": 5.913341165806415e-06, "loss": 4.2, "step": 396500 }, { "epoch": 1.74, "learning_rate": 5.907840833586349e-06, "loss": 4.2339, "step": 397000 }, { "epoch": 1.74, "learning_rate": 5.902340501366283e-06, "loss": 4.2158, "step": 397500 }, { "epoch": 1.74, "learning_rate": 5.896840169146218e-06, "loss": 4.2161, "step": 398000 }, { "epoch": 1.74, "learning_rate": 5.891339836926151e-06, "loss": 4.2005, "step": 398500 }, { "epoch": 1.75, "learning_rate": 5.885839504706085e-06, "loss": 4.2143, "step": 399000 }, { "epoch": 1.75, "learning_rate": 5.880339172486018e-06, "loss": 4.2276, "step": 399500 }, { "epoch": 1.75, "learning_rate": 5.874838840265953e-06, "loss": 4.2186, "step": 400000 }, { "epoch": 1.75, "learning_rate": 5.8693385080458865e-06, "loss": 4.1812, "step": 400500 }, { "epoch": 1.75, "learning_rate": 5.8638381758258205e-06, "loss": 4.2175, "step": 401000 }, { "epoch": 1.76, "learning_rate": 5.858337843605754e-06, "loss": 4.1985, "step": 401500 }, { "epoch": 1.76, "learning_rate": 5.852837511385688e-06, "loss": 4.1935, "step": 402000 }, { "epoch": 1.76, "learning_rate": 5.847337179165622e-06, "loss": 4.1974, "step": 402500 }, { "epoch": 1.76, "learning_rate": 5.841836846945555e-06, "loss": 4.2105, "step": 403000 }, { "epoch": 1.77, "learning_rate": 5.83633651472549e-06, "loss": 4.2008, "step": 403500 }, { "epoch": 1.77, "learning_rate": 5.830836182505424e-06, "loss": 4.1886, "step": 404000 }, { "epoch": 1.77, "learning_rate": 5.825335850285357e-06, "loss": 4.1874, "step": 404500 }, { "epoch": 1.77, "learning_rate": 5.819835518065292e-06, "loss": 4.1874, "step": 405000 }, { "epoch": 1.77, "learning_rate": 5.814335185845226e-06, "loss": 4.1816, "step": 405500 }, { "epoch": 1.78, "learning_rate": 5.808834853625159e-06, "loss": 4.1745, "step": 406000 }, { "epoch": 1.78, "learning_rate": 5.803334521405093e-06, "loss": 4.1802, "step": 406500 }, { "epoch": 1.78, "learning_rate": 5.797834189185028e-06, "loss": 4.1832, "step": 407000 }, { "epoch": 1.78, "learning_rate": 5.7923338569649615e-06, "loss": 4.1693, "step": 407500 }, { "epoch": 1.79, "learning_rate": 5.786833524744895e-06, "loss": 4.1977, "step": 408000 }, { "epoch": 1.79, "learning_rate": 5.781333192524829e-06, "loss": 4.1706, "step": 408500 }, { "epoch": 1.79, "learning_rate": 5.775832860304763e-06, "loss": 4.1828, "step": 409000 }, { "epoch": 1.79, "learning_rate": 5.770332528084696e-06, "loss": 4.1706, "step": 409500 }, { "epoch": 1.79, "learning_rate": 5.76483219586463e-06, "loss": 4.154, "step": 410000 }, { "epoch": 1.8, "learning_rate": 5.759331863644565e-06, "loss": 4.1507, "step": 410500 }, { "epoch": 1.8, "learning_rate": 5.753831531424498e-06, "loss": 4.1611, "step": 411000 }, { "epoch": 1.8, "learning_rate": 5.748331199204432e-06, "loss": 4.1719, "step": 411500 }, { "epoch": 1.8, "learning_rate": 5.742830866984367e-06, "loss": 4.1487, "step": 412000 }, { "epoch": 1.81, "learning_rate": 5.737330534764301e-06, "loss": 4.1684, "step": 412500 }, { "epoch": 1.81, "learning_rate": 5.731830202544234e-06, "loss": 4.1549, "step": 413000 }, { "epoch": 1.81, "learning_rate": 5.726329870324168e-06, "loss": 4.1765, "step": 413500 }, { "epoch": 1.81, "learning_rate": 5.720829538104103e-06, "loss": 4.1499, "step": 414000 }, { "epoch": 1.81, "learning_rate": 5.715329205884036e-06, "loss": 4.1573, "step": 414500 }, { "epoch": 1.82, "learning_rate": 5.70982887366397e-06, "loss": 4.1644, "step": 415000 }, { "epoch": 1.82, "learning_rate": 5.7043285414439035e-06, "loss": 4.1414, "step": 415500 }, { "epoch": 1.82, "learning_rate": 5.6988282092238375e-06, "loss": 4.1282, "step": 416000 }, { "epoch": 1.82, "learning_rate": 5.693327877003771e-06, "loss": 4.1548, "step": 416500 }, { "epoch": 1.82, "learning_rate": 5.687827544783705e-06, "loss": 4.1391, "step": 417000 }, { "epoch": 1.83, "learning_rate": 5.68232721256364e-06, "loss": 4.1278, "step": 417500 }, { "epoch": 1.83, "learning_rate": 5.676826880343573e-06, "loss": 4.1243, "step": 418000 }, { "epoch": 1.83, "learning_rate": 5.671326548123507e-06, "loss": 4.1298, "step": 418500 }, { "epoch": 1.83, "learning_rate": 5.66582621590344e-06, "loss": 4.1463, "step": 419000 }, { "epoch": 1.84, "learning_rate": 5.660325883683375e-06, "loss": 4.1488, "step": 419500 }, { "epoch": 1.84, "learning_rate": 5.654825551463309e-06, "loss": 4.1529, "step": 420000 }, { "epoch": 1.84, "learning_rate": 5.649325219243242e-06, "loss": 4.1406, "step": 420500 }, { "epoch": 1.84, "learning_rate": 5.643824887023177e-06, "loss": 4.1463, "step": 421000 }, { "epoch": 1.84, "learning_rate": 5.638324554803111e-06, "loss": 4.1317, "step": 421500 }, { "epoch": 1.85, "learning_rate": 5.632824222583045e-06, "loss": 4.1392, "step": 422000 }, { "epoch": 1.85, "learning_rate": 5.627323890362978e-06, "loss": 4.1185, "step": 422500 }, { "epoch": 1.85, "learning_rate": 5.6218235581429125e-06, "loss": 4.1239, "step": 423000 }, { "epoch": 1.85, "learning_rate": 5.616323225922846e-06, "loss": 4.128, "step": 423500 }, { "epoch": 1.86, "learning_rate": 5.6108228937027795e-06, "loss": 4.1302, "step": 424000 }, { "epoch": 1.86, "learning_rate": 5.605322561482714e-06, "loss": 4.1128, "step": 424500 }, { "epoch": 1.86, "learning_rate": 5.599822229262648e-06, "loss": 4.1167, "step": 425000 }, { "epoch": 1.86, "learning_rate": 5.594321897042581e-06, "loss": 4.1224, "step": 425500 }, { "epoch": 1.86, "learning_rate": 5.588821564822515e-06, "loss": 4.1151, "step": 426000 }, { "epoch": 1.87, "learning_rate": 5.58332123260245e-06, "loss": 4.1033, "step": 426500 }, { "epoch": 1.87, "learning_rate": 5.577820900382384e-06, "loss": 4.1025, "step": 427000 }, { "epoch": 1.87, "learning_rate": 5.572320568162317e-06, "loss": 4.1059, "step": 427500 }, { "epoch": 1.87, "learning_rate": 5.566820235942252e-06, "loss": 4.1141, "step": 428000 }, { "epoch": 1.88, "learning_rate": 5.561319903722186e-06, "loss": 4.1059, "step": 428500 }, { "epoch": 1.88, "learning_rate": 5.555819571502119e-06, "loss": 4.1238, "step": 429000 }, { "epoch": 1.88, "learning_rate": 5.550319239282053e-06, "loss": 4.1249, "step": 429500 }, { "epoch": 1.88, "learning_rate": 5.5448189070619875e-06, "loss": 4.0975, "step": 430000 }, { "epoch": 1.88, "learning_rate": 5.5393185748419206e-06, "loss": 4.0973, "step": 430500 }, { "epoch": 1.89, "learning_rate": 5.5338182426218545e-06, "loss": 4.1075, "step": 431000 }, { "epoch": 1.89, "learning_rate": 5.528317910401789e-06, "loss": 4.0997, "step": 431500 }, { "epoch": 1.89, "learning_rate": 5.522817578181723e-06, "loss": 4.1091, "step": 432000 }, { "epoch": 1.89, "learning_rate": 5.517317245961656e-06, "loss": 4.1006, "step": 432500 }, { "epoch": 1.89, "learning_rate": 5.51181691374159e-06, "loss": 4.0814, "step": 433000 }, { "epoch": 1.9, "learning_rate": 5.506316581521525e-06, "loss": 4.0982, "step": 433500 }, { "epoch": 1.9, "learning_rate": 5.500816249301458e-06, "loss": 4.1054, "step": 434000 }, { "epoch": 1.9, "learning_rate": 5.495315917081392e-06, "loss": 4.1086, "step": 434500 }, { "epoch": 1.9, "learning_rate": 5.489815584861327e-06, "loss": 4.0927, "step": 435000 }, { "epoch": 1.91, "learning_rate": 5.48431525264126e-06, "loss": 4.0906, "step": 435500 }, { "epoch": 1.91, "learning_rate": 5.478814920421194e-06, "loss": 4.083, "step": 436000 }, { "epoch": 1.91, "learning_rate": 5.473314588201128e-06, "loss": 4.0806, "step": 436500 }, { "epoch": 1.91, "learning_rate": 5.4678142559810625e-06, "loss": 4.0919, "step": 437000 }, { "epoch": 1.91, "learning_rate": 5.4623139237609956e-06, "loss": 4.0932, "step": 437500 }, { "epoch": 1.92, "learning_rate": 5.4568135915409295e-06, "loss": 4.0956, "step": 438000 }, { "epoch": 1.92, "learning_rate": 5.451313259320864e-06, "loss": 4.0834, "step": 438500 }, { "epoch": 1.92, "learning_rate": 5.445812927100797e-06, "loss": 4.0945, "step": 439000 }, { "epoch": 1.92, "learning_rate": 5.440312594880731e-06, "loss": 4.0671, "step": 439500 }, { "epoch": 1.93, "learning_rate": 5.434812262660664e-06, "loss": 4.0632, "step": 440000 }, { "epoch": 1.93, "learning_rate": 5.429311930440599e-06, "loss": 4.0858, "step": 440500 }, { "epoch": 1.93, "learning_rate": 5.423811598220533e-06, "loss": 4.0743, "step": 441000 }, { "epoch": 1.93, "learning_rate": 5.418311266000467e-06, "loss": 4.0862, "step": 441500 }, { "epoch": 1.93, "learning_rate": 5.4128109337804e-06, "loss": 4.067, "step": 442000 }, { "epoch": 1.94, "learning_rate": 5.407310601560335e-06, "loss": 4.0933, "step": 442500 }, { "epoch": 1.94, "learning_rate": 5.401810269340269e-06, "loss": 4.0772, "step": 443000 }, { "epoch": 1.94, "learning_rate": 5.396309937120202e-06, "loss": 4.0608, "step": 443500 }, { "epoch": 1.94, "learning_rate": 5.390809604900137e-06, "loss": 4.0552, "step": 444000 }, { "epoch": 1.95, "learning_rate": 5.3853092726800706e-06, "loss": 4.0621, "step": 444500 }, { "epoch": 1.95, "learning_rate": 5.379808940460004e-06, "loss": 4.0642, "step": 445000 }, { "epoch": 1.95, "learning_rate": 5.374308608239938e-06, "loss": 4.0664, "step": 445500 }, { "epoch": 1.95, "learning_rate": 5.368808276019872e-06, "loss": 4.066, "step": 446000 }, { "epoch": 1.95, "learning_rate": 5.363307943799806e-06, "loss": 4.0523, "step": 446500 }, { "epoch": 1.96, "learning_rate": 5.357807611579739e-06, "loss": 4.0584, "step": 447000 }, { "epoch": 1.96, "learning_rate": 5.352307279359674e-06, "loss": 4.0603, "step": 447500 }, { "epoch": 1.96, "learning_rate": 5.346806947139608e-06, "loss": 4.0674, "step": 448000 }, { "epoch": 1.96, "learning_rate": 5.341306614919541e-06, "loss": 4.049, "step": 448500 }, { "epoch": 1.96, "learning_rate": 5.335806282699475e-06, "loss": 4.0774, "step": 449000 }, { "epoch": 1.97, "learning_rate": 5.33030595047941e-06, "loss": 4.0627, "step": 449500 }, { "epoch": 1.97, "learning_rate": 5.324805618259343e-06, "loss": 4.0462, "step": 450000 }, { "epoch": 1.97, "learning_rate": 5.319305286039277e-06, "loss": 4.0567, "step": 450500 }, { "epoch": 1.97, "learning_rate": 5.313804953819212e-06, "loss": 4.0407, "step": 451000 }, { "epoch": 1.98, "learning_rate": 5.3083046215991456e-06, "loss": 4.0465, "step": 451500 }, { "epoch": 1.98, "learning_rate": 5.302804289379079e-06, "loss": 4.0469, "step": 452000 }, { "epoch": 1.98, "learning_rate": 5.297303957159013e-06, "loss": 4.0454, "step": 452500 }, { "epoch": 1.98, "learning_rate": 5.291803624938947e-06, "loss": 4.0364, "step": 453000 }, { "epoch": 1.98, "learning_rate": 5.2863032927188804e-06, "loss": 4.0388, "step": 453500 }, { "epoch": 1.99, "learning_rate": 5.280802960498814e-06, "loss": 4.0361, "step": 454000 }, { "epoch": 1.99, "learning_rate": 5.275302628278749e-06, "loss": 4.0377, "step": 454500 }, { "epoch": 1.99, "learning_rate": 5.269802296058682e-06, "loss": 4.0349, "step": 455000 }, { "epoch": 1.99, "learning_rate": 5.264301963838616e-06, "loss": 4.0458, "step": 455500 }, { "epoch": 2.0, "learning_rate": 5.25880163161855e-06, "loss": 4.0532, "step": 456000 }, { "epoch": 2.0, "learning_rate": 5.253301299398484e-06, "loss": 4.0337, "step": 456500 }, { "epoch": 2.0, "learning_rate": 5.247800967178418e-06, "loss": 4.0266, "step": 457000 }, { "epoch": 2.0, "learning_rate": 5.242300634958352e-06, "loss": 4.012, "step": 457500 }, { "epoch": 2.0, "learning_rate": 5.236800302738287e-06, "loss": 4.0138, "step": 458000 }, { "epoch": 2.01, "learning_rate": 5.23129997051822e-06, "loss": 4.037, "step": 458500 }, { "epoch": 2.01, "learning_rate": 5.225799638298154e-06, "loss": 4.0339, "step": 459000 }, { "epoch": 2.01, "learning_rate": 5.220299306078087e-06, "loss": 4.0195, "step": 459500 }, { "epoch": 2.01, "learning_rate": 5.2147989738580215e-06, "loss": 4.0101, "step": 460000 }, { "epoch": 2.02, "learning_rate": 5.2092986416379554e-06, "loss": 4.021, "step": 460500 }, { "epoch": 2.02, "learning_rate": 5.203798309417889e-06, "loss": 4.0386, "step": 461000 }, { "epoch": 2.02, "learning_rate": 5.198297977197823e-06, "loss": 4.0007, "step": 461500 }, { "epoch": 2.02, "learning_rate": 5.192797644977757e-06, "loss": 4.0172, "step": 462000 }, { "epoch": 2.02, "learning_rate": 5.187297312757691e-06, "loss": 4.017, "step": 462500 }, { "epoch": 2.03, "learning_rate": 5.181796980537624e-06, "loss": 4.0018, "step": 463000 }, { "epoch": 2.03, "learning_rate": 5.176296648317559e-06, "loss": 4.017, "step": 463500 }, { "epoch": 2.03, "learning_rate": 5.170796316097493e-06, "loss": 4.0322, "step": 464000 }, { "epoch": 2.03, "learning_rate": 5.165295983877426e-06, "loss": 4.0057, "step": 464500 }, { "epoch": 2.03, "learning_rate": 5.159795651657361e-06, "loss": 4.017, "step": 465000 }, { "epoch": 2.04, "learning_rate": 5.154295319437295e-06, "loss": 4.0169, "step": 465500 }, { "epoch": 2.04, "learning_rate": 5.148794987217229e-06, "loss": 4.0138, "step": 466000 }, { "epoch": 2.04, "learning_rate": 5.143294654997162e-06, "loss": 4.0021, "step": 466500 }, { "epoch": 2.04, "learning_rate": 5.1377943227770965e-06, "loss": 4.0079, "step": 467000 }, { "epoch": 2.05, "learning_rate": 5.1322939905570304e-06, "loss": 3.9932, "step": 467500 }, { "epoch": 2.05, "learning_rate": 5.1267936583369635e-06, "loss": 4.0081, "step": 468000 }, { "epoch": 2.05, "learning_rate": 5.1212933261168975e-06, "loss": 3.9981, "step": 468500 }, { "epoch": 2.05, "learning_rate": 5.115792993896832e-06, "loss": 4.0078, "step": 469000 }, { "epoch": 2.05, "learning_rate": 5.110292661676765e-06, "loss": 3.9841, "step": 469500 }, { "epoch": 2.06, "learning_rate": 5.104792329456699e-06, "loss": 3.9979, "step": 470000 }, { "epoch": 2.06, "learning_rate": 5.099291997236634e-06, "loss": 3.9957, "step": 470500 }, { "epoch": 2.06, "learning_rate": 5.093791665016567e-06, "loss": 4.0029, "step": 471000 }, { "epoch": 2.06, "learning_rate": 5.088291332796501e-06, "loss": 3.9814, "step": 471500 }, { "epoch": 2.07, "learning_rate": 5.082791000576435e-06, "loss": 3.9898, "step": 472000 }, { "epoch": 2.07, "learning_rate": 5.07729066835637e-06, "loss": 3.9957, "step": 472500 }, { "epoch": 2.07, "learning_rate": 5.071790336136303e-06, "loss": 3.9726, "step": 473000 }, { "epoch": 2.07, "learning_rate": 5.066290003916237e-06, "loss": 4.0125, "step": 473500 }, { "epoch": 2.07, "learning_rate": 5.0607896716961715e-06, "loss": 3.9859, "step": 474000 }, { "epoch": 2.08, "learning_rate": 5.055289339476105e-06, "loss": 3.9817, "step": 474500 }, { "epoch": 2.08, "learning_rate": 5.0497890072560385e-06, "loss": 3.9765, "step": 475000 }, { "epoch": 2.08, "learning_rate": 5.0442886750359725e-06, "loss": 3.9675, "step": 475500 }, { "epoch": 2.08, "learning_rate": 5.038788342815906e-06, "loss": 3.9764, "step": 476000 }, { "epoch": 2.09, "learning_rate": 5.03328801059584e-06, "loss": 3.9743, "step": 476500 }, { "epoch": 2.09, "learning_rate": 5.027787678375774e-06, "loss": 3.9681, "step": 477000 }, { "epoch": 2.09, "learning_rate": 5.022287346155709e-06, "loss": 3.9895, "step": 477500 }, { "epoch": 2.09, "learning_rate": 5.016787013935642e-06, "loss": 3.9755, "step": 478000 }, { "epoch": 2.09, "learning_rate": 5.011286681715576e-06, "loss": 3.9612, "step": 478500 }, { "epoch": 2.1, "learning_rate": 5.005786349495509e-06, "loss": 3.9687, "step": 479000 }, { "epoch": 2.1, "learning_rate": 5.000286017275444e-06, "loss": 3.9685, "step": 479500 }, { "epoch": 2.1, "learning_rate": 4.994785685055378e-06, "loss": 3.9742, "step": 480000 }, { "epoch": 2.1, "learning_rate": 4.989285352835312e-06, "loss": 3.9636, "step": 480500 }, { "epoch": 2.1, "learning_rate": 4.983785020615246e-06, "loss": 3.9729, "step": 481000 }, { "epoch": 2.11, "learning_rate": 4.97828468839518e-06, "loss": 3.9758, "step": 481500 }, { "epoch": 2.11, "learning_rate": 4.9727843561751135e-06, "loss": 3.9642, "step": 482000 }, { "epoch": 2.11, "learning_rate": 4.9672840239550475e-06, "loss": 3.9607, "step": 482500 }, { "epoch": 2.11, "learning_rate": 4.961783691734981e-06, "loss": 3.9643, "step": 483000 }, { "epoch": 2.12, "learning_rate": 4.956283359514915e-06, "loss": 3.9637, "step": 483500 }, { "epoch": 2.12, "learning_rate": 4.950783027294849e-06, "loss": 3.9623, "step": 484000 }, { "epoch": 2.12, "learning_rate": 4.945282695074783e-06, "loss": 3.9623, "step": 484500 }, { "epoch": 2.12, "learning_rate": 4.939782362854717e-06, "loss": 3.9709, "step": 485000 }, { "epoch": 2.12, "learning_rate": 4.93428203063465e-06, "loss": 3.9395, "step": 485500 }, { "epoch": 2.13, "learning_rate": 4.928781698414585e-06, "loss": 3.9428, "step": 486000 }, { "epoch": 2.13, "learning_rate": 4.923281366194518e-06, "loss": 4.0036, "step": 486500 }, { "epoch": 2.13, "learning_rate": 4.917781033974453e-06, "loss": 3.9457, "step": 487000 }, { "epoch": 2.13, "learning_rate": 4.912280701754386e-06, "loss": 3.9635, "step": 487500 }, { "epoch": 2.14, "learning_rate": 4.90678036953432e-06, "loss": 3.954, "step": 488000 }, { "epoch": 2.14, "learning_rate": 4.901280037314255e-06, "loss": 3.9367, "step": 488500 }, { "epoch": 2.14, "learning_rate": 4.895779705094188e-06, "loss": 3.9686, "step": 489000 }, { "epoch": 2.14, "learning_rate": 4.8902793728741225e-06, "loss": 3.9572, "step": 489500 }, { "epoch": 2.14, "learning_rate": 4.8847790406540556e-06, "loss": 3.9387, "step": 490000 }, { "epoch": 2.15, "learning_rate": 4.8792787084339895e-06, "loss": 3.9297, "step": 490500 }, { "epoch": 2.15, "learning_rate": 4.873778376213923e-06, "loss": 3.9491, "step": 491000 }, { "epoch": 2.15, "learning_rate": 4.868278043993857e-06, "loss": 3.9532, "step": 491500 }, { "epoch": 2.15, "learning_rate": 4.862777711773792e-06, "loss": 3.9393, "step": 492000 }, { "epoch": 2.16, "learning_rate": 4.857277379553725e-06, "loss": 3.9418, "step": 492500 }, { "epoch": 2.16, "learning_rate": 4.851777047333659e-06, "loss": 3.9304, "step": 493000 }, { "epoch": 2.16, "learning_rate": 4.846276715113593e-06, "loss": 3.9539, "step": 493500 }, { "epoch": 2.16, "learning_rate": 4.840776382893527e-06, "loss": 3.9458, "step": 494000 }, { "epoch": 2.16, "learning_rate": 4.835276050673461e-06, "loss": 3.9491, "step": 494500 }, { "epoch": 2.17, "learning_rate": 4.829775718453395e-06, "loss": 3.9425, "step": 495000 }, { "epoch": 2.17, "learning_rate": 4.824275386233329e-06, "loss": 3.9529, "step": 495500 }, { "epoch": 2.17, "learning_rate": 4.818775054013263e-06, "loss": 3.9418, "step": 496000 }, { "epoch": 2.17, "learning_rate": 4.813274721793197e-06, "loss": 3.945, "step": 496500 }, { "epoch": 2.17, "learning_rate": 4.8077743895731306e-06, "loss": 3.9581, "step": 497000 }, { "epoch": 2.18, "learning_rate": 4.8022740573530645e-06, "loss": 3.9464, "step": 497500 }, { "epoch": 2.18, "learning_rate": 4.796773725132998e-06, "loss": 3.9287, "step": 498000 }, { "epoch": 2.18, "learning_rate": 4.791273392912932e-06, "loss": 3.9462, "step": 498500 }, { "epoch": 2.18, "learning_rate": 4.785773060692866e-06, "loss": 3.9331, "step": 499000 }, { "epoch": 2.19, "learning_rate": 4.7802727284728e-06, "loss": 3.9289, "step": 499500 }, { "epoch": 2.19, "learning_rate": 4.774772396252734e-06, "loss": 3.9348, "step": 500000 }, { "epoch": 2.19, "learning_rate": 4.769272064032668e-06, "loss": 3.929, "step": 500500 }, { "epoch": 2.19, "learning_rate": 4.763771731812602e-06, "loss": 3.9286, "step": 501000 }, { "epoch": 2.19, "learning_rate": 4.758271399592536e-06, "loss": 3.9327, "step": 501500 }, { "epoch": 2.2, "learning_rate": 4.75277106737247e-06, "loss": 3.9178, "step": 502000 }, { "epoch": 2.2, "learning_rate": 4.747270735152403e-06, "loss": 3.9321, "step": 502500 }, { "epoch": 2.2, "learning_rate": 4.741770402932338e-06, "loss": 3.9239, "step": 503000 }, { "epoch": 2.2, "learning_rate": 4.736270070712272e-06, "loss": 3.9168, "step": 503500 }, { "epoch": 2.21, "learning_rate": 4.7307697384922056e-06, "loss": 3.9304, "step": 504000 }, { "epoch": 2.21, "learning_rate": 4.7252694062721395e-06, "loss": 3.9262, "step": 504500 }, { "epoch": 2.21, "learning_rate": 4.7197690740520726e-06, "loss": 3.918, "step": 505000 }, { "epoch": 2.21, "learning_rate": 4.714268741832007e-06, "loss": 3.9114, "step": 505500 }, { "epoch": 2.21, "learning_rate": 4.7087684096119404e-06, "loss": 3.9143, "step": 506000 }, { "epoch": 2.22, "learning_rate": 4.703268077391875e-06, "loss": 3.9256, "step": 506500 }, { "epoch": 2.22, "learning_rate": 4.697767745171809e-06, "loss": 3.9179, "step": 507000 }, { "epoch": 2.22, "learning_rate": 4.692267412951742e-06, "loss": 3.9251, "step": 507500 }, { "epoch": 2.22, "learning_rate": 4.686767080731677e-06, "loss": 3.9256, "step": 508000 }, { "epoch": 2.23, "learning_rate": 4.68126674851161e-06, "loss": 3.908, "step": 508500 }, { "epoch": 2.23, "learning_rate": 4.675766416291545e-06, "loss": 3.9167, "step": 509000 }, { "epoch": 2.23, "learning_rate": 4.670266084071478e-06, "loss": 3.9118, "step": 509500 }, { "epoch": 2.23, "learning_rate": 4.664765751851412e-06, "loss": 3.9192, "step": 510000 }, { "epoch": 2.23, "learning_rate": 4.659265419631347e-06, "loss": 3.9013, "step": 510500 }, { "epoch": 2.24, "learning_rate": 4.65376508741128e-06, "loss": 3.9102, "step": 511000 }, { "epoch": 2.24, "learning_rate": 4.6482647551912145e-06, "loss": 3.9228, "step": 511500 }, { "epoch": 2.24, "learning_rate": 4.6427644229711476e-06, "loss": 3.9117, "step": 512000 }, { "epoch": 2.24, "learning_rate": 4.6372640907510815e-06, "loss": 3.9039, "step": 512500 }, { "epoch": 2.24, "learning_rate": 4.6317637585310154e-06, "loss": 3.9006, "step": 513000 }, { "epoch": 2.25, "learning_rate": 4.626263426310949e-06, "loss": 3.888, "step": 513500 }, { "epoch": 2.25, "learning_rate": 4.620763094090883e-06, "loss": 3.9163, "step": 514000 }, { "epoch": 2.25, "learning_rate": 4.615262761870817e-06, "loss": 3.8956, "step": 514500 }, { "epoch": 2.25, "learning_rate": 4.609762429650751e-06, "loss": 3.9047, "step": 515000 }, { "epoch": 2.26, "learning_rate": 4.604262097430685e-06, "loss": 3.8887, "step": 515500 }, { "epoch": 2.26, "learning_rate": 4.598761765210619e-06, "loss": 3.8938, "step": 516000 }, { "epoch": 2.26, "learning_rate": 4.593261432990553e-06, "loss": 3.8859, "step": 516500 }, { "epoch": 2.26, "learning_rate": 4.587761100770487e-06, "loss": 3.8827, "step": 517000 }, { "epoch": 2.26, "learning_rate": 4.582260768550421e-06, "loss": 3.9168, "step": 517500 }, { "epoch": 2.27, "learning_rate": 4.576760436330355e-06, "loss": 3.8968, "step": 518000 }, { "epoch": 2.27, "learning_rate": 4.571260104110289e-06, "loss": 3.8848, "step": 518500 }, { "epoch": 2.27, "learning_rate": 4.5657597718902226e-06, "loss": 3.8846, "step": 519000 }, { "epoch": 2.27, "learning_rate": 4.5602594396701565e-06, "loss": 3.8849, "step": 519500 }, { "epoch": 2.28, "learning_rate": 4.5547591074500904e-06, "loss": 3.8856, "step": 520000 }, { "epoch": 2.28, "learning_rate": 4.549258775230024e-06, "loss": 3.9032, "step": 520500 }, { "epoch": 2.28, "learning_rate": 4.543758443009958e-06, "loss": 3.886, "step": 521000 }, { "epoch": 2.28, "learning_rate": 4.538258110789892e-06, "loss": 3.894, "step": 521500 }, { "epoch": 2.28, "learning_rate": 4.532757778569826e-06, "loss": 3.8737, "step": 522000 }, { "epoch": 2.29, "learning_rate": 4.52725744634976e-06, "loss": 3.8734, "step": 522500 }, { "epoch": 2.29, "learning_rate": 4.521757114129694e-06, "loss": 3.8829, "step": 523000 }, { "epoch": 2.29, "learning_rate": 4.516256781909628e-06, "loss": 3.8842, "step": 523500 }, { "epoch": 2.29, "learning_rate": 4.510756449689562e-06, "loss": 3.8766, "step": 524000 }, { "epoch": 2.3, "learning_rate": 4.505256117469495e-06, "loss": 3.8956, "step": 524500 }, { "epoch": 2.3, "learning_rate": 4.49975578524943e-06, "loss": 3.8952, "step": 525000 }, { "epoch": 2.3, "learning_rate": 4.494255453029364e-06, "loss": 3.8863, "step": 525500 }, { "epoch": 2.3, "learning_rate": 4.488755120809298e-06, "loss": 3.8746, "step": 526000 }, { "epoch": 2.3, "learning_rate": 4.4832547885892315e-06, "loss": 3.8856, "step": 526500 }, { "epoch": 2.31, "learning_rate": 4.477754456369165e-06, "loss": 3.9011, "step": 527000 }, { "epoch": 2.31, "learning_rate": 4.472254124149099e-06, "loss": 3.8708, "step": 527500 }, { "epoch": 2.31, "learning_rate": 4.4667537919290324e-06, "loss": 3.8729, "step": 528000 }, { "epoch": 2.31, "learning_rate": 4.461253459708967e-06, "loss": 3.8778, "step": 528500 }, { "epoch": 2.32, "learning_rate": 4.4557531274889e-06, "loss": 3.8994, "step": 529000 }, { "epoch": 2.32, "learning_rate": 4.450252795268834e-06, "loss": 3.8571, "step": 529500 }, { "epoch": 2.32, "learning_rate": 4.444752463048769e-06, "loss": 3.8749, "step": 530000 }, { "epoch": 2.32, "learning_rate": 4.439252130828702e-06, "loss": 3.8686, "step": 530500 }, { "epoch": 2.32, "learning_rate": 4.433751798608637e-06, "loss": 3.8556, "step": 531000 }, { "epoch": 2.33, "learning_rate": 4.42825146638857e-06, "loss": 3.8639, "step": 531500 }, { "epoch": 2.33, "learning_rate": 4.422751134168504e-06, "loss": 3.8651, "step": 532000 }, { "epoch": 2.33, "learning_rate": 4.417250801948438e-06, "loss": 3.8659, "step": 532500 }, { "epoch": 2.33, "learning_rate": 4.411750469728372e-06, "loss": 3.8609, "step": 533000 }, { "epoch": 2.33, "learning_rate": 4.4062501375083065e-06, "loss": 3.8801, "step": 533500 }, { "epoch": 2.34, "learning_rate": 4.40074980528824e-06, "loss": 3.857, "step": 534000 }, { "epoch": 2.34, "learning_rate": 4.3952494730681735e-06, "loss": 3.8627, "step": 534500 }, { "epoch": 2.34, "learning_rate": 4.3897491408481074e-06, "loss": 3.8653, "step": 535000 }, { "epoch": 2.34, "learning_rate": 4.384248808628041e-06, "loss": 3.8654, "step": 535500 }, { "epoch": 2.35, "learning_rate": 4.378748476407975e-06, "loss": 3.8717, "step": 536000 }, { "epoch": 2.35, "learning_rate": 4.373248144187909e-06, "loss": 3.8482, "step": 536500 }, { "epoch": 2.35, "learning_rate": 4.367747811967843e-06, "loss": 3.8707, "step": 537000 }, { "epoch": 2.35, "learning_rate": 4.362247479747777e-06, "loss": 3.8523, "step": 537500 }, { "epoch": 2.35, "learning_rate": 4.356747147527711e-06, "loss": 3.8629, "step": 538000 }, { "epoch": 2.36, "learning_rate": 4.351246815307645e-06, "loss": 3.8712, "step": 538500 }, { "epoch": 2.36, "learning_rate": 4.345746483087579e-06, "loss": 3.8486, "step": 539000 }, { "epoch": 2.36, "learning_rate": 4.340246150867513e-06, "loss": 3.8586, "step": 539500 }, { "epoch": 2.36, "learning_rate": 4.334745818647447e-06, "loss": 3.8724, "step": 540000 }, { "epoch": 2.37, "learning_rate": 4.329245486427381e-06, "loss": 3.8568, "step": 540500 }, { "epoch": 2.37, "learning_rate": 4.323745154207315e-06, "loss": 3.8522, "step": 541000 }, { "epoch": 2.37, "learning_rate": 4.3182448219872485e-06, "loss": 3.8657, "step": 541500 }, { "epoch": 2.37, "learning_rate": 4.3127444897671825e-06, "loss": 3.8592, "step": 542000 }, { "epoch": 2.37, "learning_rate": 4.307244157547116e-06, "loss": 3.8423, "step": 542500 }, { "epoch": 2.38, "learning_rate": 4.30174382532705e-06, "loss": 3.8413, "step": 543000 }, { "epoch": 2.38, "learning_rate": 4.296243493106984e-06, "loss": 3.8402, "step": 543500 }, { "epoch": 2.38, "learning_rate": 4.290743160886917e-06, "loss": 3.8523, "step": 544000 }, { "epoch": 2.38, "learning_rate": 4.285242828666852e-06, "loss": 3.8484, "step": 544500 }, { "epoch": 2.39, "learning_rate": 4.279742496446786e-06, "loss": 3.8438, "step": 545000 }, { "epoch": 2.39, "learning_rate": 4.27424216422672e-06, "loss": 3.8523, "step": 545500 }, { "epoch": 2.39, "learning_rate": 4.268741832006654e-06, "loss": 3.851, "step": 546000 }, { "epoch": 2.39, "learning_rate": 4.263241499786587e-06, "loss": 3.8666, "step": 546500 }, { "epoch": 2.39, "learning_rate": 4.257741167566522e-06, "loss": 3.8476, "step": 547000 }, { "epoch": 2.4, "learning_rate": 4.252240835346455e-06, "loss": 3.8318, "step": 547500 }, { "epoch": 2.4, "learning_rate": 4.24674050312639e-06, "loss": 3.8342, "step": 548000 }, { "epoch": 2.4, "learning_rate": 4.2412401709063235e-06, "loss": 3.8342, "step": 548500 }, { "epoch": 2.4, "learning_rate": 4.235739838686257e-06, "loss": 3.8513, "step": 549000 }, { "epoch": 2.4, "learning_rate": 4.230239506466191e-06, "loss": 3.846, "step": 549500 }, { "epoch": 2.41, "learning_rate": 4.2247391742461245e-06, "loss": 3.8363, "step": 550000 }, { "epoch": 2.41, "learning_rate": 4.219238842026059e-06, "loss": 3.8365, "step": 550500 }, { "epoch": 2.41, "learning_rate": 4.213738509805992e-06, "loss": 3.8335, "step": 551000 }, { "epoch": 2.41, "learning_rate": 4.208238177585926e-06, "loss": 3.8421, "step": 551500 }, { "epoch": 2.42, "learning_rate": 4.202737845365861e-06, "loss": 3.859, "step": 552000 }, { "epoch": 2.42, "learning_rate": 4.197237513145794e-06, "loss": 3.8283, "step": 552500 }, { "epoch": 2.42, "learning_rate": 4.191737180925728e-06, "loss": 3.8288, "step": 553000 }, { "epoch": 2.42, "learning_rate": 4.186236848705662e-06, "loss": 3.8239, "step": 553500 }, { "epoch": 2.42, "learning_rate": 4.180736516485596e-06, "loss": 3.834, "step": 554000 }, { "epoch": 2.43, "learning_rate": 4.17523618426553e-06, "loss": 3.8265, "step": 554500 }, { "epoch": 2.43, "learning_rate": 4.169735852045464e-06, "loss": 3.8312, "step": 555000 }, { "epoch": 2.43, "learning_rate": 4.164235519825398e-06, "loss": 3.8317, "step": 555500 }, { "epoch": 2.43, "learning_rate": 4.158735187605332e-06, "loss": 3.8383, "step": 556000 }, { "epoch": 2.44, "learning_rate": 4.1532348553852655e-06, "loss": 3.8163, "step": 556500 }, { "epoch": 2.44, "learning_rate": 4.1477345231651995e-06, "loss": 3.8049, "step": 557000 }, { "epoch": 2.44, "learning_rate": 4.142234190945133e-06, "loss": 3.8479, "step": 557500 }, { "epoch": 2.44, "learning_rate": 4.136733858725067e-06, "loss": 3.8328, "step": 558000 }, { "epoch": 2.44, "learning_rate": 4.131233526505001e-06, "loss": 3.8151, "step": 558500 }, { "epoch": 2.45, "learning_rate": 4.125733194284935e-06, "loss": 3.8271, "step": 559000 }, { "epoch": 2.45, "learning_rate": 4.120232862064869e-06, "loss": 3.8143, "step": 559500 }, { "epoch": 2.45, "learning_rate": 4.114732529844803e-06, "loss": 3.8264, "step": 560000 }, { "epoch": 2.45, "learning_rate": 4.109232197624737e-06, "loss": 3.8173, "step": 560500 }, { "epoch": 2.46, "learning_rate": 4.103731865404671e-06, "loss": 3.814, "step": 561000 }, { "epoch": 2.46, "learning_rate": 4.098231533184605e-06, "loss": 3.8111, "step": 561500 }, { "epoch": 2.46, "learning_rate": 4.092731200964539e-06, "loss": 3.8192, "step": 562000 }, { "epoch": 2.46, "learning_rate": 4.087230868744473e-06, "loss": 3.8212, "step": 562500 }, { "epoch": 2.46, "learning_rate": 4.081730536524407e-06, "loss": 3.8073, "step": 563000 }, { "epoch": 2.47, "learning_rate": 4.0762302043043405e-06, "loss": 3.8258, "step": 563500 }, { "epoch": 2.47, "learning_rate": 4.0707298720842745e-06, "loss": 3.8391, "step": 564000 }, { "epoch": 2.47, "learning_rate": 4.065229539864208e-06, "loss": 3.8065, "step": 564500 }, { "epoch": 2.47, "learning_rate": 4.059729207644142e-06, "loss": 3.8148, "step": 565000 }, { "epoch": 2.47, "learning_rate": 4.054228875424076e-06, "loss": 3.8089, "step": 565500 }, { "epoch": 2.48, "learning_rate": 4.048728543204009e-06, "loss": 3.8037, "step": 566000 }, { "epoch": 2.48, "learning_rate": 4.043228210983944e-06, "loss": 3.8184, "step": 566500 }, { "epoch": 2.48, "learning_rate": 4.037727878763877e-06, "loss": 3.8412, "step": 567000 }, { "epoch": 2.48, "learning_rate": 4.032227546543811e-06, "loss": 3.8171, "step": 567500 }, { "epoch": 2.49, "learning_rate": 4.026727214323746e-06, "loss": 3.8175, "step": 568000 }, { "epoch": 2.49, "learning_rate": 4.021226882103679e-06, "loss": 3.8184, "step": 568500 }, { "epoch": 2.49, "learning_rate": 4.015726549883614e-06, "loss": 3.8238, "step": 569000 }, { "epoch": 2.49, "learning_rate": 4.010226217663547e-06, "loss": 3.807, "step": 569500 }, { "epoch": 2.49, "learning_rate": 4.004725885443481e-06, "loss": 3.7998, "step": 570000 }, { "epoch": 2.5, "learning_rate": 3.999225553223415e-06, "loss": 3.815, "step": 570500 }, { "epoch": 2.5, "learning_rate": 3.993725221003349e-06, "loss": 3.8179, "step": 571000 }, { "epoch": 2.5, "learning_rate": 3.988224888783283e-06, "loss": 3.803, "step": 571500 }, { "epoch": 2.5, "learning_rate": 3.9827245565632165e-06, "loss": 3.8154, "step": 572000 }, { "epoch": 2.51, "learning_rate": 3.97722422434315e-06, "loss": 3.7961, "step": 572500 }, { "epoch": 2.51, "learning_rate": 3.971723892123084e-06, "loss": 3.8028, "step": 573000 }, { "epoch": 2.51, "learning_rate": 3.966223559903018e-06, "loss": 3.8072, "step": 573500 }, { "epoch": 2.51, "learning_rate": 3.960723227682952e-06, "loss": 3.7901, "step": 574000 }, { "epoch": 2.51, "learning_rate": 3.955222895462886e-06, "loss": 3.7932, "step": 574500 }, { "epoch": 2.52, "learning_rate": 3.94972256324282e-06, "loss": 3.789, "step": 575000 }, { "epoch": 2.52, "learning_rate": 3.944222231022754e-06, "loss": 3.8177, "step": 575500 }, { "epoch": 2.52, "learning_rate": 3.938721898802688e-06, "loss": 3.7971, "step": 576000 }, { "epoch": 2.52, "learning_rate": 3.933221566582622e-06, "loss": 3.802, "step": 576500 }, { "epoch": 2.53, "learning_rate": 3.927721234362556e-06, "loss": 3.8384, "step": 577000 }, { "epoch": 2.53, "learning_rate": 3.92222090214249e-06, "loss": 3.7789, "step": 577500 }, { "epoch": 2.53, "learning_rate": 3.916720569922424e-06, "loss": 3.8068, "step": 578000 }, { "epoch": 2.53, "learning_rate": 3.9112202377023576e-06, "loss": 3.7959, "step": 578500 }, { "epoch": 2.53, "learning_rate": 3.9057199054822915e-06, "loss": 3.7924, "step": 579000 }, { "epoch": 2.54, "learning_rate": 3.900219573262225e-06, "loss": 3.7729, "step": 579500 }, { "epoch": 2.54, "learning_rate": 3.894719241042159e-06, "loss": 3.7925, "step": 580000 }, { "epoch": 2.54, "learning_rate": 3.889218908822093e-06, "loss": 3.7963, "step": 580500 }, { "epoch": 2.54, "learning_rate": 3.883718576602027e-06, "loss": 3.7818, "step": 581000 }, { "epoch": 2.54, "learning_rate": 3.878218244381961e-06, "loss": 3.7931, "step": 581500 }, { "epoch": 2.55, "learning_rate": 3.872717912161894e-06, "loss": 3.7912, "step": 582000 }, { "epoch": 2.55, "learning_rate": 3.867217579941829e-06, "loss": 3.7882, "step": 582500 }, { "epoch": 2.55, "learning_rate": 3.861717247721763e-06, "loss": 3.797, "step": 583000 }, { "epoch": 2.55, "learning_rate": 3.856216915501697e-06, "loss": 3.7891, "step": 583500 }, { "epoch": 2.56, "learning_rate": 3.850716583281631e-06, "loss": 3.7888, "step": 584000 }, { "epoch": 2.56, "learning_rate": 3.845216251061564e-06, "loss": 3.7929, "step": 584500 }, { "epoch": 2.56, "learning_rate": 3.839715918841499e-06, "loss": 3.7851, "step": 585000 }, { "epoch": 2.56, "learning_rate": 3.834215586621432e-06, "loss": 3.803, "step": 585500 }, { "epoch": 2.56, "learning_rate": 3.8287152544013665e-06, "loss": 3.7712, "step": 586000 }, { "epoch": 2.57, "learning_rate": 3.8232149221813e-06, "loss": 3.7852, "step": 586500 }, { "epoch": 2.57, "learning_rate": 3.8177145899612335e-06, "loss": 3.7847, "step": 587000 }, { "epoch": 2.57, "learning_rate": 3.812214257741168e-06, "loss": 3.7644, "step": 587500 }, { "epoch": 2.57, "learning_rate": 3.8067139255211018e-06, "loss": 3.7816, "step": 588000 }, { "epoch": 2.58, "learning_rate": 3.8012135933010357e-06, "loss": 3.8016, "step": 588500 }, { "epoch": 2.58, "learning_rate": 3.7957132610809692e-06, "loss": 3.779, "step": 589000 }, { "epoch": 2.58, "learning_rate": 3.7902129288609036e-06, "loss": 3.7893, "step": 589500 }, { "epoch": 2.58, "learning_rate": 3.7847125966408375e-06, "loss": 3.7595, "step": 590000 }, { "epoch": 2.58, "learning_rate": 3.7792122644207714e-06, "loss": 3.7787, "step": 590500 }, { "epoch": 2.59, "learning_rate": 3.7737119322007054e-06, "loss": 3.7649, "step": 591000 }, { "epoch": 2.59, "learning_rate": 3.768211599980639e-06, "loss": 3.7634, "step": 591500 }, { "epoch": 2.59, "learning_rate": 3.7627112677605732e-06, "loss": 3.7862, "step": 592000 }, { "epoch": 2.59, "learning_rate": 3.7572109355405067e-06, "loss": 3.768, "step": 592500 }, { "epoch": 2.6, "learning_rate": 3.751710603320441e-06, "loss": 3.77, "step": 593000 }, { "epoch": 2.6, "learning_rate": 3.7462102711003746e-06, "loss": 3.7923, "step": 593500 }, { "epoch": 2.6, "learning_rate": 3.7407099388803085e-06, "loss": 3.7903, "step": 594000 }, { "epoch": 2.6, "learning_rate": 3.735209606660243e-06, "loss": 3.7742, "step": 594500 }, { "epoch": 2.6, "learning_rate": 3.7297092744401764e-06, "loss": 3.7723, "step": 595000 }, { "epoch": 2.61, "learning_rate": 3.7242089422201107e-06, "loss": 3.7747, "step": 595500 }, { "epoch": 2.61, "learning_rate": 3.7187086100000442e-06, "loss": 3.7572, "step": 596000 }, { "epoch": 2.61, "learning_rate": 3.713208277779978e-06, "loss": 3.7676, "step": 596500 }, { "epoch": 2.61, "learning_rate": 3.707707945559912e-06, "loss": 3.7739, "step": 597000 }, { "epoch": 2.61, "learning_rate": 3.702207613339846e-06, "loss": 3.7719, "step": 597500 }, { "epoch": 2.62, "learning_rate": 3.6967072811197804e-06, "loss": 3.7698, "step": 598000 }, { "epoch": 2.62, "learning_rate": 3.691206948899714e-06, "loss": 3.7597, "step": 598500 }, { "epoch": 2.62, "learning_rate": 3.685706616679648e-06, "loss": 3.7637, "step": 599000 }, { "epoch": 2.62, "learning_rate": 3.6802062844595813e-06, "loss": 3.7903, "step": 599500 }, { "epoch": 2.63, "learning_rate": 3.6747059522395157e-06, "loss": 3.7736, "step": 600000 }, { "epoch": 2.63, "learning_rate": 3.669205620019449e-06, "loss": 3.7649, "step": 600500 }, { "epoch": 2.63, "learning_rate": 3.6637052877993835e-06, "loss": 3.7754, "step": 601000 }, { "epoch": 2.63, "learning_rate": 3.6582049555793174e-06, "loss": 3.7583, "step": 601500 }, { "epoch": 2.63, "learning_rate": 3.652704623359251e-06, "loss": 3.7565, "step": 602000 }, { "epoch": 2.64, "learning_rate": 3.6472042911391853e-06, "loss": 3.7819, "step": 602500 }, { "epoch": 2.64, "learning_rate": 3.641703958919119e-06, "loss": 3.7609, "step": 603000 }, { "epoch": 2.64, "learning_rate": 3.636203626699053e-06, "loss": 3.783, "step": 603500 }, { "epoch": 2.64, "learning_rate": 3.6307032944789867e-06, "loss": 3.7643, "step": 604000 }, { "epoch": 2.65, "learning_rate": 3.6252029622589206e-06, "loss": 3.7631, "step": 604500 }, { "epoch": 2.65, "learning_rate": 3.619702630038855e-06, "loss": 3.7547, "step": 605000 }, { "epoch": 2.65, "learning_rate": 3.6142022978187884e-06, "loss": 3.7683, "step": 605500 }, { "epoch": 2.65, "learning_rate": 3.608701965598723e-06, "loss": 3.7608, "step": 606000 }, { "epoch": 2.65, "learning_rate": 3.6032016333786563e-06, "loss": 3.7394, "step": 606500 }, { "epoch": 2.66, "learning_rate": 3.5977013011585902e-06, "loss": 3.759, "step": 607000 }, { "epoch": 2.66, "learning_rate": 3.592200968938524e-06, "loss": 3.7483, "step": 607500 }, { "epoch": 2.66, "learning_rate": 3.586700636718458e-06, "loss": 3.7504, "step": 608000 }, { "epoch": 2.66, "learning_rate": 3.5812003044983916e-06, "loss": 3.7799, "step": 608500 }, { "epoch": 2.67, "learning_rate": 3.575699972278326e-06, "loss": 3.7455, "step": 609000 }, { "epoch": 2.67, "learning_rate": 3.57019964005826e-06, "loss": 3.7645, "step": 609500 }, { "epoch": 2.67, "learning_rate": 3.564699307838194e-06, "loss": 3.7539, "step": 610000 }, { "epoch": 2.67, "learning_rate": 3.5591989756181277e-06, "loss": 3.7454, "step": 610500 }, { "epoch": 2.67, "learning_rate": 3.5536986433980612e-06, "loss": 3.7622, "step": 611000 }, { "epoch": 2.68, "learning_rate": 3.5481983111779956e-06, "loss": 3.7541, "step": 611500 }, { "epoch": 2.68, "learning_rate": 3.542697978957929e-06, "loss": 3.7643, "step": 612000 }, { "epoch": 2.68, "learning_rate": 3.5371976467378635e-06, "loss": 3.7736, "step": 612500 }, { "epoch": 2.68, "learning_rate": 3.5316973145177974e-06, "loss": 3.7541, "step": 613000 }, { "epoch": 2.68, "learning_rate": 3.526196982297731e-06, "loss": 3.7547, "step": 613500 }, { "epoch": 2.69, "learning_rate": 3.5206966500776652e-06, "loss": 3.7471, "step": 614000 }, { "epoch": 2.69, "learning_rate": 3.5151963178575987e-06, "loss": 3.7696, "step": 614500 }, { "epoch": 2.69, "learning_rate": 3.509695985637533e-06, "loss": 3.7523, "step": 615000 }, { "epoch": 2.69, "learning_rate": 3.5041956534174666e-06, "loss": 3.7594, "step": 615500 }, { "epoch": 2.7, "learning_rate": 3.4986953211974005e-06, "loss": 3.7387, "step": 616000 }, { "epoch": 2.7, "learning_rate": 3.493194988977335e-06, "loss": 3.7519, "step": 616500 }, { "epoch": 2.7, "learning_rate": 3.4876946567572684e-06, "loss": 3.7412, "step": 617000 }, { "epoch": 2.7, "learning_rate": 3.4821943245372023e-06, "loss": 3.7378, "step": 617500 }, { "epoch": 2.7, "learning_rate": 3.4766939923171362e-06, "loss": 3.7344, "step": 618000 }, { "epoch": 2.71, "learning_rate": 3.47119366009707e-06, "loss": 3.7464, "step": 618500 }, { "epoch": 2.71, "learning_rate": 3.4656933278770037e-06, "loss": 3.7491, "step": 619000 }, { "epoch": 2.71, "learning_rate": 3.460192995656938e-06, "loss": 3.7542, "step": 619500 }, { "epoch": 2.71, "learning_rate": 3.4546926634368715e-06, "loss": 3.743, "step": 620000 }, { "epoch": 2.72, "learning_rate": 3.449192331216806e-06, "loss": 3.7351, "step": 620500 }, { "epoch": 2.72, "learning_rate": 3.44369199899674e-06, "loss": 3.7342, "step": 621000 }, { "epoch": 2.72, "learning_rate": 3.4381916667766733e-06, "loss": 3.7416, "step": 621500 }, { "epoch": 2.72, "learning_rate": 3.4326913345566077e-06, "loss": 3.7543, "step": 622000 }, { "epoch": 2.72, "learning_rate": 3.427191002336541e-06, "loss": 3.7338, "step": 622500 }, { "epoch": 2.73, "learning_rate": 3.4216906701164755e-06, "loss": 3.7325, "step": 623000 }, { "epoch": 2.73, "learning_rate": 3.416190337896409e-06, "loss": 3.7203, "step": 623500 }, { "epoch": 2.73, "learning_rate": 3.410690005676343e-06, "loss": 3.7379, "step": 624000 }, { "epoch": 2.73, "learning_rate": 3.4051896734562773e-06, "loss": 3.7296, "step": 624500 }, { "epoch": 2.74, "learning_rate": 3.399689341236211e-06, "loss": 3.7402, "step": 625000 }, { "epoch": 2.74, "learning_rate": 3.394189009016145e-06, "loss": 3.7209, "step": 625500 }, { "epoch": 2.74, "learning_rate": 3.3886886767960787e-06, "loss": 3.7284, "step": 626000 }, { "epoch": 2.74, "learning_rate": 3.3831883445760126e-06, "loss": 3.7234, "step": 626500 }, { "epoch": 2.74, "learning_rate": 3.3776880123559465e-06, "loss": 3.7357, "step": 627000 }, { "epoch": 2.75, "learning_rate": 3.3721876801358805e-06, "loss": 3.7248, "step": 627500 }, { "epoch": 2.75, "learning_rate": 3.366687347915815e-06, "loss": 3.7242, "step": 628000 }, { "epoch": 2.75, "learning_rate": 3.3611870156957483e-06, "loss": 3.7506, "step": 628500 }, { "epoch": 2.75, "learning_rate": 3.3556866834756823e-06, "loss": 3.7226, "step": 629000 }, { "epoch": 2.75, "learning_rate": 3.350186351255616e-06, "loss": 3.7389, "step": 629500 }, { "epoch": 2.76, "learning_rate": 3.34468601903555e-06, "loss": 3.737, "step": 630000 }, { "epoch": 2.76, "learning_rate": 3.3391856868154836e-06, "loss": 3.7277, "step": 630500 }, { "epoch": 2.76, "learning_rate": 3.333685354595418e-06, "loss": 3.7262, "step": 631000 }, { "epoch": 2.76, "learning_rate": 3.328185022375352e-06, "loss": 3.7151, "step": 631500 }, { "epoch": 2.77, "learning_rate": 3.3226846901552854e-06, "loss": 3.7277, "step": 632000 }, { "epoch": 2.77, "learning_rate": 3.3171843579352198e-06, "loss": 3.753, "step": 632500 }, { "epoch": 2.77, "learning_rate": 3.3116840257151533e-06, "loss": 3.7222, "step": 633000 }, { "epoch": 2.77, "learning_rate": 3.3061836934950876e-06, "loss": 3.7188, "step": 633500 }, { "epoch": 2.77, "learning_rate": 3.300683361275021e-06, "loss": 3.722, "step": 634000 }, { "epoch": 2.78, "learning_rate": 3.295183029054955e-06, "loss": 3.7498, "step": 634500 }, { "epoch": 2.78, "learning_rate": 3.289682696834889e-06, "loss": 3.7312, "step": 635000 }, { "epoch": 2.78, "learning_rate": 3.284182364614823e-06, "loss": 3.7182, "step": 635500 }, { "epoch": 2.78, "learning_rate": 3.2786820323947573e-06, "loss": 3.7401, "step": 636000 }, { "epoch": 2.79, "learning_rate": 3.2731817001746908e-06, "loss": 3.7245, "step": 636500 }, { "epoch": 2.79, "learning_rate": 3.2676813679546247e-06, "loss": 3.7145, "step": 637000 }, { "epoch": 2.79, "learning_rate": 3.2621810357345586e-06, "loss": 3.729, "step": 637500 }, { "epoch": 2.79, "learning_rate": 3.2566807035144925e-06, "loss": 3.7488, "step": 638000 }, { "epoch": 2.79, "learning_rate": 3.251180371294426e-06, "loss": 3.7329, "step": 638500 }, { "epoch": 2.8, "learning_rate": 3.2456800390743604e-06, "loss": 3.7121, "step": 639000 }, { "epoch": 2.8, "learning_rate": 3.2401797068542943e-06, "loss": 3.719, "step": 639500 }, { "epoch": 2.8, "learning_rate": 3.2346793746342283e-06, "loss": 3.7277, "step": 640000 }, { "epoch": 2.8, "learning_rate": 3.229179042414162e-06, "loss": 3.7167, "step": 640500 }, { "epoch": 2.81, "learning_rate": 3.2236787101940957e-06, "loss": 3.7119, "step": 641000 }, { "epoch": 2.81, "learning_rate": 3.21817837797403e-06, "loss": 3.7478, "step": 641500 }, { "epoch": 2.81, "learning_rate": 3.2126780457539636e-06, "loss": 3.7177, "step": 642000 }, { "epoch": 2.81, "learning_rate": 3.207177713533898e-06, "loss": 3.7032, "step": 642500 }, { "epoch": 2.81, "learning_rate": 3.201677381313832e-06, "loss": 3.7161, "step": 643000 }, { "epoch": 2.82, "learning_rate": 3.1961770490937653e-06, "loss": 3.7241, "step": 643500 }, { "epoch": 2.82, "learning_rate": 3.1906767168736997e-06, "loss": 3.7244, "step": 644000 }, { "epoch": 2.82, "learning_rate": 3.185176384653633e-06, "loss": 3.7248, "step": 644500 }, { "epoch": 2.82, "learning_rate": 3.1796760524335676e-06, "loss": 3.7437, "step": 645000 }, { "epoch": 2.82, "learning_rate": 3.174175720213501e-06, "loss": 3.7532, "step": 645500 }, { "epoch": 2.83, "learning_rate": 3.168675387993435e-06, "loss": 3.7164, "step": 646000 }, { "epoch": 2.83, "learning_rate": 3.1631750557733685e-06, "loss": 3.7146, "step": 646500 }, { "epoch": 2.83, "learning_rate": 3.157674723553303e-06, "loss": 3.7108, "step": 647000 }, { "epoch": 2.83, "learning_rate": 3.152174391333237e-06, "loss": 3.6925, "step": 647500 }, { "epoch": 2.84, "learning_rate": 3.1466740591131707e-06, "loss": 3.7028, "step": 648000 }, { "epoch": 2.84, "learning_rate": 3.1411737268931046e-06, "loss": 3.721, "step": 648500 }, { "epoch": 2.84, "learning_rate": 3.135673394673038e-06, "loss": 3.7131, "step": 649000 }, { "epoch": 2.84, "learning_rate": 3.1301730624529725e-06, "loss": 3.7004, "step": 649500 }, { "epoch": 2.84, "learning_rate": 3.124672730232906e-06, "loss": 3.7099, "step": 650000 }, { "epoch": 2.85, "learning_rate": 3.1191723980128403e-06, "loss": 3.7139, "step": 650500 }, { "epoch": 2.85, "learning_rate": 3.1136720657927743e-06, "loss": 3.7192, "step": 651000 }, { "epoch": 2.85, "learning_rate": 3.1081717335727078e-06, "loss": 3.7072, "step": 651500 }, { "epoch": 2.85, "learning_rate": 3.102671401352642e-06, "loss": 3.7057, "step": 652000 }, { "epoch": 2.86, "learning_rate": 3.0971710691325756e-06, "loss": 3.7074, "step": 652500 }, { "epoch": 2.86, "learning_rate": 3.09167073691251e-06, "loss": 3.6901, "step": 653000 }, { "epoch": 2.86, "learning_rate": 3.0861704046924435e-06, "loss": 3.7143, "step": 653500 }, { "epoch": 2.86, "learning_rate": 3.0806700724723774e-06, "loss": 3.7008, "step": 654000 }, { "epoch": 2.86, "learning_rate": 3.0751697402523118e-06, "loss": 3.7157, "step": 654500 }, { "epoch": 2.87, "learning_rate": 3.0696694080322453e-06, "loss": 3.7042, "step": 655000 }, { "epoch": 2.87, "learning_rate": 3.0641690758121796e-06, "loss": 3.7275, "step": 655500 }, { "epoch": 2.87, "learning_rate": 3.058668743592113e-06, "loss": 3.7057, "step": 656000 }, { "epoch": 2.87, "learning_rate": 3.053168411372047e-06, "loss": 3.6993, "step": 656500 }, { "epoch": 2.88, "learning_rate": 3.047668079151981e-06, "loss": 3.6943, "step": 657000 }, { "epoch": 2.88, "learning_rate": 3.042167746931915e-06, "loss": 3.7124, "step": 657500 }, { "epoch": 2.88, "learning_rate": 3.0366674147118493e-06, "loss": 3.7149, "step": 658000 }, { "epoch": 2.88, "learning_rate": 3.0311670824917828e-06, "loss": 3.7134, "step": 658500 }, { "epoch": 2.88, "learning_rate": 3.0256667502717167e-06, "loss": 3.7186, "step": 659000 }, { "epoch": 2.89, "learning_rate": 3.0201664180516506e-06, "loss": 3.698, "step": 659500 }, { "epoch": 2.89, "learning_rate": 3.0146660858315846e-06, "loss": 3.7098, "step": 660000 }, { "epoch": 2.89, "learning_rate": 3.009165753611518e-06, "loss": 3.6881, "step": 660500 }, { "epoch": 2.89, "learning_rate": 3.0036654213914524e-06, "loss": 3.697, "step": 661000 }, { "epoch": 2.89, "learning_rate": 2.998165089171386e-06, "loss": 3.6947, "step": 661500 }, { "epoch": 2.9, "learning_rate": 2.9926647569513203e-06, "loss": 3.7003, "step": 662000 }, { "epoch": 2.9, "learning_rate": 2.9871644247312542e-06, "loss": 3.6983, "step": 662500 }, { "epoch": 2.9, "learning_rate": 2.9816640925111877e-06, "loss": 3.695, "step": 663000 }, { "epoch": 2.9, "learning_rate": 2.976163760291122e-06, "loss": 3.6907, "step": 663500 }, { "epoch": 2.91, "learning_rate": 2.9706634280710556e-06, "loss": 3.7034, "step": 664000 }, { "epoch": 2.91, "learning_rate": 2.9651630958509895e-06, "loss": 3.6857, "step": 664500 }, { "epoch": 2.91, "learning_rate": 2.9596627636309234e-06, "loss": 3.7105, "step": 665000 }, { "epoch": 2.91, "learning_rate": 2.9541624314108574e-06, "loss": 3.7165, "step": 665500 }, { "epoch": 2.91, "learning_rate": 2.9486620991907917e-06, "loss": 3.6927, "step": 666000 }, { "epoch": 2.92, "learning_rate": 2.9431617669707252e-06, "loss": 3.7015, "step": 666500 }, { "epoch": 2.92, "learning_rate": 2.937661434750659e-06, "loss": 3.6991, "step": 667000 }, { "epoch": 2.92, "learning_rate": 2.932161102530593e-06, "loss": 3.6869, "step": 667500 }, { "epoch": 2.92, "learning_rate": 2.926660770310527e-06, "loss": 3.684, "step": 668000 }, { "epoch": 2.93, "learning_rate": 2.9211604380904605e-06, "loss": 3.7013, "step": 668500 }, { "epoch": 2.93, "learning_rate": 2.915660105870395e-06, "loss": 3.6768, "step": 669000 }, { "epoch": 2.93, "learning_rate": 2.910159773650329e-06, "loss": 3.7044, "step": 669500 }, { "epoch": 2.93, "learning_rate": 2.9046594414302627e-06, "loss": 3.7081, "step": 670000 }, { "epoch": 2.93, "learning_rate": 2.8991591092101967e-06, "loss": 3.702, "step": 670500 }, { "epoch": 2.94, "learning_rate": 2.89365877699013e-06, "loss": 3.6981, "step": 671000 }, { "epoch": 2.94, "learning_rate": 2.8881584447700645e-06, "loss": 3.6725, "step": 671500 }, { "epoch": 2.94, "learning_rate": 2.882658112549998e-06, "loss": 3.6838, "step": 672000 }, { "epoch": 2.94, "learning_rate": 2.8771577803299324e-06, "loss": 3.7048, "step": 672500 }, { "epoch": 2.95, "learning_rate": 2.871657448109866e-06, "loss": 3.6937, "step": 673000 }, { "epoch": 2.95, "learning_rate": 2.8661571158898e-06, "loss": 3.6923, "step": 673500 }, { "epoch": 2.95, "learning_rate": 2.860656783669734e-06, "loss": 3.6854, "step": 674000 }, { "epoch": 2.95, "learning_rate": 2.8551564514496677e-06, "loss": 3.6848, "step": 674500 }, { "epoch": 2.95, "learning_rate": 2.849656119229602e-06, "loss": 3.6736, "step": 675000 }, { "epoch": 2.96, "learning_rate": 2.8441557870095355e-06, "loss": 3.6957, "step": 675500 }, { "epoch": 2.96, "learning_rate": 2.8386554547894694e-06, "loss": 3.6843, "step": 676000 }, { "epoch": 2.96, "learning_rate": 2.8331551225694034e-06, "loss": 3.6824, "step": 676500 }, { "epoch": 2.96, "learning_rate": 2.8276547903493373e-06, "loss": 3.6942, "step": 677000 }, { "epoch": 2.96, "learning_rate": 2.8221544581292717e-06, "loss": 3.6742, "step": 677500 }, { "epoch": 2.97, "learning_rate": 2.816654125909205e-06, "loss": 3.6756, "step": 678000 }, { "epoch": 2.97, "learning_rate": 2.811153793689139e-06, "loss": 3.6754, "step": 678500 }, { "epoch": 2.97, "learning_rate": 2.8056534614690726e-06, "loss": 3.6676, "step": 679000 }, { "epoch": 2.97, "learning_rate": 2.800153129249007e-06, "loss": 3.6645, "step": 679500 }, { "epoch": 2.98, "learning_rate": 2.7946527970289405e-06, "loss": 3.6895, "step": 680000 }, { "epoch": 2.98, "learning_rate": 2.789152464808875e-06, "loss": 3.6723, "step": 680500 }, { "epoch": 2.98, "learning_rate": 2.7836521325888087e-06, "loss": 3.6766, "step": 681000 }, { "epoch": 2.98, "learning_rate": 2.7781518003687422e-06, "loss": 3.6747, "step": 681500 }, { "epoch": 2.98, "learning_rate": 2.7726514681486766e-06, "loss": 3.6776, "step": 682000 }, { "epoch": 2.99, "learning_rate": 2.76715113592861e-06, "loss": 3.6917, "step": 682500 }, { "epoch": 2.99, "learning_rate": 2.7616508037085444e-06, "loss": 3.6874, "step": 683000 }, { "epoch": 2.99, "learning_rate": 2.756150471488478e-06, "loss": 3.6866, "step": 683500 }, { "epoch": 2.99, "learning_rate": 2.750650139268412e-06, "loss": 3.6723, "step": 684000 }, { "epoch": 3.0, "learning_rate": 2.7451498070483462e-06, "loss": 3.6642, "step": 684500 }, { "epoch": 3.0, "learning_rate": 2.7396494748282797e-06, "loss": 3.6796, "step": 685000 }, { "epoch": 3.0, "learning_rate": 2.734149142608214e-06, "loss": 3.6819, "step": 685500 }, { "epoch": 3.0, "learning_rate": 2.7286488103881476e-06, "loss": 3.6769, "step": 686000 }, { "epoch": 3.0, "learning_rate": 2.7231484781680815e-06, "loss": 3.6762, "step": 686500 }, { "epoch": 3.01, "learning_rate": 2.7176481459480155e-06, "loss": 3.6525, "step": 687000 }, { "epoch": 3.01, "learning_rate": 2.7121478137279494e-06, "loss": 3.6849, "step": 687500 }, { "epoch": 3.01, "learning_rate": 2.706647481507883e-06, "loss": 3.659, "step": 688000 }, { "epoch": 3.01, "learning_rate": 2.7011471492878172e-06, "loss": 3.6637, "step": 688500 }, { "epoch": 3.02, "learning_rate": 2.695646817067751e-06, "loss": 3.6884, "step": 689000 }, { "epoch": 3.02, "learning_rate": 2.690146484847685e-06, "loss": 3.6698, "step": 689500 }, { "epoch": 3.02, "learning_rate": 2.684646152627619e-06, "loss": 3.665, "step": 690000 }, { "epoch": 3.02, "learning_rate": 2.6791458204075525e-06, "loss": 3.6518, "step": 690500 }, { "epoch": 3.02, "learning_rate": 2.673645488187487e-06, "loss": 3.6717, "step": 691000 }, { "epoch": 3.03, "learning_rate": 2.6681451559674204e-06, "loss": 3.6727, "step": 691500 }, { "epoch": 3.03, "learning_rate": 2.6626448237473547e-06, "loss": 3.6653, "step": 692000 }, { "epoch": 3.03, "learning_rate": 2.6571444915272887e-06, "loss": 3.6758, "step": 692500 }, { "epoch": 3.03, "learning_rate": 2.651644159307222e-06, "loss": 3.6462, "step": 693000 }, { "epoch": 3.03, "learning_rate": 2.6461438270871565e-06, "loss": 3.666, "step": 693500 }, { "epoch": 3.04, "learning_rate": 2.64064349486709e-06, "loss": 3.6677, "step": 694000 }, { "epoch": 3.04, "learning_rate": 2.6351431626470244e-06, "loss": 3.6593, "step": 694500 }, { "epoch": 3.04, "learning_rate": 2.629642830426958e-06, "loss": 3.6507, "step": 695000 }, { "epoch": 3.04, "learning_rate": 2.624142498206892e-06, "loss": 3.682, "step": 695500 }, { "epoch": 3.05, "learning_rate": 2.618642165986826e-06, "loss": 3.6746, "step": 696000 }, { "epoch": 3.05, "learning_rate": 2.6131418337667597e-06, "loss": 3.6697, "step": 696500 }, { "epoch": 3.05, "learning_rate": 2.607641501546694e-06, "loss": 3.6692, "step": 697000 }, { "epoch": 3.05, "learning_rate": 2.6021411693266275e-06, "loss": 3.6647, "step": 697500 }, { "epoch": 3.05, "learning_rate": 2.5966408371065615e-06, "loss": 3.6514, "step": 698000 }, { "epoch": 3.06, "learning_rate": 2.591140504886495e-06, "loss": 3.6857, "step": 698500 }, { "epoch": 3.06, "learning_rate": 2.5856401726664293e-06, "loss": 3.642, "step": 699000 }, { "epoch": 3.06, "learning_rate": 2.580139840446363e-06, "loss": 3.6616, "step": 699500 }, { "epoch": 3.06, "learning_rate": 2.574639508226297e-06, "loss": 3.6542, "step": 700000 }, { "epoch": 3.07, "learning_rate": 2.569139176006231e-06, "loss": 3.6593, "step": 700500 }, { "epoch": 3.07, "learning_rate": 2.5636388437861646e-06, "loss": 3.6587, "step": 701000 }, { "epoch": 3.07, "learning_rate": 2.558138511566099e-06, "loss": 3.6669, "step": 701500 }, { "epoch": 3.07, "learning_rate": 2.5526381793460325e-06, "loss": 3.6729, "step": 702000 }, { "epoch": 3.07, "learning_rate": 2.547137847125967e-06, "loss": 3.6637, "step": 702500 }, { "epoch": 3.08, "learning_rate": 2.5416375149059003e-06, "loss": 3.6729, "step": 703000 }, { "epoch": 3.08, "learning_rate": 2.5361371826858343e-06, "loss": 3.6586, "step": 703500 }, { "epoch": 3.08, "learning_rate": 2.5306368504657686e-06, "loss": 3.6552, "step": 704000 }, { "epoch": 3.08, "learning_rate": 2.525136518245702e-06, "loss": 3.6681, "step": 704500 }, { "epoch": 3.09, "learning_rate": 2.5196361860256365e-06, "loss": 3.6506, "step": 705000 }, { "epoch": 3.09, "learning_rate": 2.51413585380557e-06, "loss": 3.6672, "step": 705500 }, { "epoch": 3.09, "learning_rate": 2.508635521585504e-06, "loss": 3.6543, "step": 706000 }, { "epoch": 3.09, "learning_rate": 2.503135189365438e-06, "loss": 3.6552, "step": 706500 }, { "epoch": 3.09, "learning_rate": 2.4976348571453718e-06, "loss": 3.66, "step": 707000 }, { "epoch": 3.1, "learning_rate": 2.4921345249253057e-06, "loss": 3.6742, "step": 707500 }, { "epoch": 3.1, "learning_rate": 2.4866341927052396e-06, "loss": 3.6776, "step": 708000 }, { "epoch": 3.1, "learning_rate": 2.4811338604851735e-06, "loss": 3.6334, "step": 708500 }, { "epoch": 3.1, "learning_rate": 2.4756335282651075e-06, "loss": 3.6503, "step": 709000 }, { "epoch": 3.1, "learning_rate": 2.4701331960450414e-06, "loss": 3.6462, "step": 709500 }, { "epoch": 3.11, "learning_rate": 2.4646328638249753e-06, "loss": 3.6595, "step": 710000 }, { "epoch": 3.11, "learning_rate": 2.4591325316049093e-06, "loss": 3.6479, "step": 710500 }, { "epoch": 3.11, "learning_rate": 2.453632199384843e-06, "loss": 3.6565, "step": 711000 }, { "epoch": 3.11, "learning_rate": 2.448131867164777e-06, "loss": 3.6369, "step": 711500 }, { "epoch": 3.12, "learning_rate": 2.4426315349447106e-06, "loss": 3.6472, "step": 712000 }, { "epoch": 3.12, "learning_rate": 2.4371312027246446e-06, "loss": 3.6586, "step": 712500 }, { "epoch": 3.12, "learning_rate": 2.431630870504579e-06, "loss": 3.6407, "step": 713000 }, { "epoch": 3.12, "learning_rate": 2.426130538284513e-06, "loss": 3.6419, "step": 713500 }, { "epoch": 3.12, "learning_rate": 2.4206302060644463e-06, "loss": 3.656, "step": 714000 }, { "epoch": 3.13, "learning_rate": 2.4151298738443803e-06, "loss": 3.6574, "step": 714500 }, { "epoch": 3.13, "learning_rate": 2.409629541624314e-06, "loss": 3.6627, "step": 715000 }, { "epoch": 3.13, "learning_rate": 2.404129209404248e-06, "loss": 3.6533, "step": 715500 }, { "epoch": 3.13, "learning_rate": 2.398628877184182e-06, "loss": 3.6598, "step": 716000 }, { "epoch": 3.14, "learning_rate": 2.393128544964116e-06, "loss": 3.6383, "step": 716500 }, { "epoch": 3.14, "learning_rate": 2.38762821274405e-06, "loss": 3.6495, "step": 717000 }, { "epoch": 3.14, "learning_rate": 2.382127880523984e-06, "loss": 3.6403, "step": 717500 }, { "epoch": 3.14, "learning_rate": 2.3766275483039178e-06, "loss": 3.6252, "step": 718000 }, { "epoch": 3.14, "learning_rate": 2.3711272160838517e-06, "loss": 3.6393, "step": 718500 }, { "epoch": 3.15, "learning_rate": 2.3656268838637856e-06, "loss": 3.6445, "step": 719000 }, { "epoch": 3.15, "learning_rate": 2.3601265516437196e-06, "loss": 3.6524, "step": 719500 }, { "epoch": 3.15, "learning_rate": 2.354626219423653e-06, "loss": 3.6464, "step": 720000 }, { "epoch": 3.15, "learning_rate": 2.3491258872035874e-06, "loss": 3.6339, "step": 720500 }, { "epoch": 3.16, "learning_rate": 2.3436255549835213e-06, "loss": 3.6574, "step": 721000 }, { "epoch": 3.16, "learning_rate": 2.3381252227634553e-06, "loss": 3.6531, "step": 721500 }, { "epoch": 3.16, "learning_rate": 2.332624890543389e-06, "loss": 3.6396, "step": 722000 }, { "epoch": 3.16, "learning_rate": 2.3271245583233227e-06, "loss": 3.6356, "step": 722500 }, { "epoch": 3.16, "learning_rate": 2.3216242261032566e-06, "loss": 3.657, "step": 723000 }, { "epoch": 3.17, "learning_rate": 2.3161238938831906e-06, "loss": 3.6506, "step": 723500 }, { "epoch": 3.17, "learning_rate": 2.3106235616631245e-06, "loss": 3.6428, "step": 724000 }, { "epoch": 3.17, "learning_rate": 2.305123229443059e-06, "loss": 3.6326, "step": 724500 }, { "epoch": 3.17, "learning_rate": 2.2996228972229924e-06, "loss": 3.6471, "step": 725000 }, { "epoch": 3.17, "learning_rate": 2.2941225650029263e-06, "loss": 3.6425, "step": 725500 }, { "epoch": 3.18, "learning_rate": 2.28862223278286e-06, "loss": 3.6469, "step": 726000 }, { "epoch": 3.18, "learning_rate": 2.283121900562794e-06, "loss": 3.624, "step": 726500 }, { "epoch": 3.18, "learning_rate": 2.277621568342728e-06, "loss": 3.6305, "step": 727000 }, { "epoch": 3.18, "learning_rate": 2.272121236122662e-06, "loss": 3.6303, "step": 727500 }, { "epoch": 3.19, "learning_rate": 2.266620903902596e-06, "loss": 3.6412, "step": 728000 }, { "epoch": 3.19, "learning_rate": 2.26112057168253e-06, "loss": 3.6278, "step": 728500 }, { "epoch": 3.19, "learning_rate": 2.2556202394624638e-06, "loss": 3.6592, "step": 729000 }, { "epoch": 3.19, "learning_rate": 2.2501199072423977e-06, "loss": 3.6277, "step": 729500 }, { "epoch": 3.19, "learning_rate": 2.2446195750223316e-06, "loss": 3.6473, "step": 730000 }, { "epoch": 3.2, "learning_rate": 2.2391192428022656e-06, "loss": 3.6472, "step": 730500 }, { "epoch": 3.2, "learning_rate": 2.233618910582199e-06, "loss": 3.6361, "step": 731000 }, { "epoch": 3.2, "learning_rate": 2.228118578362133e-06, "loss": 3.6418, "step": 731500 }, { "epoch": 3.2, "learning_rate": 2.2226182461420674e-06, "loss": 3.6541, "step": 732000 }, { "epoch": 3.21, "learning_rate": 2.2171179139220013e-06, "loss": 3.6382, "step": 732500 }, { "epoch": 3.21, "learning_rate": 2.211617581701935e-06, "loss": 3.6565, "step": 733000 }, { "epoch": 3.21, "learning_rate": 2.2061172494818687e-06, "loss": 3.6152, "step": 733500 }, { "epoch": 3.21, "learning_rate": 2.2006169172618026e-06, "loss": 3.6281, "step": 734000 }, { "epoch": 3.21, "learning_rate": 2.1951165850417366e-06, "loss": 3.6262, "step": 734500 }, { "epoch": 3.22, "learning_rate": 2.1896162528216705e-06, "loss": 3.6217, "step": 735000 }, { "epoch": 3.22, "learning_rate": 2.1841159206016044e-06, "loss": 3.6567, "step": 735500 }, { "epoch": 3.22, "learning_rate": 2.1786155883815384e-06, "loss": 3.6587, "step": 736000 }, { "epoch": 3.22, "learning_rate": 2.1731152561614723e-06, "loss": 3.6168, "step": 736500 }, { "epoch": 3.23, "learning_rate": 2.1676149239414062e-06, "loss": 3.6255, "step": 737000 }, { "epoch": 3.23, "learning_rate": 2.16211459172134e-06, "loss": 3.6383, "step": 737500 }, { "epoch": 3.23, "learning_rate": 2.156614259501274e-06, "loss": 3.6205, "step": 738000 }, { "epoch": 3.23, "learning_rate": 2.151113927281208e-06, "loss": 3.6206, "step": 738500 }, { "epoch": 3.23, "learning_rate": 2.145613595061142e-06, "loss": 3.6552, "step": 739000 }, { "epoch": 3.24, "learning_rate": 2.140113262841076e-06, "loss": 3.6309, "step": 739500 }, { "epoch": 3.24, "learning_rate": 2.13461293062101e-06, "loss": 3.6272, "step": 740000 }, { "epoch": 3.24, "learning_rate": 2.1291125984009437e-06, "loss": 3.6102, "step": 740500 }, { "epoch": 3.24, "learning_rate": 2.1236122661808776e-06, "loss": 3.6113, "step": 741000 }, { "epoch": 3.24, "learning_rate": 2.1181119339608116e-06, "loss": 3.6356, "step": 741500 }, { "epoch": 3.25, "learning_rate": 2.112611601740745e-06, "loss": 3.6216, "step": 742000 }, { "epoch": 3.25, "learning_rate": 2.107111269520679e-06, "loss": 3.6297, "step": 742500 }, { "epoch": 3.25, "learning_rate": 2.101610937300613e-06, "loss": 3.6411, "step": 743000 }, { "epoch": 3.25, "learning_rate": 2.0961106050805473e-06, "loss": 3.6339, "step": 743500 }, { "epoch": 3.26, "learning_rate": 2.0906102728604812e-06, "loss": 3.6366, "step": 744000 }, { "epoch": 3.26, "learning_rate": 2.0851099406404147e-06, "loss": 3.6237, "step": 744500 }, { "epoch": 3.26, "learning_rate": 2.0796096084203487e-06, "loss": 3.6409, "step": 745000 }, { "epoch": 3.26, "learning_rate": 2.0741092762002826e-06, "loss": 3.6242, "step": 745500 }, { "epoch": 3.26, "learning_rate": 2.0686089439802165e-06, "loss": 3.6334, "step": 746000 }, { "epoch": 3.27, "learning_rate": 2.0631086117601504e-06, "loss": 3.6293, "step": 746500 }, { "epoch": 3.27, "learning_rate": 2.0576082795400844e-06, "loss": 3.621, "step": 747000 }, { "epoch": 3.27, "learning_rate": 2.0521079473200183e-06, "loss": 3.6453, "step": 747500 }, { "epoch": 3.27, "learning_rate": 2.0466076150999522e-06, "loss": 3.6199, "step": 748000 }, { "epoch": 3.28, "learning_rate": 2.041107282879886e-06, "loss": 3.622, "step": 748500 }, { "epoch": 3.28, "learning_rate": 2.03560695065982e-06, "loss": 3.6239, "step": 749000 }, { "epoch": 3.28, "learning_rate": 2.030106618439754e-06, "loss": 3.6204, "step": 749500 }, { "epoch": 3.28, "learning_rate": 2.024606286219688e-06, "loss": 3.6277, "step": 750000 }, { "epoch": 3.28, "learning_rate": 2.0191059539996214e-06, "loss": 3.6097, "step": 750500 }, { "epoch": 3.29, "learning_rate": 2.013605621779556e-06, "loss": 3.623, "step": 751000 }, { "epoch": 3.29, "learning_rate": 2.0081052895594897e-06, "loss": 3.6062, "step": 751500 }, { "epoch": 3.29, "learning_rate": 2.0026049573394237e-06, "loss": 3.6213, "step": 752000 }, { "epoch": 3.29, "learning_rate": 1.9971046251193576e-06, "loss": 3.6332, "step": 752500 }, { "epoch": 3.3, "learning_rate": 1.991604292899291e-06, "loss": 3.6231, "step": 753000 }, { "epoch": 3.3, "learning_rate": 1.986103960679225e-06, "loss": 3.6362, "step": 753500 }, { "epoch": 3.3, "learning_rate": 1.980603628459159e-06, "loss": 3.6255, "step": 754000 }, { "epoch": 3.3, "learning_rate": 1.9751032962390933e-06, "loss": 3.6273, "step": 754500 }, { "epoch": 3.3, "learning_rate": 1.969602964019027e-06, "loss": 3.6085, "step": 755000 }, { "epoch": 3.31, "learning_rate": 1.9641026317989607e-06, "loss": 3.6228, "step": 755500 }, { "epoch": 3.31, "learning_rate": 1.9586022995788947e-06, "loss": 3.6276, "step": 756000 }, { "epoch": 3.31, "learning_rate": 1.9531019673588286e-06, "loss": 3.6219, "step": 756500 }, { "epoch": 3.31, "learning_rate": 1.9476016351387625e-06, "loss": 3.6357, "step": 757000 }, { "epoch": 3.31, "learning_rate": 1.9421013029186965e-06, "loss": 3.6168, "step": 757500 }, { "epoch": 3.32, "learning_rate": 1.9366009706986304e-06, "loss": 3.6224, "step": 758000 }, { "epoch": 3.32, "learning_rate": 1.9311006384785643e-06, "loss": 3.6105, "step": 758500 }, { "epoch": 3.32, "learning_rate": 1.9256003062584982e-06, "loss": 3.6348, "step": 759000 }, { "epoch": 3.32, "learning_rate": 1.920099974038432e-06, "loss": 3.6333, "step": 759500 }, { "epoch": 3.33, "learning_rate": 1.914599641818366e-06, "loss": 3.6262, "step": 760000 }, { "epoch": 3.33, "learning_rate": 1.9090993095983e-06, "loss": 3.6144, "step": 760500 }, { "epoch": 3.33, "learning_rate": 1.9035989773782337e-06, "loss": 3.5976, "step": 761000 }, { "epoch": 3.33, "learning_rate": 1.8980986451581677e-06, "loss": 3.6118, "step": 761500 }, { "epoch": 3.33, "learning_rate": 1.8925983129381014e-06, "loss": 3.6193, "step": 762000 }, { "epoch": 3.34, "learning_rate": 1.8870979807180355e-06, "loss": 3.6339, "step": 762500 }, { "epoch": 3.34, "learning_rate": 1.8815976484979695e-06, "loss": 3.6026, "step": 763000 }, { "epoch": 3.34, "learning_rate": 1.8760973162779034e-06, "loss": 3.6367, "step": 763500 }, { "epoch": 3.34, "learning_rate": 1.8705969840578373e-06, "loss": 3.6108, "step": 764000 }, { "epoch": 3.35, "learning_rate": 1.865096651837771e-06, "loss": 3.5938, "step": 764500 }, { "epoch": 3.35, "learning_rate": 1.859596319617705e-06, "loss": 3.6145, "step": 765000 }, { "epoch": 3.35, "learning_rate": 1.8540959873976389e-06, "loss": 3.6213, "step": 765500 }, { "epoch": 3.35, "learning_rate": 1.848595655177573e-06, "loss": 3.6156, "step": 766000 }, { "epoch": 3.35, "learning_rate": 1.843095322957507e-06, "loss": 3.6202, "step": 766500 }, { "epoch": 3.36, "learning_rate": 1.8375949907374407e-06, "loss": 3.6116, "step": 767000 }, { "epoch": 3.36, "learning_rate": 1.8320946585173746e-06, "loss": 3.6031, "step": 767500 }, { "epoch": 3.36, "learning_rate": 1.8265943262973085e-06, "loss": 3.6284, "step": 768000 }, { "epoch": 3.36, "learning_rate": 1.8210939940772423e-06, "loss": 3.6116, "step": 768500 }, { "epoch": 3.37, "learning_rate": 1.8155936618571762e-06, "loss": 3.6036, "step": 769000 }, { "epoch": 3.37, "learning_rate": 1.8100933296371101e-06, "loss": 3.6161, "step": 769500 }, { "epoch": 3.37, "learning_rate": 1.8045929974170442e-06, "loss": 3.6137, "step": 770000 }, { "epoch": 3.37, "learning_rate": 1.7990926651969782e-06, "loss": 3.6098, "step": 770500 }, { "epoch": 3.37, "learning_rate": 1.793592332976912e-06, "loss": 3.6203, "step": 771000 }, { "epoch": 3.38, "learning_rate": 1.7880920007568458e-06, "loss": 3.6222, "step": 771500 }, { "epoch": 3.38, "learning_rate": 1.7825916685367798e-06, "loss": 3.6052, "step": 772000 }, { "epoch": 3.38, "learning_rate": 1.7770913363167137e-06, "loss": 3.6241, "step": 772500 }, { "epoch": 3.38, "learning_rate": 1.7715910040966474e-06, "loss": 3.6213, "step": 773000 }, { "epoch": 3.38, "learning_rate": 1.7660906718765815e-06, "loss": 3.5962, "step": 773500 }, { "epoch": 3.39, "learning_rate": 1.7605903396565155e-06, "loss": 3.6085, "step": 774000 }, { "epoch": 3.39, "learning_rate": 1.7550900074364494e-06, "loss": 3.59, "step": 774500 }, { "epoch": 3.39, "learning_rate": 1.7495896752163833e-06, "loss": 3.6154, "step": 775000 }, { "epoch": 3.39, "learning_rate": 1.744089342996317e-06, "loss": 3.5961, "step": 775500 }, { "epoch": 3.4, "learning_rate": 1.738589010776251e-06, "loss": 3.619, "step": 776000 }, { "epoch": 3.4, "learning_rate": 1.733088678556185e-06, "loss": 3.6108, "step": 776500 }, { "epoch": 3.4, "learning_rate": 1.7275883463361186e-06, "loss": 3.6124, "step": 777000 }, { "epoch": 3.4, "learning_rate": 1.722088014116053e-06, "loss": 3.6054, "step": 777500 }, { "epoch": 3.4, "learning_rate": 1.7165876818959867e-06, "loss": 3.6057, "step": 778000 }, { "epoch": 3.41, "learning_rate": 1.7110873496759206e-06, "loss": 3.5989, "step": 778500 }, { "epoch": 3.41, "learning_rate": 1.7055870174558545e-06, "loss": 3.6196, "step": 779000 }, { "epoch": 3.41, "learning_rate": 1.7000866852357883e-06, "loss": 3.6141, "step": 779500 }, { "epoch": 3.41, "learning_rate": 1.6945863530157222e-06, "loss": 3.6201, "step": 780000 }, { "epoch": 3.42, "learning_rate": 1.6890860207956561e-06, "loss": 3.6144, "step": 780500 }, { "epoch": 3.42, "learning_rate": 1.6835856885755903e-06, "loss": 3.6094, "step": 781000 }, { "epoch": 3.42, "learning_rate": 1.6780853563555242e-06, "loss": 3.5901, "step": 781500 }, { "epoch": 3.42, "learning_rate": 1.672585024135458e-06, "loss": 3.5945, "step": 782000 }, { "epoch": 3.42, "learning_rate": 1.6670846919153918e-06, "loss": 3.6066, "step": 782500 }, { "epoch": 3.43, "learning_rate": 1.6615843596953258e-06, "loss": 3.6179, "step": 783000 }, { "epoch": 3.43, "learning_rate": 1.6560840274752597e-06, "loss": 3.6198, "step": 783500 }, { "epoch": 3.43, "learning_rate": 1.6505836952551934e-06, "loss": 3.6033, "step": 784000 }, { "epoch": 3.43, "learning_rate": 1.6450833630351273e-06, "loss": 3.6041, "step": 784500 }, { "epoch": 3.44, "learning_rate": 1.6395830308150615e-06, "loss": 3.6131, "step": 785000 }, { "epoch": 3.44, "learning_rate": 1.6340826985949954e-06, "loss": 3.6208, "step": 785500 }, { "epoch": 3.44, "learning_rate": 1.6285823663749291e-06, "loss": 3.6218, "step": 786000 }, { "epoch": 3.44, "learning_rate": 1.623082034154863e-06, "loss": 3.6059, "step": 786500 }, { "epoch": 3.44, "learning_rate": 1.617581701934797e-06, "loss": 3.6209, "step": 787000 }, { "epoch": 3.45, "learning_rate": 1.612081369714731e-06, "loss": 3.5962, "step": 787500 }, { "epoch": 3.45, "learning_rate": 1.6065810374946646e-06, "loss": 3.6046, "step": 788000 }, { "epoch": 3.45, "learning_rate": 1.6010807052745986e-06, "loss": 3.6158, "step": 788500 }, { "epoch": 3.45, "learning_rate": 1.5955803730545327e-06, "loss": 3.5979, "step": 789000 }, { "epoch": 3.46, "learning_rate": 1.5900800408344666e-06, "loss": 3.6153, "step": 789500 }, { "epoch": 3.46, "learning_rate": 1.5845797086144006e-06, "loss": 3.5999, "step": 790000 }, { "epoch": 3.46, "learning_rate": 1.5790793763943343e-06, "loss": 3.6041, "step": 790500 }, { "epoch": 3.46, "learning_rate": 1.5735790441742682e-06, "loss": 3.6019, "step": 791000 }, { "epoch": 3.46, "learning_rate": 1.5680787119542021e-06, "loss": 3.5991, "step": 791500 }, { "epoch": 3.47, "learning_rate": 1.562578379734136e-06, "loss": 3.5955, "step": 792000 }, { "epoch": 3.47, "learning_rate": 1.5570780475140702e-06, "loss": 3.6217, "step": 792500 }, { "epoch": 3.47, "learning_rate": 1.551577715294004e-06, "loss": 3.6082, "step": 793000 }, { "epoch": 3.47, "learning_rate": 1.5460773830739378e-06, "loss": 3.6068, "step": 793500 }, { "epoch": 3.47, "learning_rate": 1.5405770508538718e-06, "loss": 3.6073, "step": 794000 }, { "epoch": 3.48, "learning_rate": 1.5350767186338055e-06, "loss": 3.61, "step": 794500 }, { "epoch": 3.48, "learning_rate": 1.5295763864137394e-06, "loss": 3.5978, "step": 795000 }, { "epoch": 3.48, "learning_rate": 1.5240760541936733e-06, "loss": 3.594, "step": 795500 }, { "epoch": 3.48, "learning_rate": 1.5185757219736073e-06, "loss": 3.5951, "step": 796000 }, { "epoch": 3.49, "learning_rate": 1.5130753897535414e-06, "loss": 3.5942, "step": 796500 }, { "epoch": 3.49, "learning_rate": 1.5075750575334751e-06, "loss": 3.578, "step": 797000 }, { "epoch": 3.49, "learning_rate": 1.502074725313409e-06, "loss": 3.5983, "step": 797500 }, { "epoch": 3.49, "learning_rate": 1.496574393093343e-06, "loss": 3.5995, "step": 798000 }, { "epoch": 3.49, "learning_rate": 1.491074060873277e-06, "loss": 3.5942, "step": 798500 }, { "epoch": 3.5, "learning_rate": 1.4855737286532106e-06, "loss": 3.6105, "step": 799000 }, { "epoch": 3.5, "learning_rate": 1.4800733964331446e-06, "loss": 3.6005, "step": 799500 }, { "epoch": 3.5, "learning_rate": 1.4745730642130787e-06, "loss": 3.5992, "step": 800000 }, { "epoch": 3.5, "learning_rate": 1.4690727319930126e-06, "loss": 3.6003, "step": 800500 }, { "epoch": 3.51, "learning_rate": 1.4635723997729466e-06, "loss": 3.5961, "step": 801000 }, { "epoch": 3.51, "learning_rate": 1.4580720675528803e-06, "loss": 3.5923, "step": 801500 }, { "epoch": 3.51, "learning_rate": 1.4525717353328142e-06, "loss": 3.5917, "step": 802000 }, { "epoch": 3.51, "learning_rate": 1.4470714031127481e-06, "loss": 3.5949, "step": 802500 }, { "epoch": 3.51, "learning_rate": 1.4415710708926819e-06, "loss": 3.5836, "step": 803000 }, { "epoch": 3.52, "learning_rate": 1.4360707386726158e-06, "loss": 3.6094, "step": 803500 }, { "epoch": 3.52, "learning_rate": 1.43057040645255e-06, "loss": 3.6073, "step": 804000 }, { "epoch": 3.52, "learning_rate": 1.4250700742324839e-06, "loss": 3.5858, "step": 804500 }, { "epoch": 3.52, "learning_rate": 1.4195697420124178e-06, "loss": 3.6001, "step": 805000 }, { "epoch": 3.53, "learning_rate": 1.4140694097923515e-06, "loss": 3.5929, "step": 805500 }, { "epoch": 3.53, "learning_rate": 1.4085690775722854e-06, "loss": 3.5922, "step": 806000 }, { "epoch": 3.53, "learning_rate": 1.4030687453522194e-06, "loss": 3.6161, "step": 806500 }, { "epoch": 3.53, "learning_rate": 1.3975684131321533e-06, "loss": 3.6097, "step": 807000 }, { "epoch": 3.53, "learning_rate": 1.3920680809120874e-06, "loss": 3.6172, "step": 807500 }, { "epoch": 3.54, "learning_rate": 1.3865677486920211e-06, "loss": 3.5946, "step": 808000 }, { "epoch": 3.54, "learning_rate": 1.381067416471955e-06, "loss": 3.6015, "step": 808500 }, { "epoch": 3.54, "learning_rate": 1.375567084251889e-06, "loss": 3.6125, "step": 809000 }, { "epoch": 3.54, "learning_rate": 1.3700667520318227e-06, "loss": 3.6061, "step": 809500 }, { "epoch": 3.54, "learning_rate": 1.3645664198117566e-06, "loss": 3.5902, "step": 810000 }, { "epoch": 3.55, "learning_rate": 1.3590660875916906e-06, "loss": 3.597, "step": 810500 }, { "epoch": 3.55, "learning_rate": 1.3535657553716245e-06, "loss": 3.6164, "step": 811000 }, { "epoch": 3.55, "learning_rate": 1.3480654231515586e-06, "loss": 3.6002, "step": 811500 }, { "epoch": 3.55, "learning_rate": 1.3425650909314924e-06, "loss": 3.6012, "step": 812000 }, { "epoch": 3.56, "learning_rate": 1.3370647587114263e-06, "loss": 3.5951, "step": 812500 }, { "epoch": 3.56, "learning_rate": 1.3315644264913602e-06, "loss": 3.5944, "step": 813000 }, { "epoch": 3.56, "learning_rate": 1.3260640942712941e-06, "loss": 3.6024, "step": 813500 }, { "epoch": 3.56, "learning_rate": 1.3205637620512279e-06, "loss": 3.5891, "step": 814000 }, { "epoch": 3.56, "learning_rate": 1.3150634298311618e-06, "loss": 3.5926, "step": 814500 }, { "epoch": 3.57, "learning_rate": 1.3095630976110957e-06, "loss": 3.5937, "step": 815000 }, { "epoch": 3.57, "learning_rate": 1.3040627653910299e-06, "loss": 3.5772, "step": 815500 }, { "epoch": 3.57, "learning_rate": 1.2985624331709638e-06, "loss": 3.5793, "step": 816000 }, { "epoch": 3.57, "learning_rate": 1.2930621009508975e-06, "loss": 3.5919, "step": 816500 }, { "epoch": 3.58, "learning_rate": 1.2875617687308314e-06, "loss": 3.6159, "step": 817000 }, { "epoch": 3.58, "learning_rate": 1.2820614365107654e-06, "loss": 3.5919, "step": 817500 }, { "epoch": 3.58, "learning_rate": 1.276561104290699e-06, "loss": 3.6001, "step": 818000 }, { "epoch": 3.58, "learning_rate": 1.271060772070633e-06, "loss": 3.613, "step": 818500 }, { "epoch": 3.58, "learning_rate": 1.2655604398505672e-06, "loss": 3.5816, "step": 819000 }, { "epoch": 3.59, "learning_rate": 1.260060107630501e-06, "loss": 3.6099, "step": 819500 }, { "epoch": 3.59, "learning_rate": 1.254559775410435e-06, "loss": 3.5974, "step": 820000 }, { "epoch": 3.59, "learning_rate": 1.2490594431903687e-06, "loss": 3.5986, "step": 820500 }, { "epoch": 3.59, "learning_rate": 1.2435591109703027e-06, "loss": 3.5901, "step": 821000 }, { "epoch": 3.6, "learning_rate": 1.2380587787502366e-06, "loss": 3.5968, "step": 821500 }, { "epoch": 3.6, "learning_rate": 1.2325584465301705e-06, "loss": 3.6025, "step": 822000 }, { "epoch": 3.6, "learning_rate": 1.2270581143101044e-06, "loss": 3.5785, "step": 822500 }, { "epoch": 3.6, "learning_rate": 1.2215577820900384e-06, "loss": 3.5906, "step": 823000 }, { "epoch": 3.6, "learning_rate": 1.216057449869972e-06, "loss": 3.5827, "step": 823500 }, { "epoch": 3.61, "learning_rate": 1.2105571176499062e-06, "loss": 3.5897, "step": 824000 }, { "epoch": 3.61, "learning_rate": 1.2050567854298402e-06, "loss": 3.5881, "step": 824500 }, { "epoch": 3.61, "learning_rate": 1.1995564532097739e-06, "loss": 3.5894, "step": 825000 }, { "epoch": 3.61, "learning_rate": 1.194056120989708e-06, "loss": 3.5914, "step": 825500 }, { "epoch": 3.61, "learning_rate": 1.1885557887696417e-06, "loss": 3.5652, "step": 826000 }, { "epoch": 3.62, "learning_rate": 1.1830554565495757e-06, "loss": 3.5747, "step": 826500 }, { "epoch": 3.62, "learning_rate": 1.1775551243295096e-06, "loss": 3.594, "step": 827000 }, { "epoch": 3.62, "learning_rate": 1.1720547921094435e-06, "loss": 3.5865, "step": 827500 }, { "epoch": 3.62, "learning_rate": 1.1665544598893774e-06, "loss": 3.5854, "step": 828000 }, { "epoch": 3.63, "learning_rate": 1.1610541276693114e-06, "loss": 3.5782, "step": 828500 }, { "epoch": 3.63, "learning_rate": 1.155553795449245e-06, "loss": 3.6062, "step": 829000 }, { "epoch": 3.63, "learning_rate": 1.1500534632291792e-06, "loss": 3.5711, "step": 829500 }, { "epoch": 3.63, "learning_rate": 1.144553131009113e-06, "loss": 3.5878, "step": 830000 }, { "epoch": 3.63, "learning_rate": 1.1390527987890469e-06, "loss": 3.593, "step": 830500 }, { "epoch": 3.64, "learning_rate": 1.1335524665689808e-06, "loss": 3.584, "step": 831000 }, { "epoch": 3.64, "learning_rate": 1.1280521343489147e-06, "loss": 3.5956, "step": 831500 }, { "epoch": 3.64, "learning_rate": 1.1225518021288487e-06, "loss": 3.5882, "step": 832000 }, { "epoch": 3.64, "learning_rate": 1.1170514699087826e-06, "loss": 3.594, "step": 832500 }, { "epoch": 3.65, "learning_rate": 1.1115511376887165e-06, "loss": 3.5969, "step": 833000 }, { "epoch": 3.65, "learning_rate": 1.1060508054686505e-06, "loss": 3.5945, "step": 833500 }, { "epoch": 3.65, "learning_rate": 1.1005504732485844e-06, "loss": 3.5813, "step": 834000 }, { "epoch": 3.65, "learning_rate": 1.095050141028518e-06, "loss": 3.6051, "step": 834500 }, { "epoch": 3.65, "learning_rate": 1.0895498088084522e-06, "loss": 3.5854, "step": 835000 }, { "epoch": 3.66, "learning_rate": 1.084049476588386e-06, "loss": 3.5739, "step": 835500 }, { "epoch": 3.66, "learning_rate": 1.0785491443683199e-06, "loss": 3.5921, "step": 836000 }, { "epoch": 3.66, "learning_rate": 1.0730488121482538e-06, "loss": 3.587, "step": 836500 }, { "epoch": 3.66, "learning_rate": 1.0675484799281877e-06, "loss": 3.5739, "step": 837000 }, { "epoch": 3.67, "learning_rate": 1.0620481477081217e-06, "loss": 3.5844, "step": 837500 }, { "epoch": 3.67, "learning_rate": 1.0565478154880556e-06, "loss": 3.5814, "step": 838000 }, { "epoch": 3.67, "learning_rate": 1.0510474832679893e-06, "loss": 3.5723, "step": 838500 }, { "epoch": 3.67, "learning_rate": 1.0455471510479235e-06, "loss": 3.587, "step": 839000 }, { "epoch": 3.67, "learning_rate": 1.0400468188278574e-06, "loss": 3.5839, "step": 839500 }, { "epoch": 3.68, "learning_rate": 1.0345464866077911e-06, "loss": 3.5731, "step": 840000 }, { "epoch": 3.68, "learning_rate": 1.029046154387725e-06, "loss": 3.575, "step": 840500 }, { "epoch": 3.68, "learning_rate": 1.023545822167659e-06, "loss": 3.5902, "step": 841000 }, { "epoch": 3.68, "learning_rate": 1.0180454899475929e-06, "loss": 3.6101, "step": 841500 }, { "epoch": 3.68, "learning_rate": 1.0125451577275268e-06, "loss": 3.578, "step": 842000 }, { "epoch": 3.69, "learning_rate": 1.0070448255074608e-06, "loss": 3.5796, "step": 842500 }, { "epoch": 3.69, "learning_rate": 1.0015444932873947e-06, "loss": 3.5879, "step": 843000 }, { "epoch": 3.69, "learning_rate": 9.960441610673286e-07, "loss": 3.5764, "step": 843500 }, { "epoch": 3.69, "learning_rate": 9.905438288472623e-07, "loss": 3.5936, "step": 844000 }, { "epoch": 3.7, "learning_rate": 9.850434966271965e-07, "loss": 3.5776, "step": 844500 }, { "epoch": 3.7, "learning_rate": 9.795431644071304e-07, "loss": 3.5755, "step": 845000 }, { "epoch": 3.7, "learning_rate": 9.740428321870641e-07, "loss": 3.5725, "step": 845500 }, { "epoch": 3.7, "learning_rate": 9.68542499966998e-07, "loss": 3.5699, "step": 846000 }, { "epoch": 3.7, "learning_rate": 9.63042167746932e-07, "loss": 3.5751, "step": 846500 }, { "epoch": 3.71, "learning_rate": 9.57541835526866e-07, "loss": 3.5946, "step": 847000 }, { "epoch": 3.71, "learning_rate": 9.520415033067998e-07, "loss": 3.5818, "step": 847500 }, { "epoch": 3.71, "learning_rate": 9.465411710867336e-07, "loss": 3.5993, "step": 848000 }, { "epoch": 3.71, "learning_rate": 9.410408388666677e-07, "loss": 3.5642, "step": 848500 }, { "epoch": 3.72, "learning_rate": 9.355405066466015e-07, "loss": 3.5975, "step": 849000 }, { "epoch": 3.72, "learning_rate": 9.300401744265354e-07, "loss": 3.5909, "step": 849500 }, { "epoch": 3.72, "learning_rate": 9.245398422064693e-07, "loss": 3.5627, "step": 850000 }, { "epoch": 3.72, "learning_rate": 9.190395099864033e-07, "loss": 3.5729, "step": 850500 }, { "epoch": 3.72, "learning_rate": 9.135391777663371e-07, "loss": 3.5828, "step": 851000 }, { "epoch": 3.73, "learning_rate": 9.08038845546271e-07, "loss": 3.5692, "step": 851500 }, { "epoch": 3.73, "learning_rate": 9.025385133262051e-07, "loss": 3.5645, "step": 852000 }, { "epoch": 3.73, "learning_rate": 8.970381811061389e-07, "loss": 3.5867, "step": 852500 }, { "epoch": 3.73, "learning_rate": 8.915378488860728e-07, "loss": 3.5761, "step": 853000 }, { "epoch": 3.74, "learning_rate": 8.860375166660067e-07, "loss": 3.5734, "step": 853500 }, { "epoch": 3.74, "learning_rate": 8.805371844459407e-07, "loss": 3.5915, "step": 854000 }, { "epoch": 3.74, "learning_rate": 8.750368522258745e-07, "loss": 3.5812, "step": 854500 }, { "epoch": 3.74, "learning_rate": 8.695365200058084e-07, "loss": 3.5731, "step": 855000 }, { "epoch": 3.74, "learning_rate": 8.640361877857423e-07, "loss": 3.577, "step": 855500 }, { "epoch": 3.75, "learning_rate": 8.585358555656763e-07, "loss": 3.5891, "step": 856000 }, { "epoch": 3.75, "learning_rate": 8.530355233456101e-07, "loss": 3.5745, "step": 856500 }, { "epoch": 3.75, "learning_rate": 8.47535191125544e-07, "loss": 3.5749, "step": 857000 }, { "epoch": 3.75, "learning_rate": 8.420348589054779e-07, "loss": 3.5853, "step": 857500 }, { "epoch": 3.75, "learning_rate": 8.365345266854119e-07, "loss": 3.5863, "step": 858000 }, { "epoch": 3.76, "learning_rate": 8.310341944653457e-07, "loss": 3.562, "step": 858500 }, { "epoch": 3.76, "learning_rate": 8.255338622452797e-07, "loss": 3.5744, "step": 859000 }, { "epoch": 3.76, "learning_rate": 8.200335300252137e-07, "loss": 3.5943, "step": 859500 }, { "epoch": 3.76, "learning_rate": 8.145331978051475e-07, "loss": 3.5715, "step": 860000 }, { "epoch": 3.77, "learning_rate": 8.090328655850814e-07, "loss": 3.574, "step": 860500 }, { "epoch": 3.77, "learning_rate": 8.035325333650153e-07, "loss": 3.5707, "step": 861000 }, { "epoch": 3.77, "learning_rate": 7.980322011449493e-07, "loss": 3.5604, "step": 861500 }, { "epoch": 3.77, "learning_rate": 7.925318689248831e-07, "loss": 3.5664, "step": 862000 }, { "epoch": 3.77, "learning_rate": 7.870315367048171e-07, "loss": 3.5709, "step": 862500 }, { "epoch": 3.78, "learning_rate": 7.815312044847509e-07, "loss": 3.5779, "step": 863000 }, { "epoch": 3.78, "learning_rate": 7.760308722646849e-07, "loss": 3.5612, "step": 863500 }, { "epoch": 3.78, "learning_rate": 7.705305400446187e-07, "loss": 3.5783, "step": 864000 }, { "epoch": 3.78, "learning_rate": 7.650302078245527e-07, "loss": 3.5779, "step": 864500 }, { "epoch": 3.79, "learning_rate": 7.595298756044865e-07, "loss": 3.5689, "step": 865000 }, { "epoch": 3.79, "learning_rate": 7.540295433844205e-07, "loss": 3.5759, "step": 865500 }, { "epoch": 3.79, "learning_rate": 7.485292111643543e-07, "loss": 3.5804, "step": 866000 }, { "epoch": 3.79, "learning_rate": 7.430288789442883e-07, "loss": 3.5765, "step": 866500 }, { "epoch": 3.79, "learning_rate": 7.375285467242221e-07, "loss": 3.5839, "step": 867000 }, { "epoch": 3.8, "learning_rate": 7.320282145041561e-07, "loss": 3.5736, "step": 867500 }, { "epoch": 3.8, "learning_rate": 7.265278822840901e-07, "loss": 3.5739, "step": 868000 }, { "epoch": 3.8, "learning_rate": 7.210275500640239e-07, "loss": 3.5856, "step": 868500 }, { "epoch": 3.8, "learning_rate": 7.155272178439579e-07, "loss": 3.5765, "step": 869000 }, { "epoch": 3.81, "learning_rate": 7.100268856238917e-07, "loss": 3.5569, "step": 869500 }, { "epoch": 3.81, "learning_rate": 7.045265534038257e-07, "loss": 3.5749, "step": 870000 }, { "epoch": 3.81, "learning_rate": 6.990262211837595e-07, "loss": 3.5789, "step": 870500 }, { "epoch": 3.81, "learning_rate": 6.935258889636935e-07, "loss": 3.5751, "step": 871000 }, { "epoch": 3.81, "learning_rate": 6.880255567436274e-07, "loss": 3.5877, "step": 871500 }, { "epoch": 3.82, "learning_rate": 6.825252245235613e-07, "loss": 3.5813, "step": 872000 }, { "epoch": 3.82, "learning_rate": 6.770248923034951e-07, "loss": 3.5859, "step": 872500 }, { "epoch": 3.82, "learning_rate": 6.715245600834291e-07, "loss": 3.5547, "step": 873000 }, { "epoch": 3.82, "learning_rate": 6.660242278633631e-07, "loss": 3.5746, "step": 873500 }, { "epoch": 3.82, "learning_rate": 6.605238956432969e-07, "loss": 3.5859, "step": 874000 }, { "epoch": 3.83, "learning_rate": 6.550235634232307e-07, "loss": 3.5723, "step": 874500 }, { "epoch": 3.83, "learning_rate": 6.495232312031647e-07, "loss": 3.5854, "step": 875000 }, { "epoch": 3.83, "learning_rate": 6.440228989830987e-07, "loss": 3.5609, "step": 875500 }, { "epoch": 3.83, "learning_rate": 6.385225667630325e-07, "loss": 3.5816, "step": 876000 }, { "epoch": 3.84, "learning_rate": 6.330222345429664e-07, "loss": 3.5917, "step": 876500 }, { "epoch": 3.84, "learning_rate": 6.275219023229004e-07, "loss": 3.565, "step": 877000 }, { "epoch": 3.84, "learning_rate": 6.220215701028343e-07, "loss": 3.582, "step": 877500 }, { "epoch": 3.84, "learning_rate": 6.165212378827682e-07, "loss": 3.5734, "step": 878000 }, { "epoch": 3.84, "learning_rate": 6.11020905662702e-07, "loss": 3.5583, "step": 878500 }, { "epoch": 3.85, "learning_rate": 6.05520573442636e-07, "loss": 3.5628, "step": 879000 }, { "epoch": 3.85, "learning_rate": 6.000202412225699e-07, "loss": 3.5575, "step": 879500 }, { "epoch": 3.85, "learning_rate": 5.945199090025038e-07, "loss": 3.5824, "step": 880000 }, { "epoch": 3.85, "learning_rate": 5.890195767824376e-07, "loss": 3.5685, "step": 880500 }, { "epoch": 3.86, "learning_rate": 5.835192445623717e-07, "loss": 3.5838, "step": 881000 }, { "epoch": 3.86, "learning_rate": 5.780189123423055e-07, "loss": 3.5711, "step": 881500 }, { "epoch": 3.86, "learning_rate": 5.725185801222394e-07, "loss": 3.5761, "step": 882000 }, { "epoch": 3.86, "learning_rate": 5.670182479021734e-07, "loss": 3.5635, "step": 882500 }, { "epoch": 3.86, "learning_rate": 5.615179156821073e-07, "loss": 3.5609, "step": 883000 }, { "epoch": 3.87, "learning_rate": 5.560175834620411e-07, "loss": 3.5916, "step": 883500 }, { "epoch": 3.87, "learning_rate": 5.50517251241975e-07, "loss": 3.5857, "step": 884000 }, { "epoch": 3.87, "learning_rate": 5.45016919021909e-07, "loss": 3.5733, "step": 884500 }, { "epoch": 3.87, "learning_rate": 5.395165868018429e-07, "loss": 3.5576, "step": 885000 }, { "epoch": 3.88, "learning_rate": 5.340162545817767e-07, "loss": 3.5641, "step": 885500 }, { "epoch": 3.88, "learning_rate": 5.285159223617107e-07, "loss": 3.5612, "step": 886000 }, { "epoch": 3.88, "learning_rate": 5.230155901416446e-07, "loss": 3.5757, "step": 886500 }, { "epoch": 3.88, "learning_rate": 5.175152579215785e-07, "loss": 3.5792, "step": 887000 }, { "epoch": 3.88, "learning_rate": 5.120149257015124e-07, "loss": 3.5916, "step": 887500 }, { "epoch": 3.89, "learning_rate": 5.065145934814463e-07, "loss": 3.5646, "step": 888000 }, { "epoch": 3.89, "learning_rate": 5.010142612613803e-07, "loss": 3.5706, "step": 888500 }, { "epoch": 3.89, "learning_rate": 4.955139290413141e-07, "loss": 3.5896, "step": 889000 }, { "epoch": 3.89, "learning_rate": 4.90013596821248e-07, "loss": 3.5664, "step": 889500 }, { "epoch": 3.89, "learning_rate": 4.84513264601182e-07, "loss": 3.5767, "step": 890000 }, { "epoch": 3.9, "learning_rate": 4.790129323811159e-07, "loss": 3.5711, "step": 890500 }, { "epoch": 3.9, "learning_rate": 4.7351260016104973e-07, "loss": 3.5919, "step": 891000 }, { "epoch": 3.9, "learning_rate": 4.680122679409837e-07, "loss": 3.562, "step": 891500 }, { "epoch": 3.9, "learning_rate": 4.6251193572091753e-07, "loss": 3.562, "step": 892000 }, { "epoch": 3.91, "learning_rate": 4.570116035008515e-07, "loss": 3.5696, "step": 892500 }, { "epoch": 3.91, "learning_rate": 4.515112712807854e-07, "loss": 3.5657, "step": 893000 }, { "epoch": 3.91, "learning_rate": 4.460109390607193e-07, "loss": 3.555, "step": 893500 }, { "epoch": 3.91, "learning_rate": 4.405106068406532e-07, "loss": 3.586, "step": 894000 }, { "epoch": 3.91, "learning_rate": 4.350102746205871e-07, "loss": 3.5581, "step": 894500 }, { "epoch": 3.92, "learning_rate": 4.2950994240052105e-07, "loss": 3.5658, "step": 895000 }, { "epoch": 3.92, "learning_rate": 4.2400961018045493e-07, "loss": 3.5601, "step": 895500 }, { "epoch": 3.92, "learning_rate": 4.1850927796038886e-07, "loss": 3.5856, "step": 896000 }, { "epoch": 3.92, "learning_rate": 4.1300894574032273e-07, "loss": 3.5489, "step": 896500 }, { "epoch": 3.93, "learning_rate": 4.0750861352025666e-07, "loss": 3.5652, "step": 897000 }, { "epoch": 3.93, "learning_rate": 4.0200828130019054e-07, "loss": 3.548, "step": 897500 }, { "epoch": 3.93, "learning_rate": 3.965079490801245e-07, "loss": 3.5772, "step": 898000 }, { "epoch": 3.93, "learning_rate": 3.9100761686005834e-07, "loss": 3.5762, "step": 898500 }, { "epoch": 3.93, "learning_rate": 3.855072846399923e-07, "loss": 3.5521, "step": 899000 }, { "epoch": 3.94, "learning_rate": 3.800069524199262e-07, "loss": 3.5695, "step": 899500 }, { "epoch": 3.94, "learning_rate": 3.7450662019986013e-07, "loss": 3.5733, "step": 900000 }, { "epoch": 3.94, "learning_rate": 3.69006287979794e-07, "loss": 3.5832, "step": 900500 }, { "epoch": 3.94, "learning_rate": 3.6350595575972793e-07, "loss": 3.5668, "step": 901000 }, { "epoch": 3.95, "learning_rate": 3.580056235396618e-07, "loss": 3.5794, "step": 901500 }, { "epoch": 3.95, "learning_rate": 3.5250529131959574e-07, "loss": 3.5633, "step": 902000 }, { "epoch": 3.95, "learning_rate": 3.470049590995296e-07, "loss": 3.5713, "step": 902500 }, { "epoch": 3.95, "learning_rate": 3.4150462687946354e-07, "loss": 3.5666, "step": 903000 }, { "epoch": 3.95, "learning_rate": 3.3600429465939747e-07, "loss": 3.5673, "step": 903500 }, { "epoch": 3.96, "learning_rate": 3.3050396243933135e-07, "loss": 3.5563, "step": 904000 }, { "epoch": 3.96, "learning_rate": 3.250036302192653e-07, "loss": 3.5795, "step": 904500 }, { "epoch": 3.96, "learning_rate": 3.1950329799919915e-07, "loss": 3.5727, "step": 905000 }, { "epoch": 3.96, "learning_rate": 3.1400296577913313e-07, "loss": 3.5572, "step": 905500 }, { "epoch": 3.96, "learning_rate": 3.08502633559067e-07, "loss": 3.5539, "step": 906000 }, { "epoch": 3.97, "learning_rate": 3.030023013390009e-07, "loss": 3.5655, "step": 906500 }, { "epoch": 3.97, "learning_rate": 2.975019691189348e-07, "loss": 3.5589, "step": 907000 }, { "epoch": 3.97, "learning_rate": 2.920016368988687e-07, "loss": 3.5711, "step": 907500 }, { "epoch": 3.97, "learning_rate": 2.865013046788026e-07, "loss": 3.5594, "step": 908000 }, { "epoch": 3.98, "learning_rate": 2.8100097245873655e-07, "loss": 3.5661, "step": 908500 }, { "epoch": 3.98, "learning_rate": 2.755006402386705e-07, "loss": 3.5827, "step": 909000 }, { "epoch": 3.98, "learning_rate": 2.7000030801860435e-07, "loss": 3.5596, "step": 909500 }, { "epoch": 3.98, "learning_rate": 2.644999757985383e-07, "loss": 3.579, "step": 910000 }, { "epoch": 3.98, "learning_rate": 2.5899964357847216e-07, "loss": 3.5746, "step": 910500 }, { "epoch": 3.99, "learning_rate": 2.534993113584061e-07, "loss": 3.5646, "step": 911000 }, { "epoch": 3.99, "learning_rate": 2.4799897913833996e-07, "loss": 3.5753, "step": 911500 }, { "epoch": 3.99, "learning_rate": 2.424986469182739e-07, "loss": 3.5575, "step": 912000 }, { "epoch": 3.99, "learning_rate": 2.369983146982078e-07, "loss": 3.547, "step": 912500 }, { "epoch": 4.0, "learning_rate": 2.314979824781417e-07, "loss": 3.5592, "step": 913000 }, { "epoch": 4.0, "learning_rate": 2.259976502580756e-07, "loss": 3.5682, "step": 913500 }, { "epoch": 4.0, "learning_rate": 2.204973180380095e-07, "loss": 3.5759, "step": 914000 }, { "epoch": 4.0, "step": 914036, "total_flos": 4010446533477751296, "train_runtime": 482483.0917, "train_samples_per_second": 1.894 }, { "epoch": 4.38, "learning_rate": 9.28e-07, "loss": 3.74, "step": 914500 }, { "epoch": 4.38, "learning_rate": 1.928e-06, "loss": 3.6837, "step": 915000 }, { "epoch": 4.38, "learning_rate": 2.928e-06, "loss": 3.6548, "step": 915500 }, { "epoch": 4.38, "learning_rate": 3.928e-06, "loss": 3.6495, "step": 916000 }, { "epoch": 4.38, "learning_rate": 4.928000000000001e-06, "loss": 3.6499, "step": 916500 }, { "epoch": 4.39, "learning_rate": 5.928000000000001e-06, "loss": 3.6435, "step": 917000 }, { "epoch": 4.39, "learning_rate": 6.928e-06, "loss": 3.6555, "step": 917500 }, { "epoch": 4.39, "learning_rate": 7.928e-06, "loss": 3.6449, "step": 918000 }, { "epoch": 4.39, "learning_rate": 8.928000000000002e-06, "loss": 3.645, "step": 918500 }, { "epoch": 4.4, "learning_rate": 9.928e-06, "loss": 3.6527, "step": 919000 }, { "epoch": 4.4, "learning_rate": 9.996285509805775e-06, "loss": 3.6712, "step": 919500 }, { "epoch": 4.4, "learning_rate": 9.992282826406824e-06, "loss": 3.6637, "step": 920000 }, { "epoch": 4.4, "learning_rate": 9.988280143007873e-06, "loss": 3.6703, "step": 920500 }, { "epoch": 4.41, "learning_rate": 9.984277459608923e-06, "loss": 3.6596, "step": 921000 }, { "epoch": 4.41, "learning_rate": 9.980274776209972e-06, "loss": 3.6731, "step": 921500 }, { "epoch": 4.41, "learning_rate": 9.976272092811021e-06, "loss": 3.6479, "step": 922000 }, { "epoch": 4.41, "learning_rate": 9.97226940941207e-06, "loss": 3.6645, "step": 922500 }, { "epoch": 4.42, "learning_rate": 9.968266726013121e-06, "loss": 3.6548, "step": 923000 }, { "epoch": 4.42, "learning_rate": 9.964264042614169e-06, "loss": 3.666, "step": 923500 }, { "epoch": 4.42, "learning_rate": 9.960261359215218e-06, "loss": 3.6458, "step": 924000 }, { "epoch": 4.42, "learning_rate": 9.956258675816269e-06, "loss": 3.6544, "step": 924500 }, { "epoch": 4.43, "learning_rate": 9.952255992417318e-06, "loss": 3.6303, "step": 925000 }, { "epoch": 4.43, "learning_rate": 9.948253309018367e-06, "loss": 3.6393, "step": 925500 }, { "epoch": 4.43, "learning_rate": 9.944250625619416e-06, "loss": 3.644, "step": 926000 }, { "epoch": 4.43, "learning_rate": 9.940247942220466e-06, "loss": 3.6509, "step": 926500 }, { "epoch": 4.43, "learning_rate": 9.936245258821515e-06, "loss": 3.6363, "step": 927000 }, { "epoch": 4.44, "learning_rate": 9.932242575422564e-06, "loss": 3.649, "step": 927500 }, { "epoch": 4.44, "learning_rate": 9.928239892023613e-06, "loss": 3.6387, "step": 928000 }, { "epoch": 4.44, "learning_rate": 9.924237208624662e-06, "loss": 3.6365, "step": 928500 }, { "epoch": 4.44, "learning_rate": 9.920234525225713e-06, "loss": 3.6407, "step": 929000 }, { "epoch": 4.45, "learning_rate": 9.91623184182676e-06, "loss": 3.6452, "step": 929500 }, { "epoch": 4.45, "learning_rate": 9.91222915842781e-06, "loss": 3.6411, "step": 930000 }, { "epoch": 4.45, "learning_rate": 9.90822647502886e-06, "loss": 3.6548, "step": 930500 }, { "epoch": 4.45, "learning_rate": 9.90422379162991e-06, "loss": 3.6502, "step": 931000 }, { "epoch": 4.46, "learning_rate": 9.90022110823096e-06, "loss": 3.6401, "step": 931500 }, { "epoch": 4.46, "learning_rate": 9.896218424832008e-06, "loss": 3.6336, "step": 932000 }, { "epoch": 4.46, "learning_rate": 9.892215741433058e-06, "loss": 3.6401, "step": 932500 }, { "epoch": 4.46, "learning_rate": 9.888213058034107e-06, "loss": 3.6192, "step": 933000 }, { "epoch": 4.47, "learning_rate": 9.884210374635156e-06, "loss": 3.6232, "step": 933500 }, { "epoch": 4.47, "learning_rate": 9.880207691236205e-06, "loss": 3.6416, "step": 934000 }, { "epoch": 4.47, "learning_rate": 9.876205007837254e-06, "loss": 3.6332, "step": 934500 }, { "epoch": 4.47, "learning_rate": 9.872202324438305e-06, "loss": 3.6184, "step": 935000 }, { "epoch": 4.48, "learning_rate": 9.868199641039353e-06, "loss": 3.6325, "step": 935500 }, { "epoch": 4.48, "learning_rate": 9.864196957640402e-06, "loss": 3.632, "step": 936000 }, { "epoch": 4.48, "learning_rate": 9.860194274241453e-06, "loss": 3.6197, "step": 936500 }, { "epoch": 4.48, "learning_rate": 9.856191590842502e-06, "loss": 3.6193, "step": 937000 }, { "epoch": 4.49, "learning_rate": 9.852188907443551e-06, "loss": 3.6253, "step": 937500 }, { "epoch": 4.49, "learning_rate": 9.8481862240446e-06, "loss": 3.6216, "step": 938000 }, { "epoch": 4.49, "learning_rate": 9.84418354064565e-06, "loss": 3.6113, "step": 938500 }, { "epoch": 4.49, "learning_rate": 9.840180857246699e-06, "loss": 3.6246, "step": 939000 }, { "epoch": 4.49, "learning_rate": 9.836178173847748e-06, "loss": 3.6344, "step": 939500 }, { "epoch": 4.5, "learning_rate": 9.832175490448797e-06, "loss": 3.6139, "step": 940000 }, { "epoch": 4.5, "learning_rate": 9.828172807049847e-06, "loss": 3.6269, "step": 940500 }, { "epoch": 4.5, "learning_rate": 9.824170123650897e-06, "loss": 3.6017, "step": 941000 }, { "epoch": 4.5, "learning_rate": 9.820167440251945e-06, "loss": 3.6059, "step": 941500 }, { "epoch": 4.51, "learning_rate": 9.816164756852994e-06, "loss": 3.5937, "step": 942000 }, { "epoch": 4.51, "learning_rate": 9.812162073454045e-06, "loss": 3.6126, "step": 942500 }, { "epoch": 4.51, "learning_rate": 9.808159390055094e-06, "loss": 3.6252, "step": 943000 }, { "epoch": 4.51, "learning_rate": 9.804156706656143e-06, "loss": 3.6047, "step": 943500 }, { "epoch": 4.52, "learning_rate": 9.800154023257193e-06, "loss": 3.6086, "step": 944000 }, { "epoch": 4.52, "learning_rate": 9.796151339858242e-06, "loss": 3.6162, "step": 944500 }, { "epoch": 4.52, "learning_rate": 9.792148656459291e-06, "loss": 3.6092, "step": 945000 }, { "epoch": 4.52, "learning_rate": 9.78814597306034e-06, "loss": 3.6027, "step": 945500 }, { "epoch": 4.53, "learning_rate": 9.78414328966139e-06, "loss": 3.6049, "step": 946000 }, { "epoch": 4.53, "learning_rate": 9.780140606262439e-06, "loss": 3.5871, "step": 946500 }, { "epoch": 4.53, "learning_rate": 9.77613792286349e-06, "loss": 3.612, "step": 947000 }, { "epoch": 4.53, "learning_rate": 9.772135239464537e-06, "loss": 3.6203, "step": 947500 }, { "epoch": 4.54, "learning_rate": 9.768132556065586e-06, "loss": 3.6045, "step": 948000 }, { "epoch": 4.54, "learning_rate": 9.764129872666637e-06, "loss": 3.6093, "step": 948500 }, { "epoch": 4.54, "learning_rate": 9.760127189267686e-06, "loss": 3.5939, "step": 949000 }, { "epoch": 4.54, "learning_rate": 9.756124505868736e-06, "loss": 3.5818, "step": 949500 }, { "epoch": 4.54, "learning_rate": 9.752121822469785e-06, "loss": 3.5917, "step": 950000 }, { "epoch": 4.55, "learning_rate": 9.748119139070834e-06, "loss": 3.5907, "step": 950500 }, { "epoch": 4.55, "learning_rate": 9.744116455671883e-06, "loss": 3.5795, "step": 951000 }, { "epoch": 4.55, "learning_rate": 9.740113772272932e-06, "loss": 3.5883, "step": 951500 }, { "epoch": 4.55, "learning_rate": 9.736111088873982e-06, "loss": 3.5787, "step": 952000 }, { "epoch": 4.56, "learning_rate": 9.73210840547503e-06, "loss": 3.5857, "step": 952500 }, { "epoch": 4.56, "learning_rate": 9.728105722076082e-06, "loss": 3.5732, "step": 953000 }, { "epoch": 4.56, "learning_rate": 9.72410303867713e-06, "loss": 3.5931, "step": 953500 }, { "epoch": 4.56, "learning_rate": 9.720100355278178e-06, "loss": 3.5892, "step": 954000 }, { "epoch": 4.57, "learning_rate": 9.71609767187923e-06, "loss": 3.5789, "step": 954500 }, { "epoch": 4.57, "learning_rate": 9.712094988480278e-06, "loss": 3.5734, "step": 955000 }, { "epoch": 4.57, "learning_rate": 9.708092305081328e-06, "loss": 3.577, "step": 955500 }, { "epoch": 4.57, "learning_rate": 9.704089621682377e-06, "loss": 3.5898, "step": 956000 }, { "epoch": 4.58, "learning_rate": 9.700086938283426e-06, "loss": 3.5609, "step": 956500 }, { "epoch": 4.58, "learning_rate": 9.696084254884475e-06, "loss": 3.5817, "step": 957000 }, { "epoch": 4.58, "learning_rate": 9.692081571485524e-06, "loss": 3.571, "step": 957500 }, { "epoch": 4.58, "learning_rate": 9.688078888086574e-06, "loss": 3.5702, "step": 958000 }, { "epoch": 4.59, "learning_rate": 9.684076204687623e-06, "loss": 3.5672, "step": 958500 }, { "epoch": 4.59, "learning_rate": 9.680073521288674e-06, "loss": 3.5786, "step": 959000 }, { "epoch": 4.59, "learning_rate": 9.676070837889721e-06, "loss": 3.5617, "step": 959500 }, { "epoch": 4.59, "learning_rate": 9.67206815449077e-06, "loss": 3.5776, "step": 960000 }, { "epoch": 4.6, "learning_rate": 9.668065471091821e-06, "loss": 3.5625, "step": 960500 }, { "epoch": 4.6, "learning_rate": 9.66406278769287e-06, "loss": 3.5623, "step": 961000 }, { "epoch": 4.6, "learning_rate": 9.66006010429392e-06, "loss": 3.5592, "step": 961500 }, { "epoch": 4.6, "learning_rate": 9.656057420894969e-06, "loss": 3.5509, "step": 962000 }, { "epoch": 4.6, "learning_rate": 9.652054737496018e-06, "loss": 3.5683, "step": 962500 }, { "epoch": 4.61, "learning_rate": 9.648052054097067e-06, "loss": 3.5564, "step": 963000 }, { "epoch": 4.61, "learning_rate": 9.644049370698117e-06, "loss": 3.5604, "step": 963500 }, { "epoch": 4.61, "learning_rate": 9.640046687299166e-06, "loss": 3.5741, "step": 964000 }, { "epoch": 4.61, "learning_rate": 9.636044003900215e-06, "loss": 3.5541, "step": 964500 }, { "epoch": 4.62, "learning_rate": 9.632041320501266e-06, "loss": 3.5703, "step": 965000 }, { "epoch": 4.62, "learning_rate": 9.628038637102313e-06, "loss": 3.5455, "step": 965500 }, { "epoch": 4.62, "learning_rate": 9.624035953703363e-06, "loss": 3.5427, "step": 966000 }, { "epoch": 4.62, "learning_rate": 9.620033270304413e-06, "loss": 3.5595, "step": 966500 }, { "epoch": 4.63, "learning_rate": 9.616030586905463e-06, "loss": 3.5436, "step": 967000 }, { "epoch": 4.63, "learning_rate": 9.612027903506512e-06, "loss": 3.551, "step": 967500 }, { "epoch": 4.63, "learning_rate": 9.608025220107561e-06, "loss": 3.5516, "step": 968000 }, { "epoch": 4.63, "learning_rate": 9.60402253670861e-06, "loss": 3.5552, "step": 968500 }, { "epoch": 4.64, "learning_rate": 9.60001985330966e-06, "loss": 3.5368, "step": 969000 }, { "epoch": 4.64, "learning_rate": 9.596017169910709e-06, "loss": 3.5329, "step": 969500 }, { "epoch": 4.64, "learning_rate": 9.592014486511758e-06, "loss": 3.5442, "step": 970000 }, { "epoch": 4.64, "learning_rate": 9.588011803112807e-06, "loss": 3.5538, "step": 970500 }, { "epoch": 4.65, "learning_rate": 9.584009119713858e-06, "loss": 3.5464, "step": 971000 }, { "epoch": 4.65, "learning_rate": 9.580006436314905e-06, "loss": 3.5492, "step": 971500 }, { "epoch": 4.65, "learning_rate": 9.576003752915955e-06, "loss": 3.5393, "step": 972000 }, { "epoch": 4.65, "learning_rate": 9.572001069517006e-06, "loss": 3.549, "step": 972500 }, { "epoch": 4.65, "learning_rate": 9.567998386118055e-06, "loss": 3.5289, "step": 973000 }, { "epoch": 4.66, "learning_rate": 9.563995702719104e-06, "loss": 3.5552, "step": 973500 }, { "epoch": 4.66, "learning_rate": 9.559993019320153e-06, "loss": 3.545, "step": 974000 }, { "epoch": 4.66, "learning_rate": 9.555990335921202e-06, "loss": 3.542, "step": 974500 }, { "epoch": 4.66, "learning_rate": 9.551987652522252e-06, "loss": 3.5498, "step": 975000 }, { "epoch": 4.67, "learning_rate": 9.5479849691233e-06, "loss": 3.5428, "step": 975500 }, { "epoch": 4.67, "learning_rate": 9.54398228572435e-06, "loss": 3.5262, "step": 976000 }, { "epoch": 4.67, "learning_rate": 9.5399796023254e-06, "loss": 3.5596, "step": 976500 }, { "epoch": 4.67, "learning_rate": 9.53597691892645e-06, "loss": 3.5245, "step": 977000 }, { "epoch": 4.68, "learning_rate": 9.531974235527498e-06, "loss": 3.5314, "step": 977500 }, { "epoch": 4.68, "learning_rate": 9.527971552128547e-06, "loss": 3.5227, "step": 978000 }, { "epoch": 4.68, "learning_rate": 9.523968868729598e-06, "loss": 3.5274, "step": 978500 }, { "epoch": 4.68, "learning_rate": 9.519966185330647e-06, "loss": 3.5329, "step": 979000 }, { "epoch": 4.69, "learning_rate": 9.515963501931696e-06, "loss": 3.52, "step": 979500 }, { "epoch": 4.69, "learning_rate": 9.511960818532745e-06, "loss": 3.5284, "step": 980000 }, { "epoch": 4.69, "learning_rate": 9.507958135133794e-06, "loss": 3.527, "step": 980500 }, { "epoch": 4.69, "learning_rate": 9.503955451734844e-06, "loss": 3.5231, "step": 981000 }, { "epoch": 4.7, "learning_rate": 9.499952768335893e-06, "loss": 3.4989, "step": 981500 }, { "epoch": 4.7, "learning_rate": 9.495950084936942e-06, "loss": 3.5303, "step": 982000 }, { "epoch": 4.7, "learning_rate": 9.491947401537991e-06, "loss": 3.4973, "step": 982500 }, { "epoch": 4.7, "learning_rate": 9.487944718139042e-06, "loss": 3.5241, "step": 983000 }, { "epoch": 4.71, "learning_rate": 9.48394203474009e-06, "loss": 3.509, "step": 983500 }, { "epoch": 4.71, "learning_rate": 9.479939351341139e-06, "loss": 3.5171, "step": 984000 }, { "epoch": 4.71, "learning_rate": 9.47593666794219e-06, "loss": 3.5103, "step": 984500 }, { "epoch": 4.71, "learning_rate": 9.471933984543239e-06, "loss": 3.5223, "step": 985000 }, { "epoch": 4.71, "learning_rate": 9.467931301144288e-06, "loss": 3.5036, "step": 985500 }, { "epoch": 4.72, "learning_rate": 9.463928617745337e-06, "loss": 3.5212, "step": 986000 }, { "epoch": 4.72, "learning_rate": 9.459925934346387e-06, "loss": 3.5028, "step": 986500 }, { "epoch": 4.72, "learning_rate": 9.455923250947436e-06, "loss": 3.527, "step": 987000 }, { "epoch": 4.72, "learning_rate": 9.451920567548485e-06, "loss": 3.4991, "step": 987500 }, { "epoch": 4.73, "learning_rate": 9.447917884149534e-06, "loss": 3.5121, "step": 988000 }, { "epoch": 4.73, "learning_rate": 9.443915200750583e-06, "loss": 3.4947, "step": 988500 }, { "epoch": 4.73, "learning_rate": 9.439912517351634e-06, "loss": 3.5095, "step": 989000 }, { "epoch": 4.73, "learning_rate": 9.435909833952682e-06, "loss": 3.5161, "step": 989500 }, { "epoch": 4.74, "learning_rate": 9.431907150553731e-06, "loss": 3.5029, "step": 990000 }, { "epoch": 4.74, "learning_rate": 9.427904467154782e-06, "loss": 3.4983, "step": 990500 }, { "epoch": 4.74, "learning_rate": 9.423901783755831e-06, "loss": 3.5224, "step": 991000 }, { "epoch": 4.74, "learning_rate": 9.41989910035688e-06, "loss": 3.5059, "step": 991500 }, { "epoch": 4.75, "learning_rate": 9.41589641695793e-06, "loss": 3.522, "step": 992000 }, { "epoch": 4.75, "learning_rate": 9.411893733558979e-06, "loss": 3.5031, "step": 992500 }, { "epoch": 4.75, "learning_rate": 9.407891050160028e-06, "loss": 3.4977, "step": 993000 }, { "epoch": 4.75, "learning_rate": 9.403888366761077e-06, "loss": 3.4896, "step": 993500 }, { "epoch": 4.76, "learning_rate": 9.399885683362126e-06, "loss": 3.4966, "step": 994000 }, { "epoch": 4.76, "learning_rate": 9.395882999963176e-06, "loss": 3.4921, "step": 994500 }, { "epoch": 4.76, "learning_rate": 9.391880316564226e-06, "loss": 3.5007, "step": 995000 }, { "epoch": 4.76, "learning_rate": 9.387877633165274e-06, "loss": 3.4917, "step": 995500 }, { "epoch": 4.76, "learning_rate": 9.383874949766323e-06, "loss": 3.4926, "step": 996000 }, { "epoch": 4.77, "learning_rate": 9.379872266367374e-06, "loss": 3.5001, "step": 996500 }, { "epoch": 4.77, "learning_rate": 9.375869582968423e-06, "loss": 3.4814, "step": 997000 }, { "epoch": 4.77, "learning_rate": 9.371866899569472e-06, "loss": 3.4847, "step": 997500 }, { "epoch": 4.77, "learning_rate": 9.367864216170522e-06, "loss": 3.4825, "step": 998000 }, { "epoch": 4.78, "learning_rate": 9.36386153277157e-06, "loss": 3.489, "step": 998500 }, { "epoch": 4.78, "learning_rate": 9.35985884937262e-06, "loss": 3.4716, "step": 999000 }, { "epoch": 4.78, "learning_rate": 9.35585616597367e-06, "loss": 3.4849, "step": 999500 }, { "epoch": 4.78, "learning_rate": 9.351853482574718e-06, "loss": 3.4844, "step": 1000000 }, { "epoch": 4.79, "learning_rate": 9.347850799175768e-06, "loss": 3.4868, "step": 1000500 }, { "epoch": 4.79, "learning_rate": 9.343848115776819e-06, "loss": 3.4658, "step": 1001000 }, { "epoch": 4.79, "learning_rate": 9.339845432377866e-06, "loss": 3.474, "step": 1001500 }, { "epoch": 4.79, "learning_rate": 9.335842748978915e-06, "loss": 3.4939, "step": 1002000 }, { "epoch": 4.8, "learning_rate": 9.331840065579966e-06, "loss": 3.4664, "step": 1002500 }, { "epoch": 4.8, "learning_rate": 9.327837382181015e-06, "loss": 3.4715, "step": 1003000 }, { "epoch": 4.8, "learning_rate": 9.323834698782065e-06, "loss": 3.4739, "step": 1003500 }, { "epoch": 4.8, "learning_rate": 9.319832015383114e-06, "loss": 3.4763, "step": 1004000 }, { "epoch": 4.81, "learning_rate": 9.315829331984163e-06, "loss": 3.4815, "step": 1004500 }, { "epoch": 4.81, "learning_rate": 9.311826648585212e-06, "loss": 3.4816, "step": 1005000 }, { "epoch": 4.81, "learning_rate": 9.307823965186261e-06, "loss": 3.4704, "step": 1005500 }, { "epoch": 4.81, "learning_rate": 9.30382128178731e-06, "loss": 3.47, "step": 1006000 }, { "epoch": 4.82, "learning_rate": 9.29981859838836e-06, "loss": 3.4674, "step": 1006500 }, { "epoch": 4.82, "learning_rate": 9.29581591498941e-06, "loss": 3.4659, "step": 1007000 }, { "epoch": 4.82, "learning_rate": 9.291813231590458e-06, "loss": 3.4701, "step": 1007500 }, { "epoch": 4.82, "learning_rate": 9.287810548191507e-06, "loss": 3.4679, "step": 1008000 }, { "epoch": 4.82, "learning_rate": 9.283807864792558e-06, "loss": 3.4887, "step": 1008500 }, { "epoch": 4.83, "learning_rate": 9.279805181393607e-06, "loss": 3.4622, "step": 1009000 }, { "epoch": 4.83, "learning_rate": 9.275802497994657e-06, "loss": 3.4561, "step": 1009500 }, { "epoch": 4.83, "learning_rate": 9.271799814595706e-06, "loss": 3.4557, "step": 1010000 }, { "epoch": 4.83, "learning_rate": 9.267797131196755e-06, "loss": 3.4776, "step": 1010500 }, { "epoch": 4.84, "learning_rate": 9.263794447797804e-06, "loss": 3.4496, "step": 1011000 }, { "epoch": 4.84, "learning_rate": 9.259791764398853e-06, "loss": 3.471, "step": 1011500 }, { "epoch": 4.84, "learning_rate": 9.255789080999903e-06, "loss": 3.4426, "step": 1012000 }, { "epoch": 4.84, "learning_rate": 9.251786397600952e-06, "loss": 3.4546, "step": 1012500 }, { "epoch": 4.85, "learning_rate": 9.247783714202003e-06, "loss": 3.4477, "step": 1013000 }, { "epoch": 4.85, "learning_rate": 9.24378103080305e-06, "loss": 3.47, "step": 1013500 }, { "epoch": 4.85, "learning_rate": 9.2397783474041e-06, "loss": 3.4587, "step": 1014000 }, { "epoch": 4.85, "learning_rate": 9.23577566400515e-06, "loss": 3.4485, "step": 1014500 }, { "epoch": 4.86, "learning_rate": 9.2317729806062e-06, "loss": 3.4658, "step": 1015000 }, { "epoch": 4.86, "learning_rate": 9.227770297207249e-06, "loss": 3.4314, "step": 1015500 }, { "epoch": 4.86, "learning_rate": 9.223767613808298e-06, "loss": 3.447, "step": 1016000 }, { "epoch": 4.86, "learning_rate": 9.219764930409347e-06, "loss": 3.4478, "step": 1016500 }, { "epoch": 4.87, "learning_rate": 9.215762247010396e-06, "loss": 3.4497, "step": 1017000 }, { "epoch": 4.87, "learning_rate": 9.211759563611446e-06, "loss": 3.458, "step": 1017500 }, { "epoch": 4.87, "learning_rate": 9.207756880212495e-06, "loss": 3.461, "step": 1018000 }, { "epoch": 4.87, "learning_rate": 9.203754196813544e-06, "loss": 3.4581, "step": 1018500 }, { "epoch": 4.87, "learning_rate": 9.199751513414595e-06, "loss": 3.4544, "step": 1019000 }, { "epoch": 4.88, "learning_rate": 9.195748830015642e-06, "loss": 3.4346, "step": 1019500 }, { "epoch": 4.88, "learning_rate": 9.191746146616692e-06, "loss": 3.4412, "step": 1020000 }, { "epoch": 4.88, "learning_rate": 9.187743463217742e-06, "loss": 3.4563, "step": 1020500 }, { "epoch": 4.88, "learning_rate": 9.183740779818792e-06, "loss": 3.4442, "step": 1021000 }, { "epoch": 4.89, "learning_rate": 9.179738096419841e-06, "loss": 3.4518, "step": 1021500 }, { "epoch": 4.89, "learning_rate": 9.17573541302089e-06, "loss": 3.448, "step": 1022000 }, { "epoch": 4.89, "learning_rate": 9.17173272962194e-06, "loss": 3.4234, "step": 1022500 }, { "epoch": 4.89, "learning_rate": 9.167730046222988e-06, "loss": 3.435, "step": 1023000 }, { "epoch": 4.9, "learning_rate": 9.163727362824038e-06, "loss": 3.4533, "step": 1023500 }, { "epoch": 4.9, "learning_rate": 9.159724679425087e-06, "loss": 3.4432, "step": 1024000 }, { "epoch": 4.9, "learning_rate": 9.155721996026136e-06, "loss": 3.4313, "step": 1024500 }, { "epoch": 4.9, "learning_rate": 9.151719312627187e-06, "loss": 3.4425, "step": 1025000 }, { "epoch": 4.91, "learning_rate": 9.147716629228234e-06, "loss": 3.4533, "step": 1025500 }, { "epoch": 4.91, "learning_rate": 9.143713945829284e-06, "loss": 3.4374, "step": 1026000 }, { "epoch": 4.91, "learning_rate": 9.139711262430335e-06, "loss": 3.4225, "step": 1026500 }, { "epoch": 4.91, "learning_rate": 9.135708579031384e-06, "loss": 3.4367, "step": 1027000 }, { "epoch": 4.92, "learning_rate": 9.131705895632433e-06, "loss": 3.44, "step": 1027500 }, { "epoch": 4.92, "learning_rate": 9.127703212233482e-06, "loss": 3.4377, "step": 1028000 }, { "epoch": 4.92, "learning_rate": 9.123700528834531e-06, "loss": 3.4361, "step": 1028500 }, { "epoch": 4.92, "learning_rate": 9.11969784543558e-06, "loss": 3.4369, "step": 1029000 }, { "epoch": 4.93, "learning_rate": 9.11569516203663e-06, "loss": 3.4374, "step": 1029500 }, { "epoch": 4.93, "learning_rate": 9.111692478637679e-06, "loss": 3.4377, "step": 1030000 }, { "epoch": 4.93, "learning_rate": 9.107689795238728e-06, "loss": 3.422, "step": 1030500 }, { "epoch": 4.93, "learning_rate": 9.103687111839779e-06, "loss": 3.4309, "step": 1031000 }, { "epoch": 4.93, "learning_rate": 9.099684428440827e-06, "loss": 3.4217, "step": 1031500 }, { "epoch": 4.94, "learning_rate": 9.095681745041876e-06, "loss": 3.4228, "step": 1032000 }, { "epoch": 4.94, "learning_rate": 9.091679061642927e-06, "loss": 3.4227, "step": 1032500 }, { "epoch": 4.94, "learning_rate": 9.087676378243976e-06, "loss": 3.4247, "step": 1033000 }, { "epoch": 4.94, "learning_rate": 9.083673694845025e-06, "loss": 3.4238, "step": 1033500 }, { "epoch": 4.95, "learning_rate": 9.079671011446074e-06, "loss": 3.4359, "step": 1034000 }, { "epoch": 4.95, "learning_rate": 9.075668328047123e-06, "loss": 3.4259, "step": 1034500 }, { "epoch": 4.95, "learning_rate": 9.071665644648173e-06, "loss": 3.4207, "step": 1035000 }, { "epoch": 4.95, "learning_rate": 9.067662961249222e-06, "loss": 3.4115, "step": 1035500 }, { "epoch": 4.96, "learning_rate": 9.063660277850271e-06, "loss": 3.408, "step": 1036000 }, { "epoch": 4.96, "learning_rate": 9.05965759445132e-06, "loss": 3.4158, "step": 1036500 }, { "epoch": 4.96, "learning_rate": 9.055654911052371e-06, "loss": 3.4278, "step": 1037000 }, { "epoch": 4.96, "learning_rate": 9.051652227653419e-06, "loss": 3.4451, "step": 1037500 }, { "epoch": 4.97, "learning_rate": 9.047649544254468e-06, "loss": 3.417, "step": 1038000 }, { "epoch": 4.97, "learning_rate": 9.043646860855519e-06, "loss": 3.4113, "step": 1038500 }, { "epoch": 4.97, "learning_rate": 9.039644177456568e-06, "loss": 3.4053, "step": 1039000 }, { "epoch": 4.97, "learning_rate": 9.035641494057617e-06, "loss": 3.4014, "step": 1039500 }, { "epoch": 4.98, "learning_rate": 9.031638810658666e-06, "loss": 3.4174, "step": 1040000 }, { "epoch": 4.98, "learning_rate": 9.027636127259716e-06, "loss": 3.3954, "step": 1040500 }, { "epoch": 4.98, "learning_rate": 9.023633443860765e-06, "loss": 3.4071, "step": 1041000 }, { "epoch": 4.98, "learning_rate": 9.019630760461814e-06, "loss": 3.4283, "step": 1041500 }, { "epoch": 4.99, "learning_rate": 9.015628077062863e-06, "loss": 3.4063, "step": 1042000 }, { "epoch": 4.99, "learning_rate": 9.011625393663912e-06, "loss": 3.3895, "step": 1042500 }, { "epoch": 4.99, "learning_rate": 9.007622710264963e-06, "loss": 3.4066, "step": 1043000 }, { "epoch": 4.99, "learning_rate": 9.00362002686601e-06, "loss": 3.4119, "step": 1043500 }, { "epoch": 4.99, "learning_rate": 8.99961734346706e-06, "loss": 3.3966, "step": 1044000 }, { "epoch": 5.0, "learning_rate": 8.995614660068111e-06, "loss": 3.3983, "step": 1044500 }, { "epoch": 5.0, "learning_rate": 8.99161197666916e-06, "loss": 3.4122, "step": 1045000 }, { "epoch": 5.0, "learning_rate": 8.98760929327021e-06, "loss": 3.403, "step": 1045500 }, { "epoch": 5.0, "learning_rate": 8.983606609871259e-06, "loss": 3.4052, "step": 1046000 }, { "epoch": 5.01, "learning_rate": 8.979603926472308e-06, "loss": 3.4156, "step": 1046500 }, { "epoch": 5.01, "learning_rate": 8.975601243073357e-06, "loss": 3.4046, "step": 1047000 }, { "epoch": 5.01, "learning_rate": 8.971598559674406e-06, "loss": 3.3866, "step": 1047500 }, { "epoch": 5.01, "learning_rate": 8.967595876275455e-06, "loss": 3.397, "step": 1048000 }, { "epoch": 5.02, "learning_rate": 8.963593192876505e-06, "loss": 3.3934, "step": 1048500 }, { "epoch": 5.02, "learning_rate": 8.959590509477555e-06, "loss": 3.4057, "step": 1049000 }, { "epoch": 5.02, "learning_rate": 8.955587826078603e-06, "loss": 3.4034, "step": 1049500 }, { "epoch": 5.02, "learning_rate": 8.951585142679652e-06, "loss": 3.3912, "step": 1050000 }, { "epoch": 5.03, "learning_rate": 8.947582459280703e-06, "loss": 3.3995, "step": 1050500 }, { "epoch": 5.03, "learning_rate": 8.943579775881752e-06, "loss": 3.3758, "step": 1051000 }, { "epoch": 5.03, "learning_rate": 8.939577092482801e-06, "loss": 3.3933, "step": 1051500 }, { "epoch": 5.03, "learning_rate": 8.93557440908385e-06, "loss": 3.3974, "step": 1052000 }, { "epoch": 5.04, "learning_rate": 8.9315717256849e-06, "loss": 3.3947, "step": 1052500 }, { "epoch": 5.04, "learning_rate": 8.927569042285949e-06, "loss": 3.3824, "step": 1053000 }, { "epoch": 5.04, "learning_rate": 8.923566358886998e-06, "loss": 3.3874, "step": 1053500 }, { "epoch": 5.04, "learning_rate": 8.919563675488047e-06, "loss": 3.3792, "step": 1054000 }, { "epoch": 5.04, "learning_rate": 8.915560992089097e-06, "loss": 3.3918, "step": 1054500 }, { "epoch": 5.05, "learning_rate": 8.911558308690148e-06, "loss": 3.3781, "step": 1055000 }, { "epoch": 5.05, "learning_rate": 8.907555625291195e-06, "loss": 3.3948, "step": 1055500 }, { "epoch": 5.05, "learning_rate": 8.903552941892244e-06, "loss": 3.3855, "step": 1056000 }, { "epoch": 5.05, "learning_rate": 8.899550258493295e-06, "loss": 3.3945, "step": 1056500 }, { "epoch": 5.06, "learning_rate": 8.895547575094344e-06, "loss": 3.3716, "step": 1057000 }, { "epoch": 5.06, "learning_rate": 8.891544891695394e-06, "loss": 3.3838, "step": 1057500 }, { "epoch": 5.06, "learning_rate": 8.887542208296443e-06, "loss": 3.3963, "step": 1058000 }, { "epoch": 5.06, "learning_rate": 8.883539524897492e-06, "loss": 3.4065, "step": 1058500 }, { "epoch": 5.07, "learning_rate": 8.879536841498541e-06, "loss": 3.3982, "step": 1059000 }, { "epoch": 5.07, "learning_rate": 8.87553415809959e-06, "loss": 3.3815, "step": 1059500 }, { "epoch": 5.07, "learning_rate": 8.87153147470064e-06, "loss": 3.3836, "step": 1060000 }, { "epoch": 5.07, "learning_rate": 8.867528791301689e-06, "loss": 3.3915, "step": 1060500 }, { "epoch": 5.08, "learning_rate": 8.86352610790274e-06, "loss": 3.3789, "step": 1061000 }, { "epoch": 5.08, "learning_rate": 8.859523424503787e-06, "loss": 3.3921, "step": 1061500 }, { "epoch": 5.08, "learning_rate": 8.855520741104836e-06, "loss": 3.39, "step": 1062000 }, { "epoch": 5.08, "learning_rate": 8.851518057705887e-06, "loss": 3.3655, "step": 1062500 }, { "epoch": 5.09, "learning_rate": 8.847515374306936e-06, "loss": 3.3851, "step": 1063000 }, { "epoch": 5.09, "learning_rate": 8.843512690907986e-06, "loss": 3.3707, "step": 1063500 }, { "epoch": 5.09, "learning_rate": 8.839510007509035e-06, "loss": 3.3615, "step": 1064000 }, { "epoch": 5.09, "learning_rate": 8.835507324110084e-06, "loss": 3.3788, "step": 1064500 }, { "epoch": 5.1, "learning_rate": 8.831504640711133e-06, "loss": 3.3719, "step": 1065000 }, { "epoch": 5.1, "learning_rate": 8.827501957312182e-06, "loss": 3.3626, "step": 1065500 }, { "epoch": 5.1, "learning_rate": 8.823499273913232e-06, "loss": 3.3631, "step": 1066000 }, { "epoch": 5.1, "learning_rate": 8.81949659051428e-06, "loss": 3.3576, "step": 1066500 }, { "epoch": 5.1, "learning_rate": 8.815493907115332e-06, "loss": 3.3631, "step": 1067000 }, { "epoch": 5.11, "learning_rate": 8.81149122371638e-06, "loss": 3.3458, "step": 1067500 }, { "epoch": 5.11, "learning_rate": 8.807488540317428e-06, "loss": 3.3737, "step": 1068000 }, { "epoch": 5.11, "learning_rate": 8.80348585691848e-06, "loss": 3.3795, "step": 1068500 }, { "epoch": 5.11, "learning_rate": 8.799483173519529e-06, "loss": 3.3823, "step": 1069000 }, { "epoch": 5.12, "learning_rate": 8.795480490120578e-06, "loss": 3.3549, "step": 1069500 }, { "epoch": 5.12, "learning_rate": 8.791477806721627e-06, "loss": 3.3519, "step": 1070000 }, { "epoch": 5.12, "learning_rate": 8.787475123322676e-06, "loss": 3.3695, "step": 1070500 }, { "epoch": 5.12, "learning_rate": 8.783472439923725e-06, "loss": 3.3603, "step": 1071000 }, { "epoch": 5.13, "learning_rate": 8.779469756524775e-06, "loss": 3.3586, "step": 1071500 }, { "epoch": 5.13, "learning_rate": 8.775467073125824e-06, "loss": 3.3561, "step": 1072000 }, { "epoch": 5.13, "learning_rate": 8.771464389726873e-06, "loss": 3.3482, "step": 1072500 }, { "epoch": 5.13, "learning_rate": 8.767461706327924e-06, "loss": 3.3564, "step": 1073000 }, { "epoch": 5.14, "learning_rate": 8.763459022928971e-06, "loss": 3.3558, "step": 1073500 }, { "epoch": 5.14, "learning_rate": 8.75945633953002e-06, "loss": 3.3778, "step": 1074000 }, { "epoch": 5.14, "learning_rate": 8.755453656131071e-06, "loss": 3.3673, "step": 1074500 }, { "epoch": 5.14, "learning_rate": 8.75145097273212e-06, "loss": 3.3794, "step": 1075000 }, { "epoch": 5.15, "learning_rate": 8.74744828933317e-06, "loss": 3.3445, "step": 1075500 }, { "epoch": 5.15, "learning_rate": 8.743445605934219e-06, "loss": 3.353, "step": 1076000 }, { "epoch": 5.15, "learning_rate": 8.739442922535268e-06, "loss": 3.3647, "step": 1076500 }, { "epoch": 5.15, "learning_rate": 8.735440239136317e-06, "loss": 3.3595, "step": 1077000 }, { "epoch": 5.15, "learning_rate": 8.731437555737367e-06, "loss": 3.3579, "step": 1077500 }, { "epoch": 5.16, "learning_rate": 8.727434872338416e-06, "loss": 3.3764, "step": 1078000 }, { "epoch": 5.16, "learning_rate": 8.723432188939465e-06, "loss": 3.3575, "step": 1078500 }, { "epoch": 5.16, "learning_rate": 8.719429505540516e-06, "loss": 3.3581, "step": 1079000 }, { "epoch": 5.16, "learning_rate": 8.715426822141563e-06, "loss": 3.3582, "step": 1079500 }, { "epoch": 5.17, "learning_rate": 8.711424138742613e-06, "loss": 3.364, "step": 1080000 }, { "epoch": 5.17, "learning_rate": 8.707421455343664e-06, "loss": 3.3689, "step": 1080500 }, { "epoch": 5.17, "learning_rate": 8.703418771944713e-06, "loss": 3.3486, "step": 1081000 }, { "epoch": 5.17, "learning_rate": 8.699416088545762e-06, "loss": 3.3392, "step": 1081500 }, { "epoch": 5.18, "learning_rate": 8.695413405146811e-06, "loss": 3.3616, "step": 1082000 }, { "epoch": 5.18, "learning_rate": 8.69141072174786e-06, "loss": 3.3624, "step": 1082500 }, { "epoch": 5.18, "learning_rate": 8.68740803834891e-06, "loss": 3.3314, "step": 1083000 }, { "epoch": 5.18, "learning_rate": 8.683405354949959e-06, "loss": 3.346, "step": 1083500 }, { "epoch": 5.19, "learning_rate": 8.679402671551008e-06, "loss": 3.3643, "step": 1084000 }, { "epoch": 5.19, "learning_rate": 8.675399988152057e-06, "loss": 3.3358, "step": 1084500 }, { "epoch": 5.19, "learning_rate": 8.671397304753108e-06, "loss": 3.3508, "step": 1085000 }, { "epoch": 5.19, "learning_rate": 8.667394621354156e-06, "loss": 3.3515, "step": 1085500 }, { "epoch": 5.2, "learning_rate": 8.663391937955205e-06, "loss": 3.3562, "step": 1086000 }, { "epoch": 5.2, "learning_rate": 8.659389254556256e-06, "loss": 3.3287, "step": 1086500 }, { "epoch": 5.2, "learning_rate": 8.655386571157305e-06, "loss": 3.3401, "step": 1087000 }, { "epoch": 5.2, "learning_rate": 8.651383887758354e-06, "loss": 3.3571, "step": 1087500 }, { "epoch": 5.21, "learning_rate": 8.647381204359403e-06, "loss": 3.3407, "step": 1088000 }, { "epoch": 5.21, "learning_rate": 8.643378520960452e-06, "loss": 3.3256, "step": 1088500 }, { "epoch": 5.21, "learning_rate": 8.639375837561502e-06, "loss": 3.3403, "step": 1089000 }, { "epoch": 5.21, "learning_rate": 8.635373154162551e-06, "loss": 3.3475, "step": 1089500 }, { "epoch": 5.21, "learning_rate": 8.6313704707636e-06, "loss": 3.3381, "step": 1090000 }, { "epoch": 5.22, "learning_rate": 8.62736778736465e-06, "loss": 3.3287, "step": 1090500 }, { "epoch": 5.22, "learning_rate": 8.6233651039657e-06, "loss": 3.3423, "step": 1091000 }, { "epoch": 5.22, "learning_rate": 8.619362420566748e-06, "loss": 3.3206, "step": 1091500 }, { "epoch": 5.22, "learning_rate": 8.615359737167797e-06, "loss": 3.3258, "step": 1092000 }, { "epoch": 5.23, "learning_rate": 8.611357053768848e-06, "loss": 3.327, "step": 1092500 }, { "epoch": 5.23, "learning_rate": 8.607354370369897e-06, "loss": 3.326, "step": 1093000 }, { "epoch": 5.23, "learning_rate": 8.603351686970946e-06, "loss": 3.3478, "step": 1093500 }, { "epoch": 5.23, "learning_rate": 8.599349003571995e-06, "loss": 3.3329, "step": 1094000 }, { "epoch": 5.24, "learning_rate": 8.595346320173045e-06, "loss": 3.3348, "step": 1094500 }, { "epoch": 5.24, "learning_rate": 8.591343636774094e-06, "loss": 3.3243, "step": 1095000 }, { "epoch": 5.24, "learning_rate": 8.587340953375145e-06, "loss": 3.3454, "step": 1095500 }, { "epoch": 5.24, "learning_rate": 8.583338269976192e-06, "loss": 3.3306, "step": 1096000 }, { "epoch": 5.25, "learning_rate": 8.579335586577241e-06, "loss": 3.3134, "step": 1096500 }, { "epoch": 5.25, "learning_rate": 8.575332903178292e-06, "loss": 3.3141, "step": 1097000 }, { "epoch": 5.25, "learning_rate": 8.571330219779341e-06, "loss": 3.3206, "step": 1097500 }, { "epoch": 5.25, "learning_rate": 8.567327536380389e-06, "loss": 3.3307, "step": 1098000 }, { "epoch": 5.26, "learning_rate": 8.56332485298144e-06, "loss": 3.3218, "step": 1098500 }, { "epoch": 5.26, "learning_rate": 8.559322169582489e-06, "loss": 3.3011, "step": 1099000 }, { "epoch": 5.26, "learning_rate": 8.555319486183538e-06, "loss": 3.3387, "step": 1099500 }, { "epoch": 5.26, "learning_rate": 8.551316802784587e-06, "loss": 3.3237, "step": 1100000 }, { "epoch": 5.26, "learning_rate": 8.547314119385637e-06, "loss": 3.3094, "step": 1100500 }, { "epoch": 5.27, "learning_rate": 8.543311435986686e-06, "loss": 3.332, "step": 1101000 }, { "epoch": 5.27, "learning_rate": 8.539308752587737e-06, "loss": 3.3344, "step": 1101500 }, { "epoch": 5.27, "learning_rate": 8.535306069188784e-06, "loss": 3.3262, "step": 1102000 }, { "epoch": 5.27, "learning_rate": 8.531303385789834e-06, "loss": 3.3267, "step": 1102500 }, { "epoch": 5.28, "learning_rate": 8.527300702390884e-06, "loss": 3.3155, "step": 1103000 }, { "epoch": 5.28, "learning_rate": 8.523298018991934e-06, "loss": 3.3035, "step": 1103500 }, { "epoch": 5.28, "learning_rate": 8.519295335592981e-06, "loss": 3.3111, "step": 1104000 }, { "epoch": 5.28, "learning_rate": 8.515292652194032e-06, "loss": 3.3118, "step": 1104500 }, { "epoch": 5.29, "learning_rate": 8.511289968795081e-06, "loss": 3.3005, "step": 1105000 }, { "epoch": 5.29, "learning_rate": 8.50728728539613e-06, "loss": 3.3014, "step": 1105500 }, { "epoch": 5.29, "learning_rate": 8.50328460199718e-06, "loss": 3.3032, "step": 1106000 }, { "epoch": 5.29, "learning_rate": 8.499281918598229e-06, "loss": 3.3133, "step": 1106500 }, { "epoch": 5.3, "learning_rate": 8.495279235199278e-06, "loss": 3.3007, "step": 1107000 }, { "epoch": 5.3, "learning_rate": 8.491276551800329e-06, "loss": 3.3218, "step": 1107500 }, { "epoch": 5.3, "learning_rate": 8.487273868401376e-06, "loss": 3.3353, "step": 1108000 }, { "epoch": 5.3, "learning_rate": 8.483271185002426e-06, "loss": 3.3056, "step": 1108500 }, { "epoch": 5.31, "learning_rate": 8.479268501603477e-06, "loss": 3.3195, "step": 1109000 }, { "epoch": 5.31, "learning_rate": 8.475265818204526e-06, "loss": 3.2965, "step": 1109500 }, { "epoch": 5.31, "learning_rate": 8.471263134805573e-06, "loss": 3.3079, "step": 1110000 }, { "epoch": 5.31, "learning_rate": 8.467260451406624e-06, "loss": 3.3237, "step": 1110500 }, { "epoch": 5.32, "learning_rate": 8.463257768007673e-06, "loss": 3.3059, "step": 1111000 }, { "epoch": 5.32, "learning_rate": 8.459255084608723e-06, "loss": 3.3178, "step": 1111500 }, { "epoch": 5.32, "learning_rate": 8.455252401209772e-06, "loss": 3.3127, "step": 1112000 }, { "epoch": 5.32, "learning_rate": 8.451249717810821e-06, "loss": 3.3192, "step": 1112500 }, { "epoch": 5.32, "learning_rate": 8.44724703441187e-06, "loss": 3.3053, "step": 1113000 }, { "epoch": 5.33, "learning_rate": 8.443244351012921e-06, "loss": 3.3102, "step": 1113500 }, { "epoch": 5.33, "learning_rate": 8.439241667613969e-06, "loss": 3.3199, "step": 1114000 }, { "epoch": 5.33, "learning_rate": 8.435238984215018e-06, "loss": 3.3016, "step": 1114500 }, { "epoch": 5.33, "learning_rate": 8.431236300816069e-06, "loss": 3.3186, "step": 1115000 }, { "epoch": 5.34, "learning_rate": 8.427233617417118e-06, "loss": 3.2908, "step": 1115500 }, { "epoch": 5.34, "learning_rate": 8.423230934018165e-06, "loss": 3.2958, "step": 1116000 }, { "epoch": 5.34, "learning_rate": 8.419228250619216e-06, "loss": 3.3082, "step": 1116500 }, { "epoch": 5.34, "learning_rate": 8.415225567220265e-06, "loss": 3.2966, "step": 1117000 }, { "epoch": 5.35, "learning_rate": 8.411222883821315e-06, "loss": 3.306, "step": 1117500 }, { "epoch": 5.35, "learning_rate": 8.407220200422364e-06, "loss": 3.3072, "step": 1118000 }, { "epoch": 5.35, "learning_rate": 8.403217517023413e-06, "loss": 3.2981, "step": 1118500 }, { "epoch": 5.35, "learning_rate": 8.399214833624462e-06, "loss": 3.2855, "step": 1119000 }, { "epoch": 5.36, "learning_rate": 8.395212150225513e-06, "loss": 3.3125, "step": 1119500 }, { "epoch": 5.36, "learning_rate": 8.39120946682656e-06, "loss": 3.3075, "step": 1120000 }, { "epoch": 5.36, "learning_rate": 8.38720678342761e-06, "loss": 3.3052, "step": 1120500 }, { "epoch": 5.36, "learning_rate": 8.38320410002866e-06, "loss": 3.2915, "step": 1121000 }, { "epoch": 5.37, "learning_rate": 8.37920141662971e-06, "loss": 3.2974, "step": 1121500 }, { "epoch": 5.37, "learning_rate": 8.375198733230757e-06, "loss": 3.2958, "step": 1122000 }, { "epoch": 5.37, "learning_rate": 8.371196049831808e-06, "loss": 3.2962, "step": 1122500 }, { "epoch": 5.37, "learning_rate": 8.367193366432858e-06, "loss": 3.3038, "step": 1123000 }, { "epoch": 5.37, "learning_rate": 8.363190683033907e-06, "loss": 3.2942, "step": 1123500 }, { "epoch": 5.38, "learning_rate": 8.359187999634956e-06, "loss": 3.2961, "step": 1124000 }, { "epoch": 5.38, "learning_rate": 8.355185316236005e-06, "loss": 3.3067, "step": 1124500 }, { "epoch": 5.38, "learning_rate": 8.351182632837054e-06, "loss": 3.2838, "step": 1125000 }, { "epoch": 5.38, "learning_rate": 8.347179949438105e-06, "loss": 3.3056, "step": 1125500 }, { "epoch": 5.39, "learning_rate": 8.343177266039153e-06, "loss": 3.2759, "step": 1126000 }, { "epoch": 5.39, "learning_rate": 8.339174582640202e-06, "loss": 3.2721, "step": 1126500 }, { "epoch": 5.39, "learning_rate": 8.335171899241253e-06, "loss": 3.2923, "step": 1127000 }, { "epoch": 5.39, "learning_rate": 8.331169215842302e-06, "loss": 3.2961, "step": 1127500 }, { "epoch": 5.4, "learning_rate": 8.32716653244335e-06, "loss": 3.3039, "step": 1128000 }, { "epoch": 5.4, "learning_rate": 8.3231638490444e-06, "loss": 3.2968, "step": 1128500 }, { "epoch": 5.4, "learning_rate": 8.31916116564545e-06, "loss": 3.2871, "step": 1129000 }, { "epoch": 5.4, "learning_rate": 8.315158482246499e-06, "loss": 3.2979, "step": 1129500 }, { "epoch": 5.41, "learning_rate": 8.311155798847548e-06, "loss": 3.2824, "step": 1130000 }, { "epoch": 5.41, "learning_rate": 8.307153115448597e-06, "loss": 3.2944, "step": 1130500 }, { "epoch": 5.41, "learning_rate": 8.303150432049646e-06, "loss": 3.2941, "step": 1131000 }, { "epoch": 5.41, "learning_rate": 8.299147748650697e-06, "loss": 3.2811, "step": 1131500 }, { "epoch": 5.42, "learning_rate": 8.295145065251745e-06, "loss": 3.2813, "step": 1132000 }, { "epoch": 5.42, "learning_rate": 8.291142381852794e-06, "loss": 3.2939, "step": 1132500 }, { "epoch": 5.42, "learning_rate": 8.287139698453845e-06, "loss": 3.2819, "step": 1133000 }, { "epoch": 5.42, "learning_rate": 8.283137015054894e-06, "loss": 3.2774, "step": 1133500 }, { "epoch": 5.43, "learning_rate": 8.279134331655942e-06, "loss": 3.2964, "step": 1134000 }, { "epoch": 5.43, "learning_rate": 8.275131648256993e-06, "loss": 3.2655, "step": 1134500 }, { "epoch": 5.43, "learning_rate": 8.271128964858042e-06, "loss": 3.2791, "step": 1135000 }, { "epoch": 5.43, "learning_rate": 8.267126281459091e-06, "loss": 3.2829, "step": 1135500 }, { "epoch": 5.43, "learning_rate": 8.26312359806014e-06, "loss": 3.281, "step": 1136000 }, { "epoch": 5.44, "learning_rate": 8.25912091466119e-06, "loss": 3.2728, "step": 1136500 }, { "epoch": 5.44, "learning_rate": 8.255118231262239e-06, "loss": 3.2873, "step": 1137000 }, { "epoch": 5.44, "learning_rate": 8.25111554786329e-06, "loss": 3.2807, "step": 1137500 }, { "epoch": 5.44, "learning_rate": 8.247112864464337e-06, "loss": 3.2678, "step": 1138000 }, { "epoch": 5.45, "learning_rate": 8.243110181065386e-06, "loss": 3.2703, "step": 1138500 }, { "epoch": 5.45, "learning_rate": 8.239107497666437e-06, "loss": 3.2813, "step": 1139000 }, { "epoch": 5.45, "learning_rate": 8.235104814267486e-06, "loss": 3.2883, "step": 1139500 }, { "epoch": 5.45, "learning_rate": 8.231102130868534e-06, "loss": 3.266, "step": 1140000 }, { "epoch": 5.46, "learning_rate": 8.227099447469585e-06, "loss": 3.2726, "step": 1140500 }, { "epoch": 5.46, "learning_rate": 8.223096764070634e-06, "loss": 3.2736, "step": 1141000 }, { "epoch": 5.46, "learning_rate": 8.219094080671683e-06, "loss": 3.2734, "step": 1141500 }, { "epoch": 5.46, "learning_rate": 8.215091397272732e-06, "loss": 3.2866, "step": 1142000 }, { "epoch": 5.47, "learning_rate": 8.211088713873781e-06, "loss": 3.2704, "step": 1142500 }, { "epoch": 5.47, "learning_rate": 8.20708603047483e-06, "loss": 3.2742, "step": 1143000 }, { "epoch": 5.47, "learning_rate": 8.203083347075882e-06, "loss": 3.2874, "step": 1143500 }, { "epoch": 5.47, "learning_rate": 8.199080663676929e-06, "loss": 3.2823, "step": 1144000 }, { "epoch": 5.48, "learning_rate": 8.195077980277978e-06, "loss": 3.256, "step": 1144500 }, { "epoch": 5.48, "learning_rate": 8.19107529687903e-06, "loss": 3.274, "step": 1145000 }, { "epoch": 5.48, "learning_rate": 8.187072613480078e-06, "loss": 3.2788, "step": 1145500 }, { "epoch": 5.48, "learning_rate": 8.183069930081126e-06, "loss": 3.2601, "step": 1146000 }, { "epoch": 5.48, "learning_rate": 8.179067246682177e-06, "loss": 3.2771, "step": 1146500 }, { "epoch": 5.49, "learning_rate": 8.175064563283226e-06, "loss": 3.2826, "step": 1147000 }, { "epoch": 5.49, "learning_rate": 8.171061879884275e-06, "loss": 3.2721, "step": 1147500 }, { "epoch": 5.49, "learning_rate": 8.167059196485324e-06, "loss": 3.2869, "step": 1148000 }, { "epoch": 5.49, "learning_rate": 8.163056513086374e-06, "loss": 3.2787, "step": 1148500 }, { "epoch": 5.5, "learning_rate": 8.159053829687423e-06, "loss": 3.2689, "step": 1149000 }, { "epoch": 5.5, "learning_rate": 8.155051146288474e-06, "loss": 3.2545, "step": 1149500 }, { "epoch": 5.5, "learning_rate": 8.151048462889521e-06, "loss": 3.2727, "step": 1150000 }, { "epoch": 5.5, "learning_rate": 8.14704577949057e-06, "loss": 3.2654, "step": 1150500 }, { "epoch": 5.51, "learning_rate": 8.143043096091621e-06, "loss": 3.2624, "step": 1151000 }, { "epoch": 5.51, "learning_rate": 8.13904041269267e-06, "loss": 3.2515, "step": 1151500 }, { "epoch": 5.51, "learning_rate": 8.135037729293718e-06, "loss": 3.2505, "step": 1152000 }, { "epoch": 5.51, "learning_rate": 8.131035045894769e-06, "loss": 3.2632, "step": 1152500 }, { "epoch": 5.52, "learning_rate": 8.127032362495818e-06, "loss": 3.2662, "step": 1153000 }, { "epoch": 5.52, "learning_rate": 8.123029679096867e-06, "loss": 3.2461, "step": 1153500 }, { "epoch": 5.52, "learning_rate": 8.119026995697916e-06, "loss": 3.2541, "step": 1154000 }, { "epoch": 5.52, "learning_rate": 8.115024312298966e-06, "loss": 3.2546, "step": 1154500 }, { "epoch": 5.53, "learning_rate": 8.111021628900015e-06, "loss": 3.2552, "step": 1155000 }, { "epoch": 5.53, "learning_rate": 8.107018945501066e-06, "loss": 3.2635, "step": 1155500 }, { "epoch": 5.53, "learning_rate": 8.103016262102113e-06, "loss": 3.2563, "step": 1156000 }, { "epoch": 5.53, "learning_rate": 8.099013578703162e-06, "loss": 3.2706, "step": 1156500 }, { "epoch": 5.54, "learning_rate": 8.095010895304213e-06, "loss": 3.2538, "step": 1157000 }, { "epoch": 5.54, "learning_rate": 8.091008211905263e-06, "loss": 3.2605, "step": 1157500 }, { "epoch": 5.54, "learning_rate": 8.08700552850631e-06, "loss": 3.2468, "step": 1158000 }, { "epoch": 5.54, "learning_rate": 8.083002845107361e-06, "loss": 3.2514, "step": 1158500 }, { "epoch": 5.54, "learning_rate": 8.07900016170841e-06, "loss": 3.2688, "step": 1159000 }, { "epoch": 5.55, "learning_rate": 8.07499747830946e-06, "loss": 3.2543, "step": 1159500 }, { "epoch": 5.55, "learning_rate": 8.070994794910509e-06, "loss": 3.2374, "step": 1160000 }, { "epoch": 5.55, "learning_rate": 8.066992111511558e-06, "loss": 3.2605, "step": 1160500 }, { "epoch": 5.55, "learning_rate": 8.062989428112607e-06, "loss": 3.2453, "step": 1161000 }, { "epoch": 5.56, "learning_rate": 8.058986744713658e-06, "loss": 3.2419, "step": 1161500 }, { "epoch": 5.56, "learning_rate": 8.054984061314705e-06, "loss": 3.2599, "step": 1162000 }, { "epoch": 5.56, "learning_rate": 8.050981377915755e-06, "loss": 3.2566, "step": 1162500 }, { "epoch": 5.56, "learning_rate": 8.046978694516806e-06, "loss": 3.2559, "step": 1163000 }, { "epoch": 5.57, "learning_rate": 8.042976011117855e-06, "loss": 3.2489, "step": 1163500 }, { "epoch": 5.57, "learning_rate": 8.038973327718902e-06, "loss": 3.2448, "step": 1164000 }, { "epoch": 5.57, "learning_rate": 8.034970644319953e-06, "loss": 3.246, "step": 1164500 }, { "epoch": 5.57, "learning_rate": 8.030967960921002e-06, "loss": 3.2343, "step": 1165000 }, { "epoch": 5.58, "learning_rate": 8.026965277522052e-06, "loss": 3.2286, "step": 1165500 }, { "epoch": 5.58, "learning_rate": 8.0229625941231e-06, "loss": 3.2526, "step": 1166000 }, { "epoch": 5.58, "learning_rate": 8.01895991072415e-06, "loss": 3.2441, "step": 1166500 }, { "epoch": 5.58, "learning_rate": 8.014957227325199e-06, "loss": 3.2316, "step": 1167000 }, { "epoch": 5.59, "learning_rate": 8.01095454392625e-06, "loss": 3.2599, "step": 1167500 }, { "epoch": 5.59, "learning_rate": 8.006951860527298e-06, "loss": 3.2614, "step": 1168000 }, { "epoch": 5.59, "learning_rate": 8.002949177128347e-06, "loss": 3.2432, "step": 1168500 }, { "epoch": 5.59, "learning_rate": 7.998946493729398e-06, "loss": 3.2465, "step": 1169000 }, { "epoch": 5.59, "learning_rate": 7.994943810330447e-06, "loss": 3.2357, "step": 1169500 }, { "epoch": 5.6, "learning_rate": 7.990941126931494e-06, "loss": 3.247, "step": 1170000 }, { "epoch": 5.6, "learning_rate": 7.986938443532545e-06, "loss": 3.2461, "step": 1170500 }, { "epoch": 5.6, "learning_rate": 7.982935760133594e-06, "loss": 3.2429, "step": 1171000 }, { "epoch": 5.6, "learning_rate": 7.978933076734644e-06, "loss": 3.2473, "step": 1171500 }, { "epoch": 5.61, "learning_rate": 7.974930393335693e-06, "loss": 3.2314, "step": 1172000 }, { "epoch": 5.61, "learning_rate": 7.970927709936742e-06, "loss": 3.2502, "step": 1172500 }, { "epoch": 5.61, "learning_rate": 7.966925026537791e-06, "loss": 3.2342, "step": 1173000 }, { "epoch": 5.61, "learning_rate": 7.962922343138842e-06, "loss": 3.229, "step": 1173500 }, { "epoch": 5.62, "learning_rate": 7.95891965973989e-06, "loss": 3.2453, "step": 1174000 }, { "epoch": 5.62, "learning_rate": 7.954916976340939e-06, "loss": 3.2478, "step": 1174500 }, { "epoch": 5.62, "learning_rate": 7.95091429294199e-06, "loss": 3.248, "step": 1175000 }, { "epoch": 5.62, "learning_rate": 7.946911609543039e-06, "loss": 3.242, "step": 1175500 }, { "epoch": 5.63, "learning_rate": 7.942908926144086e-06, "loss": 3.2378, "step": 1176000 }, { "epoch": 5.63, "learning_rate": 7.938906242745137e-06, "loss": 3.2453, "step": 1176500 }, { "epoch": 5.63, "learning_rate": 7.934903559346187e-06, "loss": 3.2363, "step": 1177000 }, { "epoch": 5.63, "learning_rate": 7.930900875947236e-06, "loss": 3.2372, "step": 1177500 }, { "epoch": 5.64, "learning_rate": 7.926898192548285e-06, "loss": 3.2232, "step": 1178000 }, { "epoch": 5.64, "learning_rate": 7.922895509149334e-06, "loss": 3.2244, "step": 1178500 }, { "epoch": 5.64, "learning_rate": 7.918892825750383e-06, "loss": 3.2392, "step": 1179000 }, { "epoch": 5.64, "learning_rate": 7.914890142351434e-06, "loss": 3.2361, "step": 1179500 }, { "epoch": 5.65, "learning_rate": 7.910887458952482e-06, "loss": 3.2229, "step": 1180000 }, { "epoch": 5.65, "learning_rate": 7.906884775553531e-06, "loss": 3.2502, "step": 1180500 }, { "epoch": 5.65, "learning_rate": 7.902882092154582e-06, "loss": 3.2407, "step": 1181000 }, { "epoch": 5.65, "learning_rate": 7.898879408755631e-06, "loss": 3.2298, "step": 1181500 }, { "epoch": 5.65, "learning_rate": 7.894876725356679e-06, "loss": 3.2216, "step": 1182000 }, { "epoch": 5.66, "learning_rate": 7.89087404195773e-06, "loss": 3.2201, "step": 1182500 }, { "epoch": 5.66, "learning_rate": 7.886871358558779e-06, "loss": 3.2355, "step": 1183000 }, { "epoch": 5.66, "learning_rate": 7.882868675159828e-06, "loss": 3.2362, "step": 1183500 }, { "epoch": 5.66, "learning_rate": 7.878865991760877e-06, "loss": 3.2239, "step": 1184000 }, { "epoch": 5.67, "learning_rate": 7.874863308361926e-06, "loss": 3.2249, "step": 1184500 }, { "epoch": 5.67, "learning_rate": 7.870860624962975e-06, "loss": 3.2411, "step": 1185000 }, { "epoch": 5.67, "learning_rate": 7.866857941564026e-06, "loss": 3.2443, "step": 1185500 }, { "epoch": 5.67, "learning_rate": 7.862855258165074e-06, "loss": 3.2211, "step": 1186000 }, { "epoch": 5.68, "learning_rate": 7.858852574766123e-06, "loss": 3.2286, "step": 1186500 }, { "epoch": 5.68, "learning_rate": 7.854849891367174e-06, "loss": 3.2384, "step": 1187000 }, { "epoch": 5.68, "learning_rate": 7.850847207968223e-06, "loss": 3.2304, "step": 1187500 }, { "epoch": 5.68, "learning_rate": 7.84684452456927e-06, "loss": 3.2317, "step": 1188000 }, { "epoch": 5.69, "learning_rate": 7.842841841170322e-06, "loss": 3.223, "step": 1188500 }, { "epoch": 5.69, "learning_rate": 7.83883915777137e-06, "loss": 3.231, "step": 1189000 }, { "epoch": 5.69, "learning_rate": 7.83483647437242e-06, "loss": 3.2226, "step": 1189500 }, { "epoch": 5.69, "learning_rate": 7.830833790973469e-06, "loss": 3.2199, "step": 1190000 }, { "epoch": 5.7, "learning_rate": 7.826831107574518e-06, "loss": 3.2368, "step": 1190500 }, { "epoch": 5.7, "learning_rate": 7.822828424175568e-06, "loss": 3.2386, "step": 1191000 }, { "epoch": 5.7, "learning_rate": 7.818825740776618e-06, "loss": 3.2184, "step": 1191500 }, { "epoch": 5.7, "learning_rate": 7.814823057377666e-06, "loss": 3.2167, "step": 1192000 }, { "epoch": 5.71, "learning_rate": 7.810820373978715e-06, "loss": 3.2251, "step": 1192500 }, { "epoch": 5.71, "learning_rate": 7.806817690579766e-06, "loss": 3.2358, "step": 1193000 }, { "epoch": 5.71, "learning_rate": 7.802815007180815e-06, "loss": 3.2141, "step": 1193500 }, { "epoch": 5.71, "learning_rate": 7.798812323781863e-06, "loss": 3.2126, "step": 1194000 }, { "epoch": 5.71, "learning_rate": 7.794809640382914e-06, "loss": 3.2383, "step": 1194500 }, { "epoch": 5.72, "learning_rate": 7.790806956983963e-06, "loss": 3.2188, "step": 1195000 }, { "epoch": 5.72, "learning_rate": 7.786804273585012e-06, "loss": 3.2143, "step": 1195500 }, { "epoch": 5.72, "learning_rate": 7.782801590186061e-06, "loss": 3.2256, "step": 1196000 }, { "epoch": 5.72, "learning_rate": 7.77879890678711e-06, "loss": 3.2247, "step": 1196500 }, { "epoch": 5.73, "learning_rate": 7.77479622338816e-06, "loss": 3.2005, "step": 1197000 }, { "epoch": 5.73, "learning_rate": 7.77079353998921e-06, "loss": 3.2231, "step": 1197500 }, { "epoch": 5.73, "learning_rate": 7.766790856590258e-06, "loss": 3.215, "step": 1198000 }, { "epoch": 5.73, "learning_rate": 7.762788173191307e-06, "loss": 3.2147, "step": 1198500 }, { "epoch": 5.74, "learning_rate": 7.758785489792358e-06, "loss": 3.2175, "step": 1199000 }, { "epoch": 5.74, "learning_rate": 7.754782806393407e-06, "loss": 3.2086, "step": 1199500 }, { "epoch": 5.74, "learning_rate": 7.750780122994455e-06, "loss": 3.207, "step": 1200000 }, { "epoch": 5.74, "learning_rate": 7.746777439595506e-06, "loss": 3.2035, "step": 1200500 }, { "epoch": 5.75, "learning_rate": 7.742774756196555e-06, "loss": 3.2304, "step": 1201000 }, { "epoch": 5.75, "learning_rate": 7.738772072797604e-06, "loss": 3.2161, "step": 1201500 }, { "epoch": 5.75, "learning_rate": 7.734769389398653e-06, "loss": 3.2194, "step": 1202000 }, { "epoch": 5.75, "learning_rate": 7.730766705999703e-06, "loss": 3.2104, "step": 1202500 }, { "epoch": 5.76, "learning_rate": 7.726764022600752e-06, "loss": 3.2134, "step": 1203000 }, { "epoch": 5.76, "learning_rate": 7.722761339201803e-06, "loss": 3.219, "step": 1203500 }, { "epoch": 5.76, "learning_rate": 7.71875865580285e-06, "loss": 3.2207, "step": 1204000 }, { "epoch": 5.76, "learning_rate": 7.7147559724039e-06, "loss": 3.2118, "step": 1204500 }, { "epoch": 5.76, "learning_rate": 7.71075328900495e-06, "loss": 3.2275, "step": 1205000 }, { "epoch": 5.77, "learning_rate": 7.706750605606e-06, "loss": 3.1857, "step": 1205500 }, { "epoch": 5.77, "learning_rate": 7.702747922207047e-06, "loss": 3.215, "step": 1206000 }, { "epoch": 5.77, "learning_rate": 7.698745238808098e-06, "loss": 3.2049, "step": 1206500 }, { "epoch": 5.77, "learning_rate": 7.694742555409147e-06, "loss": 3.2012, "step": 1207000 }, { "epoch": 5.78, "learning_rate": 7.690739872010196e-06, "loss": 3.2215, "step": 1207500 }, { "epoch": 5.78, "learning_rate": 7.686737188611245e-06, "loss": 3.2027, "step": 1208000 }, { "epoch": 5.78, "learning_rate": 7.682734505212295e-06, "loss": 3.2169, "step": 1208500 }, { "epoch": 5.78, "learning_rate": 7.678731821813344e-06, "loss": 3.2064, "step": 1209000 }, { "epoch": 5.79, "learning_rate": 7.674729138414395e-06, "loss": 3.2188, "step": 1209500 }, { "epoch": 5.79, "learning_rate": 7.670726455015442e-06, "loss": 3.1964, "step": 1210000 }, { "epoch": 5.79, "learning_rate": 7.666723771616491e-06, "loss": 3.1835, "step": 1210500 }, { "epoch": 5.79, "learning_rate": 7.662721088217542e-06, "loss": 3.1982, "step": 1211000 }, { "epoch": 5.8, "learning_rate": 7.658718404818592e-06, "loss": 3.2069, "step": 1211500 }, { "epoch": 5.8, "learning_rate": 7.654715721419639e-06, "loss": 3.1984, "step": 1212000 }, { "epoch": 5.8, "learning_rate": 7.65071303802069e-06, "loss": 3.197, "step": 1212500 }, { "epoch": 5.8, "learning_rate": 7.64671035462174e-06, "loss": 3.2164, "step": 1213000 }, { "epoch": 5.81, "learning_rate": 7.642707671222788e-06, "loss": 3.2134, "step": 1213500 }, { "epoch": 5.81, "learning_rate": 7.638704987823838e-06, "loss": 3.1984, "step": 1214000 }, { "epoch": 5.81, "learning_rate": 7.634702304424887e-06, "loss": 3.2043, "step": 1214500 }, { "epoch": 5.81, "learning_rate": 7.630699621025936e-06, "loss": 3.1946, "step": 1215000 }, { "epoch": 5.82, "learning_rate": 7.626696937626986e-06, "loss": 3.2126, "step": 1215500 }, { "epoch": 5.82, "learning_rate": 7.622694254228034e-06, "loss": 3.1757, "step": 1216000 }, { "epoch": 5.82, "learning_rate": 7.6186915708290844e-06, "loss": 3.1974, "step": 1216500 }, { "epoch": 5.82, "learning_rate": 7.614688887430134e-06, "loss": 3.1973, "step": 1217000 }, { "epoch": 5.82, "learning_rate": 7.610686204031184e-06, "loss": 3.1989, "step": 1217500 }, { "epoch": 5.83, "learning_rate": 7.606683520632232e-06, "loss": 3.2144, "step": 1218000 }, { "epoch": 5.83, "learning_rate": 7.602680837233281e-06, "loss": 3.1938, "step": 1218500 }, { "epoch": 5.83, "learning_rate": 7.598678153834331e-06, "loss": 3.1877, "step": 1219000 }, { "epoch": 5.83, "learning_rate": 7.5946754704353805e-06, "loss": 3.2009, "step": 1219500 }, { "epoch": 5.84, "learning_rate": 7.59067278703643e-06, "loss": 3.1983, "step": 1220000 }, { "epoch": 5.84, "learning_rate": 7.586670103637479e-06, "loss": 3.197, "step": 1220500 }, { "epoch": 5.84, "learning_rate": 7.582667420238529e-06, "loss": 3.2096, "step": 1221000 }, { "epoch": 5.84, "learning_rate": 7.578664736839578e-06, "loss": 3.1755, "step": 1221500 }, { "epoch": 5.85, "learning_rate": 7.5746620534406265e-06, "loss": 3.1916, "step": 1222000 }, { "epoch": 5.85, "learning_rate": 7.5706593700416766e-06, "loss": 3.2051, "step": 1222500 }, { "epoch": 5.85, "learning_rate": 7.566656686642726e-06, "loss": 3.1812, "step": 1223000 }, { "epoch": 5.85, "learning_rate": 7.562654003243776e-06, "loss": 3.1993, "step": 1223500 }, { "epoch": 5.86, "learning_rate": 7.558651319844824e-06, "loss": 3.1907, "step": 1224000 }, { "epoch": 5.86, "learning_rate": 7.554648636445873e-06, "loss": 3.1972, "step": 1224500 }, { "epoch": 5.86, "learning_rate": 7.550645953046923e-06, "loss": 3.2049, "step": 1225000 }, { "epoch": 5.86, "learning_rate": 7.546643269647973e-06, "loss": 3.1825, "step": 1225500 }, { "epoch": 5.87, "learning_rate": 7.542640586249022e-06, "loss": 3.1768, "step": 1226000 }, { "epoch": 5.87, "learning_rate": 7.538637902850071e-06, "loss": 3.1902, "step": 1226500 }, { "epoch": 5.87, "learning_rate": 7.534635219451121e-06, "loss": 3.1875, "step": 1227000 }, { "epoch": 5.87, "learning_rate": 7.53063253605217e-06, "loss": 3.198, "step": 1227500 }, { "epoch": 5.87, "learning_rate": 7.526629852653219e-06, "loss": 3.1851, "step": 1228000 }, { "epoch": 5.88, "learning_rate": 7.522627169254269e-06, "loss": 3.1934, "step": 1228500 }, { "epoch": 5.88, "learning_rate": 7.518624485855318e-06, "loss": 3.1717, "step": 1229000 }, { "epoch": 5.88, "learning_rate": 7.514621802456368e-06, "loss": 3.1948, "step": 1229500 }, { "epoch": 5.88, "learning_rate": 7.510619119057416e-06, "loss": 3.2118, "step": 1230000 }, { "epoch": 5.89, "learning_rate": 7.5066164356584655e-06, "loss": 3.1771, "step": 1230500 }, { "epoch": 5.89, "learning_rate": 7.5026137522595155e-06, "loss": 3.1818, "step": 1231000 }, { "epoch": 5.89, "learning_rate": 7.498611068860565e-06, "loss": 3.189, "step": 1231500 }, { "epoch": 5.89, "learning_rate": 7.494608385461614e-06, "loss": 3.1778, "step": 1232000 }, { "epoch": 5.9, "learning_rate": 7.490605702062663e-06, "loss": 3.1775, "step": 1232500 }, { "epoch": 5.9, "learning_rate": 7.486603018663713e-06, "loss": 3.1775, "step": 1233000 }, { "epoch": 5.9, "learning_rate": 7.482600335264762e-06, "loss": 3.1882, "step": 1233500 }, { "epoch": 5.9, "learning_rate": 7.478597651865811e-06, "loss": 3.1981, "step": 1234000 }, { "epoch": 5.91, "learning_rate": 7.474594968466861e-06, "loss": 3.1832, "step": 1234500 }, { "epoch": 5.91, "learning_rate": 7.47059228506791e-06, "loss": 3.1902, "step": 1235000 }, { "epoch": 5.91, "learning_rate": 7.46658960166896e-06, "loss": 3.1851, "step": 1235500 }, { "epoch": 5.91, "learning_rate": 7.462586918270008e-06, "loss": 3.1938, "step": 1236000 }, { "epoch": 5.92, "learning_rate": 7.458584234871058e-06, "loss": 3.1565, "step": 1236500 }, { "epoch": 5.92, "learning_rate": 7.454581551472108e-06, "loss": 3.1785, "step": 1237000 }, { "epoch": 5.92, "learning_rate": 7.450578868073157e-06, "loss": 3.1825, "step": 1237500 }, { "epoch": 5.92, "learning_rate": 7.446576184674206e-06, "loss": 3.1712, "step": 1238000 }, { "epoch": 5.93, "learning_rate": 7.442573501275255e-06, "loss": 3.1838, "step": 1238500 }, { "epoch": 5.93, "learning_rate": 7.438570817876305e-06, "loss": 3.1843, "step": 1239000 }, { "epoch": 5.93, "learning_rate": 7.4345681344773545e-06, "loss": 3.1868, "step": 1239500 }, { "epoch": 5.93, "learning_rate": 7.430565451078403e-06, "loss": 3.1785, "step": 1240000 }, { "epoch": 5.93, "learning_rate": 7.426562767679453e-06, "loss": 3.1836, "step": 1240500 }, { "epoch": 5.94, "learning_rate": 7.422560084280502e-06, "loss": 3.1799, "step": 1241000 }, { "epoch": 5.94, "learning_rate": 7.418557400881552e-06, "loss": 3.1859, "step": 1241500 }, { "epoch": 5.94, "learning_rate": 7.4145547174826005e-06, "loss": 3.1834, "step": 1242000 }, { "epoch": 5.94, "learning_rate": 7.41055203408365e-06, "loss": 3.1853, "step": 1242500 }, { "epoch": 5.95, "learning_rate": 7.4065493506847e-06, "loss": 3.166, "step": 1243000 }, { "epoch": 5.95, "learning_rate": 7.402546667285749e-06, "loss": 3.1666, "step": 1243500 }, { "epoch": 5.95, "learning_rate": 7.398543983886798e-06, "loss": 3.1739, "step": 1244000 }, { "epoch": 5.95, "learning_rate": 7.394541300487847e-06, "loss": 3.1669, "step": 1244500 }, { "epoch": 5.96, "learning_rate": 7.390538617088897e-06, "loss": 3.1841, "step": 1245000 }, { "epoch": 5.96, "learning_rate": 7.386535933689947e-06, "loss": 3.1653, "step": 1245500 }, { "epoch": 5.96, "learning_rate": 7.382533250290995e-06, "loss": 3.1598, "step": 1246000 }, { "epoch": 5.96, "learning_rate": 7.378530566892045e-06, "loss": 3.1673, "step": 1246500 }, { "epoch": 5.97, "learning_rate": 7.374527883493094e-06, "loss": 3.1755, "step": 1247000 }, { "epoch": 5.97, "learning_rate": 7.370525200094144e-06, "loss": 3.1797, "step": 1247500 }, { "epoch": 5.97, "learning_rate": 7.366522516695193e-06, "loss": 3.1844, "step": 1248000 }, { "epoch": 5.97, "learning_rate": 7.362519833296242e-06, "loss": 3.1611, "step": 1248500 }, { "epoch": 5.98, "learning_rate": 7.358517149897292e-06, "loss": 3.1623, "step": 1249000 }, { "epoch": 5.98, "learning_rate": 7.354514466498341e-06, "loss": 3.1857, "step": 1249500 }, { "epoch": 5.98, "learning_rate": 7.35051178309939e-06, "loss": 3.1599, "step": 1250000 }, { "epoch": 5.98, "learning_rate": 7.3465090997004395e-06, "loss": 3.1663, "step": 1250500 }, { "epoch": 5.98, "learning_rate": 7.3425064163014895e-06, "loss": 3.1652, "step": 1251000 }, { "epoch": 5.99, "learning_rate": 7.338503732902539e-06, "loss": 3.17, "step": 1251500 }, { "epoch": 5.99, "learning_rate": 7.334501049503587e-06, "loss": 3.1542, "step": 1252000 }, { "epoch": 5.99, "learning_rate": 7.330498366104637e-06, "loss": 3.1714, "step": 1252500 }, { "epoch": 5.99, "learning_rate": 7.326495682705686e-06, "loss": 3.1672, "step": 1253000 }, { "epoch": 6.0, "learning_rate": 7.322492999306736e-06, "loss": 3.1768, "step": 1253500 }, { "epoch": 6.0, "learning_rate": 7.318490315907785e-06, "loss": 3.1719, "step": 1254000 }, { "epoch": 6.0, "step": 1254162, "total_flos": 5550828628572068352, "train_runtime": 106122.2527, "train_samples_per_second": 11.818 } ], "max_steps": 1254162, "num_train_epochs": 6, "total_flos": 5550828628572068352, "trial_name": null, "trial_params": null }