diff --git "a/trainer_state.json" "b/trainer_state.json" new file mode 100644--- /dev/null +++ "b/trainer_state.json" @@ -0,0 +1,5338 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 3.0, + "global_step": 44361, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 5e-06, + "loss": 0.6934, + "step": 50 + }, + { + "epoch": 0.01, + "learning_rate": 1e-05, + "loss": 0.5953, + "step": 100 + }, + { + "epoch": 0.01, + "learning_rate": 1.5e-05, + "loss": 0.5041, + "step": 150 + }, + { + "epoch": 0.01, + "learning_rate": 2e-05, + "loss": 0.5038, + "step": 200 + }, + { + "epoch": 0.02, + "learning_rate": 2.5e-05, + "loss": 0.4902, + "step": 250 + }, + { + "epoch": 0.02, + "learning_rate": 3e-05, + "loss": 0.4758, + "step": 300 + }, + { + "epoch": 0.02, + "learning_rate": 3.5e-05, + "loss": 0.5029, + "step": 350 + }, + { + "epoch": 0.03, + "learning_rate": 4e-05, + "loss": 0.4621, + "step": 400 + }, + { + "epoch": 0.03, + "learning_rate": 4.5e-05, + "loss": 0.477, + "step": 450 + }, + { + "epoch": 0.03, + "learning_rate": 5e-05, + "loss": 0.4989, + "step": 500 + }, + { + "epoch": 0.04, + "learning_rate": 4.994300175554593e-05, + "loss": 0.4836, + "step": 550 + }, + { + "epoch": 0.04, + "learning_rate": 4.988600351109186e-05, + "loss": 0.4955, + "step": 600 + }, + { + "epoch": 0.04, + "learning_rate": 4.982900526663779e-05, + "loss": 0.4802, + "step": 650 + }, + { + "epoch": 0.05, + "learning_rate": 4.977200702218372e-05, + "loss": 0.4741, + "step": 700 + }, + { + "epoch": 0.05, + "learning_rate": 4.971500877772965e-05, + "loss": 0.4728, + "step": 750 + }, + { + "epoch": 0.05, + "learning_rate": 4.965801053327558e-05, + "loss": 0.4439, + "step": 800 + }, + { + "epoch": 0.06, + "learning_rate": 4.9601012288821506e-05, + "loss": 0.4795, + "step": 850 + }, + { + "epoch": 0.06, + "learning_rate": 4.954401404436744e-05, + "loss": 0.4612, + "step": 900 + }, + { + "epoch": 0.06, + "learning_rate": 4.948701579991337e-05, + "loss": 0.4623, + "step": 950 + }, + { + "epoch": 0.07, + "learning_rate": 4.9430017555459296e-05, + "loss": 0.4743, + "step": 1000 + }, + { + "epoch": 0.07, + "learning_rate": 4.9373019311005225e-05, + "loss": 0.434, + "step": 1050 + }, + { + "epoch": 0.07, + "learning_rate": 4.931602106655115e-05, + "loss": 0.461, + "step": 1100 + }, + { + "epoch": 0.08, + "learning_rate": 4.925902282209708e-05, + "loss": 0.4438, + "step": 1150 + }, + { + "epoch": 0.08, + "learning_rate": 4.9202024577643015e-05, + "loss": 0.4663, + "step": 1200 + }, + { + "epoch": 0.08, + "learning_rate": 4.9145026333188944e-05, + "loss": 0.4321, + "step": 1250 + }, + { + "epoch": 0.09, + "learning_rate": 4.908802808873487e-05, + "loss": 0.4614, + "step": 1300 + }, + { + "epoch": 0.09, + "learning_rate": 4.903102984428079e-05, + "loss": 0.4656, + "step": 1350 + }, + { + "epoch": 0.09, + "learning_rate": 4.897403159982673e-05, + "loss": 0.4643, + "step": 1400 + }, + { + "epoch": 0.1, + "learning_rate": 4.8917033355372656e-05, + "loss": 0.4566, + "step": 1450 + }, + { + "epoch": 0.1, + "learning_rate": 4.8860035110918584e-05, + "loss": 0.4545, + "step": 1500 + }, + { + "epoch": 0.1, + "learning_rate": 4.880303686646451e-05, + "loss": 0.4477, + "step": 1550 + }, + { + "epoch": 0.11, + "learning_rate": 4.874603862201044e-05, + "loss": 0.4497, + "step": 1600 + }, + { + "epoch": 0.11, + "learning_rate": 4.8689040377556375e-05, + "loss": 0.4629, + "step": 1650 + }, + { + "epoch": 0.11, + "learning_rate": 4.86320421331023e-05, + "loss": 0.4539, + "step": 1700 + }, + { + "epoch": 0.12, + "learning_rate": 4.857504388864823e-05, + "loss": 0.4724, + "step": 1750 + }, + { + "epoch": 0.12, + "learning_rate": 4.851804564419416e-05, + "loss": 0.4619, + "step": 1800 + }, + { + "epoch": 0.13, + "learning_rate": 4.846104739974009e-05, + "loss": 0.444, + "step": 1850 + }, + { + "epoch": 0.13, + "learning_rate": 4.840404915528602e-05, + "loss": 0.4532, + "step": 1900 + }, + { + "epoch": 0.13, + "learning_rate": 4.834705091083195e-05, + "loss": 0.4819, + "step": 1950 + }, + { + "epoch": 0.14, + "learning_rate": 4.829005266637788e-05, + "loss": 0.4672, + "step": 2000 + }, + { + "epoch": 0.14, + "learning_rate": 4.8233054421923806e-05, + "loss": 0.4373, + "step": 2050 + }, + { + "epoch": 0.14, + "learning_rate": 4.8176056177469734e-05, + "loss": 0.4425, + "step": 2100 + }, + { + "epoch": 0.15, + "learning_rate": 4.811905793301567e-05, + "loss": 0.442, + "step": 2150 + }, + { + "epoch": 0.15, + "learning_rate": 4.80620596885616e-05, + "loss": 0.4365, + "step": 2200 + }, + { + "epoch": 0.15, + "learning_rate": 4.8005061444107525e-05, + "loss": 0.4481, + "step": 2250 + }, + { + "epoch": 0.16, + "learning_rate": 4.794806319965345e-05, + "loss": 0.4639, + "step": 2300 + }, + { + "epoch": 0.16, + "learning_rate": 4.789106495519938e-05, + "loss": 0.4527, + "step": 2350 + }, + { + "epoch": 0.16, + "learning_rate": 4.783406671074531e-05, + "loss": 0.4575, + "step": 2400 + }, + { + "epoch": 0.17, + "learning_rate": 4.7777068466291244e-05, + "loss": 0.4626, + "step": 2450 + }, + { + "epoch": 0.17, + "learning_rate": 4.772007022183717e-05, + "loss": 0.4235, + "step": 2500 + }, + { + "epoch": 0.17, + "learning_rate": 4.76630719773831e-05, + "loss": 0.4496, + "step": 2550 + }, + { + "epoch": 0.18, + "learning_rate": 4.760607373292903e-05, + "loss": 0.4355, + "step": 2600 + }, + { + "epoch": 0.18, + "learning_rate": 4.7549075488474956e-05, + "loss": 0.4361, + "step": 2650 + }, + { + "epoch": 0.18, + "learning_rate": 4.749207724402089e-05, + "loss": 0.4655, + "step": 2700 + }, + { + "epoch": 0.19, + "learning_rate": 4.743507899956682e-05, + "loss": 0.4295, + "step": 2750 + }, + { + "epoch": 0.19, + "learning_rate": 4.737808075511275e-05, + "loss": 0.4567, + "step": 2800 + }, + { + "epoch": 0.19, + "learning_rate": 4.7321082510658675e-05, + "loss": 0.4501, + "step": 2850 + }, + { + "epoch": 0.2, + "learning_rate": 4.72640842662046e-05, + "loss": 0.4518, + "step": 2900 + }, + { + "epoch": 0.2, + "learning_rate": 4.720708602175054e-05, + "loss": 0.4625, + "step": 2950 + }, + { + "epoch": 0.2, + "learning_rate": 4.715008777729646e-05, + "loss": 0.4563, + "step": 3000 + }, + { + "epoch": 0.21, + "learning_rate": 4.709308953284239e-05, + "loss": 0.4593, + "step": 3050 + }, + { + "epoch": 0.21, + "learning_rate": 4.7036091288388315e-05, + "loss": 0.4316, + "step": 3100 + }, + { + "epoch": 0.21, + "learning_rate": 4.697909304393425e-05, + "loss": 0.4465, + "step": 3150 + }, + { + "epoch": 0.22, + "learning_rate": 4.692209479948018e-05, + "loss": 0.4427, + "step": 3200 + }, + { + "epoch": 0.22, + "learning_rate": 4.6865096555026106e-05, + "loss": 0.4623, + "step": 3250 + }, + { + "epoch": 0.22, + "learning_rate": 4.6808098310572034e-05, + "loss": 0.4505, + "step": 3300 + }, + { + "epoch": 0.23, + "learning_rate": 4.675110006611796e-05, + "loss": 0.446, + "step": 3350 + }, + { + "epoch": 0.23, + "learning_rate": 4.669410182166389e-05, + "loss": 0.4549, + "step": 3400 + }, + { + "epoch": 0.23, + "learning_rate": 4.6637103577209825e-05, + "loss": 0.4467, + "step": 3450 + }, + { + "epoch": 0.24, + "learning_rate": 4.658010533275575e-05, + "loss": 0.4508, + "step": 3500 + }, + { + "epoch": 0.24, + "learning_rate": 4.652310708830168e-05, + "loss": 0.4378, + "step": 3550 + }, + { + "epoch": 0.24, + "learning_rate": 4.646610884384761e-05, + "loss": 0.4326, + "step": 3600 + }, + { + "epoch": 0.25, + "learning_rate": 4.640911059939354e-05, + "loss": 0.4312, + "step": 3650 + }, + { + "epoch": 0.25, + "learning_rate": 4.635211235493947e-05, + "loss": 0.4426, + "step": 3700 + }, + { + "epoch": 0.25, + "learning_rate": 4.62951141104854e-05, + "loss": 0.4246, + "step": 3750 + }, + { + "epoch": 0.26, + "learning_rate": 4.623811586603133e-05, + "loss": 0.4327, + "step": 3800 + }, + { + "epoch": 0.26, + "learning_rate": 4.6181117621577257e-05, + "loss": 0.4526, + "step": 3850 + }, + { + "epoch": 0.26, + "learning_rate": 4.6124119377123185e-05, + "loss": 0.4427, + "step": 3900 + }, + { + "epoch": 0.27, + "learning_rate": 4.606712113266912e-05, + "loss": 0.45, + "step": 3950 + }, + { + "epoch": 0.27, + "learning_rate": 4.601012288821505e-05, + "loss": 0.4537, + "step": 4000 + }, + { + "epoch": 0.27, + "learning_rate": 4.5953124643760975e-05, + "loss": 0.4487, + "step": 4050 + }, + { + "epoch": 0.28, + "learning_rate": 4.5896126399306904e-05, + "loss": 0.4305, + "step": 4100 + }, + { + "epoch": 0.28, + "learning_rate": 4.583912815485283e-05, + "loss": 0.4434, + "step": 4150 + }, + { + "epoch": 0.28, + "learning_rate": 4.5782129910398766e-05, + "loss": 0.4619, + "step": 4200 + }, + { + "epoch": 0.29, + "learning_rate": 4.5725131665944694e-05, + "loss": 0.4481, + "step": 4250 + }, + { + "epoch": 0.29, + "learning_rate": 4.566813342149062e-05, + "loss": 0.4493, + "step": 4300 + }, + { + "epoch": 0.29, + "learning_rate": 4.561113517703655e-05, + "loss": 0.4438, + "step": 4350 + }, + { + "epoch": 0.3, + "learning_rate": 4.555413693258248e-05, + "loss": 0.4334, + "step": 4400 + }, + { + "epoch": 0.3, + "learning_rate": 4.549713868812841e-05, + "loss": 0.433, + "step": 4450 + }, + { + "epoch": 0.3, + "learning_rate": 4.544014044367434e-05, + "loss": 0.4493, + "step": 4500 + }, + { + "epoch": 0.31, + "learning_rate": 4.538314219922027e-05, + "loss": 0.449, + "step": 4550 + }, + { + "epoch": 0.31, + "learning_rate": 4.53261439547662e-05, + "loss": 0.4464, + "step": 4600 + }, + { + "epoch": 0.31, + "learning_rate": 4.526914571031212e-05, + "loss": 0.4241, + "step": 4650 + }, + { + "epoch": 0.32, + "learning_rate": 4.5212147465858054e-05, + "loss": 0.4091, + "step": 4700 + }, + { + "epoch": 0.32, + "learning_rate": 4.515514922140398e-05, + "loss": 0.4402, + "step": 4750 + }, + { + "epoch": 0.32, + "learning_rate": 4.509815097694991e-05, + "loss": 0.4245, + "step": 4800 + }, + { + "epoch": 0.33, + "learning_rate": 4.504115273249584e-05, + "loss": 0.4507, + "step": 4850 + }, + { + "epoch": 0.33, + "learning_rate": 4.4984154488041766e-05, + "loss": 0.4107, + "step": 4900 + }, + { + "epoch": 0.33, + "learning_rate": 4.49271562435877e-05, + "loss": 0.4454, + "step": 4950 + }, + { + "epoch": 0.34, + "learning_rate": 4.487015799913363e-05, + "loss": 0.4451, + "step": 5000 + }, + { + "epoch": 0.34, + "learning_rate": 4.481315975467956e-05, + "loss": 0.4273, + "step": 5050 + }, + { + "epoch": 0.34, + "learning_rate": 4.4756161510225485e-05, + "loss": 0.4523, + "step": 5100 + }, + { + "epoch": 0.35, + "learning_rate": 4.469916326577141e-05, + "loss": 0.4469, + "step": 5150 + }, + { + "epoch": 0.35, + "learning_rate": 4.464216502131735e-05, + "loss": 0.4264, + "step": 5200 + }, + { + "epoch": 0.36, + "learning_rate": 4.4585166776863276e-05, + "loss": 0.4442, + "step": 5250 + }, + { + "epoch": 0.36, + "learning_rate": 4.4528168532409204e-05, + "loss": 0.4361, + "step": 5300 + }, + { + "epoch": 0.36, + "learning_rate": 4.447117028795513e-05, + "loss": 0.4309, + "step": 5350 + }, + { + "epoch": 0.37, + "learning_rate": 4.441417204350106e-05, + "loss": 0.4252, + "step": 5400 + }, + { + "epoch": 0.37, + "learning_rate": 4.4357173799046995e-05, + "loss": 0.4442, + "step": 5450 + }, + { + "epoch": 0.37, + "learning_rate": 4.430017555459292e-05, + "loss": 0.4425, + "step": 5500 + }, + { + "epoch": 0.38, + "learning_rate": 4.424317731013885e-05, + "loss": 0.4469, + "step": 5550 + }, + { + "epoch": 0.38, + "learning_rate": 4.418617906568478e-05, + "loss": 0.4222, + "step": 5600 + }, + { + "epoch": 0.38, + "learning_rate": 4.412918082123071e-05, + "loss": 0.4431, + "step": 5650 + }, + { + "epoch": 0.39, + "learning_rate": 4.4072182576776635e-05, + "loss": 0.4301, + "step": 5700 + }, + { + "epoch": 0.39, + "learning_rate": 4.401518433232257e-05, + "loss": 0.4565, + "step": 5750 + }, + { + "epoch": 0.39, + "learning_rate": 4.39581860878685e-05, + "loss": 0.4332, + "step": 5800 + }, + { + "epoch": 0.4, + "learning_rate": 4.3901187843414426e-05, + "loss": 0.4352, + "step": 5850 + }, + { + "epoch": 0.4, + "learning_rate": 4.3844189598960354e-05, + "loss": 0.4307, + "step": 5900 + }, + { + "epoch": 0.4, + "learning_rate": 4.378719135450628e-05, + "loss": 0.4603, + "step": 5950 + }, + { + "epoch": 0.41, + "learning_rate": 4.373019311005222e-05, + "loss": 0.4314, + "step": 6000 + }, + { + "epoch": 0.41, + "learning_rate": 4.3673194865598145e-05, + "loss": 0.4299, + "step": 6050 + }, + { + "epoch": 0.41, + "learning_rate": 4.361619662114407e-05, + "loss": 0.4427, + "step": 6100 + }, + { + "epoch": 0.42, + "learning_rate": 4.355919837669e-05, + "loss": 0.4358, + "step": 6150 + }, + { + "epoch": 0.42, + "learning_rate": 4.350220013223593e-05, + "loss": 0.4483, + "step": 6200 + }, + { + "epoch": 0.42, + "learning_rate": 4.344520188778186e-05, + "loss": 0.4326, + "step": 6250 + }, + { + "epoch": 0.43, + "learning_rate": 4.3388203643327785e-05, + "loss": 0.4367, + "step": 6300 + }, + { + "epoch": 0.43, + "learning_rate": 4.333120539887371e-05, + "loss": 0.4305, + "step": 6350 + }, + { + "epoch": 0.43, + "learning_rate": 4.327420715441964e-05, + "loss": 0.4274, + "step": 6400 + }, + { + "epoch": 0.44, + "learning_rate": 4.3217208909965576e-05, + "loss": 0.4265, + "step": 6450 + }, + { + "epoch": 0.44, + "learning_rate": 4.3160210665511504e-05, + "loss": 0.4519, + "step": 6500 + }, + { + "epoch": 0.44, + "learning_rate": 4.310321242105743e-05, + "loss": 0.4335, + "step": 6550 + }, + { + "epoch": 0.45, + "learning_rate": 4.304621417660336e-05, + "loss": 0.448, + "step": 6600 + }, + { + "epoch": 0.45, + "learning_rate": 4.298921593214929e-05, + "loss": 0.4098, + "step": 6650 + }, + { + "epoch": 0.45, + "learning_rate": 4.2932217687695217e-05, + "loss": 0.45, + "step": 6700 + }, + { + "epoch": 0.46, + "learning_rate": 4.287521944324115e-05, + "loss": 0.4218, + "step": 6750 + }, + { + "epoch": 0.46, + "learning_rate": 4.281822119878708e-05, + "loss": 0.446, + "step": 6800 + }, + { + "epoch": 0.46, + "learning_rate": 4.276122295433301e-05, + "loss": 0.4319, + "step": 6850 + }, + { + "epoch": 0.47, + "learning_rate": 4.2704224709878936e-05, + "loss": 0.4232, + "step": 6900 + }, + { + "epoch": 0.47, + "learning_rate": 4.2647226465424864e-05, + "loss": 0.4258, + "step": 6950 + }, + { + "epoch": 0.47, + "learning_rate": 4.25902282209708e-05, + "loss": 0.4277, + "step": 7000 + }, + { + "epoch": 0.48, + "learning_rate": 4.2533229976516726e-05, + "loss": 0.4264, + "step": 7050 + }, + { + "epoch": 0.48, + "learning_rate": 4.2476231732062655e-05, + "loss": 0.4419, + "step": 7100 + }, + { + "epoch": 0.48, + "learning_rate": 4.241923348760858e-05, + "loss": 0.4369, + "step": 7150 + }, + { + "epoch": 0.49, + "learning_rate": 4.236223524315451e-05, + "loss": 0.4532, + "step": 7200 + }, + { + "epoch": 0.49, + "learning_rate": 4.2305236998700445e-05, + "loss": 0.4205, + "step": 7250 + }, + { + "epoch": 0.49, + "learning_rate": 4.2248238754246373e-05, + "loss": 0.406, + "step": 7300 + }, + { + "epoch": 0.5, + "learning_rate": 4.21912405097923e-05, + "loss": 0.4436, + "step": 7350 + }, + { + "epoch": 0.5, + "learning_rate": 4.213424226533823e-05, + "loss": 0.4443, + "step": 7400 + }, + { + "epoch": 0.5, + "learning_rate": 4.207724402088416e-05, + "loss": 0.4353, + "step": 7450 + }, + { + "epoch": 0.51, + "learning_rate": 4.202024577643009e-05, + "loss": 0.4181, + "step": 7500 + }, + { + "epoch": 0.51, + "learning_rate": 4.196324753197602e-05, + "loss": 0.4202, + "step": 7550 + }, + { + "epoch": 0.51, + "learning_rate": 4.190624928752195e-05, + "loss": 0.4327, + "step": 7600 + }, + { + "epoch": 0.52, + "learning_rate": 4.184925104306788e-05, + "loss": 0.4169, + "step": 7650 + }, + { + "epoch": 0.52, + "learning_rate": 4.1792252798613805e-05, + "loss": 0.4559, + "step": 7700 + }, + { + "epoch": 0.52, + "learning_rate": 4.173525455415974e-05, + "loss": 0.4114, + "step": 7750 + }, + { + "epoch": 0.53, + "learning_rate": 4.167825630970567e-05, + "loss": 0.4371, + "step": 7800 + }, + { + "epoch": 0.53, + "learning_rate": 4.1621258065251596e-05, + "loss": 0.4198, + "step": 7850 + }, + { + "epoch": 0.53, + "learning_rate": 4.156425982079752e-05, + "loss": 0.4338, + "step": 7900 + }, + { + "epoch": 0.54, + "learning_rate": 4.1507261576343445e-05, + "loss": 0.4651, + "step": 7950 + }, + { + "epoch": 0.54, + "learning_rate": 4.145026333188938e-05, + "loss": 0.4341, + "step": 8000 + }, + { + "epoch": 0.54, + "learning_rate": 4.139326508743531e-05, + "loss": 0.4449, + "step": 8050 + }, + { + "epoch": 0.55, + "learning_rate": 4.1336266842981236e-05, + "loss": 0.4415, + "step": 8100 + }, + { + "epoch": 0.55, + "learning_rate": 4.1279268598527164e-05, + "loss": 0.4214, + "step": 8150 + }, + { + "epoch": 0.55, + "learning_rate": 4.122227035407309e-05, + "loss": 0.4582, + "step": 8200 + }, + { + "epoch": 0.56, + "learning_rate": 4.116527210961903e-05, + "loss": 0.4339, + "step": 8250 + }, + { + "epoch": 0.56, + "learning_rate": 4.1108273865164955e-05, + "loss": 0.4519, + "step": 8300 + }, + { + "epoch": 0.56, + "learning_rate": 4.105127562071088e-05, + "loss": 0.4483, + "step": 8350 + }, + { + "epoch": 0.57, + "learning_rate": 4.099427737625681e-05, + "loss": 0.4335, + "step": 8400 + }, + { + "epoch": 0.57, + "learning_rate": 4.093727913180274e-05, + "loss": 0.4199, + "step": 8450 + }, + { + "epoch": 0.57, + "learning_rate": 4.0880280887348674e-05, + "loss": 0.4409, + "step": 8500 + }, + { + "epoch": 0.58, + "learning_rate": 4.08232826428946e-05, + "loss": 0.4459, + "step": 8550 + }, + { + "epoch": 0.58, + "learning_rate": 4.076628439844053e-05, + "loss": 0.4306, + "step": 8600 + }, + { + "epoch": 0.58, + "learning_rate": 4.070928615398646e-05, + "loss": 0.4147, + "step": 8650 + }, + { + "epoch": 0.59, + "learning_rate": 4.0652287909532386e-05, + "loss": 0.4381, + "step": 8700 + }, + { + "epoch": 0.59, + "learning_rate": 4.059528966507832e-05, + "loss": 0.4233, + "step": 8750 + }, + { + "epoch": 0.6, + "learning_rate": 4.053829142062425e-05, + "loss": 0.4353, + "step": 8800 + }, + { + "epoch": 0.6, + "learning_rate": 4.048129317617018e-05, + "loss": 0.4322, + "step": 8850 + }, + { + "epoch": 0.6, + "learning_rate": 4.0424294931716105e-05, + "loss": 0.4247, + "step": 8900 + }, + { + "epoch": 0.61, + "learning_rate": 4.036729668726203e-05, + "loss": 0.4309, + "step": 8950 + }, + { + "epoch": 0.61, + "learning_rate": 4.031029844280796e-05, + "loss": 0.4046, + "step": 9000 + }, + { + "epoch": 0.61, + "learning_rate": 4.0253300198353896e-05, + "loss": 0.4704, + "step": 9050 + }, + { + "epoch": 0.62, + "learning_rate": 4.0196301953899824e-05, + "loss": 0.4778, + "step": 9100 + }, + { + "epoch": 0.62, + "learning_rate": 4.013930370944575e-05, + "loss": 0.4714, + "step": 9150 + }, + { + "epoch": 0.62, + "learning_rate": 4.008230546499168e-05, + "loss": 0.4971, + "step": 9200 + }, + { + "epoch": 0.63, + "learning_rate": 4.002530722053761e-05, + "loss": 0.4713, + "step": 9250 + }, + { + "epoch": 0.63, + "learning_rate": 3.996830897608354e-05, + "loss": 0.4536, + "step": 9300 + }, + { + "epoch": 0.63, + "learning_rate": 3.991131073162947e-05, + "loss": 0.4443, + "step": 9350 + }, + { + "epoch": 0.64, + "learning_rate": 3.98543124871754e-05, + "loss": 0.4303, + "step": 9400 + }, + { + "epoch": 0.64, + "learning_rate": 3.979731424272133e-05, + "loss": 0.4285, + "step": 9450 + }, + { + "epoch": 0.64, + "learning_rate": 3.9740315998267255e-05, + "loss": 0.4376, + "step": 9500 + }, + { + "epoch": 0.65, + "learning_rate": 3.968331775381318e-05, + "loss": 0.4458, + "step": 9550 + }, + { + "epoch": 0.65, + "learning_rate": 3.962631950935911e-05, + "loss": 0.4414, + "step": 9600 + }, + { + "epoch": 0.65, + "learning_rate": 3.956932126490504e-05, + "loss": 0.4474, + "step": 9650 + }, + { + "epoch": 0.66, + "learning_rate": 3.951232302045097e-05, + "loss": 0.4276, + "step": 9700 + }, + { + "epoch": 0.66, + "learning_rate": 3.94553247759969e-05, + "loss": 0.4529, + "step": 9750 + }, + { + "epoch": 0.66, + "learning_rate": 3.939832653154283e-05, + "loss": 0.4629, + "step": 9800 + }, + { + "epoch": 0.67, + "learning_rate": 3.934132828708876e-05, + "loss": 0.4438, + "step": 9850 + }, + { + "epoch": 0.67, + "learning_rate": 3.9284330042634686e-05, + "loss": 0.4331, + "step": 9900 + }, + { + "epoch": 0.67, + "learning_rate": 3.9227331798180615e-05, + "loss": 0.4478, + "step": 9950 + }, + { + "epoch": 0.68, + "learning_rate": 3.917033355372654e-05, + "loss": 0.4499, + "step": 10000 + }, + { + "epoch": 0.68, + "learning_rate": 3.911333530927248e-05, + "loss": 0.4589, + "step": 10050 + }, + { + "epoch": 0.68, + "learning_rate": 3.9056337064818405e-05, + "loss": 0.4636, + "step": 10100 + }, + { + "epoch": 0.69, + "learning_rate": 3.8999338820364334e-05, + "loss": 0.4601, + "step": 10150 + }, + { + "epoch": 0.69, + "learning_rate": 3.894234057591026e-05, + "loss": 0.4391, + "step": 10200 + }, + { + "epoch": 0.69, + "learning_rate": 3.888534233145619e-05, + "loss": 0.4452, + "step": 10250 + }, + { + "epoch": 0.7, + "learning_rate": 3.8828344087002124e-05, + "loss": 0.4603, + "step": 10300 + }, + { + "epoch": 0.7, + "learning_rate": 3.877134584254805e-05, + "loss": 0.4416, + "step": 10350 + }, + { + "epoch": 0.7, + "learning_rate": 3.871434759809398e-05, + "loss": 0.4419, + "step": 10400 + }, + { + "epoch": 0.71, + "learning_rate": 3.865734935363991e-05, + "loss": 0.4367, + "step": 10450 + }, + { + "epoch": 0.71, + "learning_rate": 3.860035110918584e-05, + "loss": 0.4265, + "step": 10500 + }, + { + "epoch": 0.71, + "learning_rate": 3.854335286473177e-05, + "loss": 0.4387, + "step": 10550 + }, + { + "epoch": 0.72, + "learning_rate": 3.84863546202777e-05, + "loss": 0.4327, + "step": 10600 + }, + { + "epoch": 0.72, + "learning_rate": 3.842935637582363e-05, + "loss": 0.4645, + "step": 10650 + }, + { + "epoch": 0.72, + "learning_rate": 3.8372358131369556e-05, + "loss": 0.439, + "step": 10700 + }, + { + "epoch": 0.73, + "learning_rate": 3.8315359886915484e-05, + "loss": 0.4391, + "step": 10750 + }, + { + "epoch": 0.73, + "learning_rate": 3.825836164246142e-05, + "loss": 0.4795, + "step": 10800 + }, + { + "epoch": 0.73, + "learning_rate": 3.8201363398007347e-05, + "loss": 0.485, + "step": 10850 + }, + { + "epoch": 0.74, + "learning_rate": 3.8144365153553275e-05, + "loss": 0.4669, + "step": 10900 + }, + { + "epoch": 0.74, + "learning_rate": 3.80873669090992e-05, + "loss": 0.4674, + "step": 10950 + }, + { + "epoch": 0.74, + "learning_rate": 3.803036866464513e-05, + "loss": 0.4926, + "step": 11000 + }, + { + "epoch": 0.75, + "learning_rate": 3.7973370420191066e-05, + "loss": 0.4585, + "step": 11050 + }, + { + "epoch": 0.75, + "learning_rate": 3.7916372175736994e-05, + "loss": 0.4782, + "step": 11100 + }, + { + "epoch": 0.75, + "learning_rate": 3.785937393128292e-05, + "loss": 0.4851, + "step": 11150 + }, + { + "epoch": 0.76, + "learning_rate": 3.780237568682884e-05, + "loss": 0.4782, + "step": 11200 + }, + { + "epoch": 0.76, + "learning_rate": 3.774537744237477e-05, + "loss": 0.4744, + "step": 11250 + }, + { + "epoch": 0.76, + "learning_rate": 3.7688379197920706e-05, + "loss": 0.4659, + "step": 11300 + }, + { + "epoch": 0.77, + "learning_rate": 3.7631380953466634e-05, + "loss": 0.471, + "step": 11350 + }, + { + "epoch": 0.77, + "learning_rate": 3.757438270901256e-05, + "loss": 0.4871, + "step": 11400 + }, + { + "epoch": 0.77, + "learning_rate": 3.751738446455849e-05, + "loss": 0.4729, + "step": 11450 + }, + { + "epoch": 0.78, + "learning_rate": 3.746038622010442e-05, + "loss": 0.4772, + "step": 11500 + }, + { + "epoch": 0.78, + "learning_rate": 3.740338797565035e-05, + "loss": 0.4957, + "step": 11550 + }, + { + "epoch": 0.78, + "learning_rate": 3.734638973119628e-05, + "loss": 0.4931, + "step": 11600 + }, + { + "epoch": 0.79, + "learning_rate": 3.728939148674221e-05, + "loss": 0.4642, + "step": 11650 + }, + { + "epoch": 0.79, + "learning_rate": 3.723239324228814e-05, + "loss": 0.4725, + "step": 11700 + }, + { + "epoch": 0.79, + "learning_rate": 3.7175394997834065e-05, + "loss": 0.4871, + "step": 11750 + }, + { + "epoch": 0.8, + "learning_rate": 3.711839675338e-05, + "loss": 0.4686, + "step": 11800 + }, + { + "epoch": 0.8, + "learning_rate": 3.706139850892593e-05, + "loss": 0.4776, + "step": 11850 + }, + { + "epoch": 0.8, + "learning_rate": 3.7004400264471856e-05, + "loss": 0.4852, + "step": 11900 + }, + { + "epoch": 0.81, + "learning_rate": 3.6947402020017784e-05, + "loss": 0.4928, + "step": 11950 + }, + { + "epoch": 0.81, + "learning_rate": 3.689040377556371e-05, + "loss": 0.4728, + "step": 12000 + }, + { + "epoch": 0.81, + "learning_rate": 3.683340553110965e-05, + "loss": 0.462, + "step": 12050 + }, + { + "epoch": 0.82, + "learning_rate": 3.6776407286655575e-05, + "loss": 0.4577, + "step": 12100 + }, + { + "epoch": 0.82, + "learning_rate": 3.67194090422015e-05, + "loss": 0.4697, + "step": 12150 + }, + { + "epoch": 0.83, + "learning_rate": 3.666241079774743e-05, + "loss": 0.4914, + "step": 12200 + }, + { + "epoch": 0.83, + "learning_rate": 3.660541255329336e-05, + "loss": 0.4493, + "step": 12250 + }, + { + "epoch": 0.83, + "learning_rate": 3.654841430883929e-05, + "loss": 0.4601, + "step": 12300 + }, + { + "epoch": 0.84, + "learning_rate": 3.649141606438522e-05, + "loss": 0.4577, + "step": 12350 + }, + { + "epoch": 0.84, + "learning_rate": 3.643441781993115e-05, + "loss": 0.4645, + "step": 12400 + }, + { + "epoch": 0.84, + "learning_rate": 3.637741957547708e-05, + "loss": 0.4722, + "step": 12450 + }, + { + "epoch": 0.85, + "learning_rate": 3.6320421331023006e-05, + "loss": 0.4707, + "step": 12500 + }, + { + "epoch": 0.85, + "learning_rate": 3.6263423086568934e-05, + "loss": 0.4611, + "step": 12550 + }, + { + "epoch": 0.85, + "learning_rate": 3.620642484211487e-05, + "loss": 0.4907, + "step": 12600 + }, + { + "epoch": 0.86, + "learning_rate": 3.61494265976608e-05, + "loss": 0.4848, + "step": 12650 + }, + { + "epoch": 0.86, + "learning_rate": 3.6092428353206725e-05, + "loss": 0.5054, + "step": 12700 + }, + { + "epoch": 0.86, + "learning_rate": 3.603543010875265e-05, + "loss": 0.4926, + "step": 12750 + }, + { + "epoch": 0.87, + "learning_rate": 3.597843186429858e-05, + "loss": 0.4608, + "step": 12800 + }, + { + "epoch": 0.87, + "learning_rate": 3.592143361984451e-05, + "loss": 0.4791, + "step": 12850 + }, + { + "epoch": 0.87, + "learning_rate": 3.586443537539044e-05, + "loss": 0.4826, + "step": 12900 + }, + { + "epoch": 0.88, + "learning_rate": 3.5807437130936365e-05, + "loss": 0.4558, + "step": 12950 + }, + { + "epoch": 0.88, + "learning_rate": 3.5750438886482294e-05, + "loss": 0.4822, + "step": 13000 + }, + { + "epoch": 0.88, + "learning_rate": 3.569344064202823e-05, + "loss": 0.4668, + "step": 13050 + }, + { + "epoch": 0.89, + "learning_rate": 3.5636442397574156e-05, + "loss": 0.4707, + "step": 13100 + }, + { + "epoch": 0.89, + "learning_rate": 3.5579444153120084e-05, + "loss": 0.4704, + "step": 13150 + }, + { + "epoch": 0.89, + "learning_rate": 3.552244590866601e-05, + "loss": 0.4502, + "step": 13200 + }, + { + "epoch": 0.9, + "learning_rate": 3.546544766421194e-05, + "loss": 0.4552, + "step": 13250 + }, + { + "epoch": 0.9, + "learning_rate": 3.540844941975787e-05, + "loss": 0.4639, + "step": 13300 + }, + { + "epoch": 0.9, + "learning_rate": 3.5351451175303803e-05, + "loss": 0.4518, + "step": 13350 + }, + { + "epoch": 0.91, + "learning_rate": 3.529445293084973e-05, + "loss": 0.4547, + "step": 13400 + }, + { + "epoch": 0.91, + "learning_rate": 3.523745468639566e-05, + "loss": 0.4349, + "step": 13450 + }, + { + "epoch": 0.91, + "learning_rate": 3.518045644194159e-05, + "loss": 0.4243, + "step": 13500 + }, + { + "epoch": 0.92, + "learning_rate": 3.5123458197487516e-05, + "loss": 0.4471, + "step": 13550 + }, + { + "epoch": 0.92, + "learning_rate": 3.506645995303345e-05, + "loss": 0.4511, + "step": 13600 + }, + { + "epoch": 0.92, + "learning_rate": 3.500946170857938e-05, + "loss": 0.4786, + "step": 13650 + }, + { + "epoch": 0.93, + "learning_rate": 3.495246346412531e-05, + "loss": 0.4378, + "step": 13700 + }, + { + "epoch": 0.93, + "learning_rate": 3.4895465219671235e-05, + "loss": 0.4542, + "step": 13750 + }, + { + "epoch": 0.93, + "learning_rate": 3.483846697521716e-05, + "loss": 0.4533, + "step": 13800 + }, + { + "epoch": 0.94, + "learning_rate": 3.47814687307631e-05, + "loss": 0.453, + "step": 13850 + }, + { + "epoch": 0.94, + "learning_rate": 3.4724470486309026e-05, + "loss": 0.4585, + "step": 13900 + }, + { + "epoch": 0.94, + "learning_rate": 3.4667472241854954e-05, + "loss": 0.4277, + "step": 13950 + }, + { + "epoch": 0.95, + "learning_rate": 3.461047399740088e-05, + "loss": 0.4447, + "step": 14000 + }, + { + "epoch": 0.95, + "learning_rate": 3.455347575294681e-05, + "loss": 0.4586, + "step": 14050 + }, + { + "epoch": 0.95, + "learning_rate": 3.4496477508492745e-05, + "loss": 0.4695, + "step": 14100 + }, + { + "epoch": 0.96, + "learning_rate": 3.443947926403867e-05, + "loss": 0.4866, + "step": 14150 + }, + { + "epoch": 0.96, + "learning_rate": 3.43824810195846e-05, + "loss": 0.4645, + "step": 14200 + }, + { + "epoch": 0.96, + "learning_rate": 3.432548277513053e-05, + "loss": 0.468, + "step": 14250 + }, + { + "epoch": 0.97, + "learning_rate": 3.426848453067646e-05, + "loss": 0.4599, + "step": 14300 + }, + { + "epoch": 0.97, + "learning_rate": 3.421148628622239e-05, + "loss": 0.4653, + "step": 14350 + }, + { + "epoch": 0.97, + "learning_rate": 3.415448804176832e-05, + "loss": 0.4466, + "step": 14400 + }, + { + "epoch": 0.98, + "learning_rate": 3.409748979731425e-05, + "loss": 0.4781, + "step": 14450 + }, + { + "epoch": 0.98, + "learning_rate": 3.404049155286017e-05, + "loss": 0.4575, + "step": 14500 + }, + { + "epoch": 0.98, + "learning_rate": 3.39834933084061e-05, + "loss": 0.4547, + "step": 14550 + }, + { + "epoch": 0.99, + "learning_rate": 3.392649506395203e-05, + "loss": 0.436, + "step": 14600 + }, + { + "epoch": 0.99, + "learning_rate": 3.386949681949796e-05, + "loss": 0.432, + "step": 14650 + }, + { + "epoch": 0.99, + "learning_rate": 3.381249857504389e-05, + "loss": 0.4602, + "step": 14700 + }, + { + "epoch": 1.0, + "learning_rate": 3.3755500330589816e-05, + "loss": 0.4339, + "step": 14750 + }, + { + "epoch": 1.0, + "learning_rate": 3.3698502086135744e-05, + "loss": 0.4424, + "step": 14800 + }, + { + "epoch": 1.0, + "learning_rate": 3.364150384168168e-05, + "loss": 0.4588, + "step": 14850 + }, + { + "epoch": 1.01, + "learning_rate": 3.358450559722761e-05, + "loss": 0.4564, + "step": 14900 + }, + { + "epoch": 1.01, + "learning_rate": 3.3527507352773535e-05, + "loss": 0.4571, + "step": 14950 + }, + { + "epoch": 1.01, + "learning_rate": 3.347050910831946e-05, + "loss": 0.4501, + "step": 15000 + }, + { + "epoch": 1.02, + "learning_rate": 3.341351086386539e-05, + "loss": 0.4238, + "step": 15050 + }, + { + "epoch": 1.02, + "learning_rate": 3.3356512619411326e-05, + "loss": 0.4521, + "step": 15100 + }, + { + "epoch": 1.02, + "learning_rate": 3.3299514374957254e-05, + "loss": 0.4545, + "step": 15150 + }, + { + "epoch": 1.03, + "learning_rate": 3.324251613050318e-05, + "loss": 0.4586, + "step": 15200 + }, + { + "epoch": 1.03, + "learning_rate": 3.318551788604911e-05, + "loss": 0.4197, + "step": 15250 + }, + { + "epoch": 1.03, + "learning_rate": 3.312851964159504e-05, + "loss": 0.4576, + "step": 15300 + }, + { + "epoch": 1.04, + "learning_rate": 3.307152139714097e-05, + "loss": 0.4315, + "step": 15350 + }, + { + "epoch": 1.04, + "learning_rate": 3.30145231526869e-05, + "loss": 0.4138, + "step": 15400 + }, + { + "epoch": 1.04, + "learning_rate": 3.295752490823283e-05, + "loss": 0.4436, + "step": 15450 + }, + { + "epoch": 1.05, + "learning_rate": 3.290052666377876e-05, + "loss": 0.4423, + "step": 15500 + }, + { + "epoch": 1.05, + "learning_rate": 3.2843528419324685e-05, + "loss": 0.4696, + "step": 15550 + }, + { + "epoch": 1.05, + "learning_rate": 3.278653017487061e-05, + "loss": 0.434, + "step": 15600 + }, + { + "epoch": 1.06, + "learning_rate": 3.272953193041655e-05, + "loss": 0.4509, + "step": 15650 + }, + { + "epoch": 1.06, + "learning_rate": 3.2672533685962476e-05, + "loss": 0.4651, + "step": 15700 + }, + { + "epoch": 1.07, + "learning_rate": 3.2615535441508404e-05, + "loss": 0.5966, + "step": 15750 + }, + { + "epoch": 1.07, + "learning_rate": 3.255853719705433e-05, + "loss": 0.4684, + "step": 15800 + }, + { + "epoch": 1.07, + "learning_rate": 3.250153895260026e-05, + "loss": 0.4602, + "step": 15850 + }, + { + "epoch": 1.08, + "learning_rate": 3.2444540708146195e-05, + "loss": 0.4792, + "step": 15900 + }, + { + "epoch": 1.08, + "learning_rate": 3.238754246369212e-05, + "loss": 0.489, + "step": 15950 + }, + { + "epoch": 1.08, + "learning_rate": 3.233054421923805e-05, + "loss": 0.4577, + "step": 16000 + }, + { + "epoch": 1.09, + "learning_rate": 3.227354597478398e-05, + "loss": 0.474, + "step": 16050 + }, + { + "epoch": 1.09, + "learning_rate": 3.221654773032991e-05, + "loss": 0.4908, + "step": 16100 + }, + { + "epoch": 1.09, + "learning_rate": 3.2159549485875835e-05, + "loss": 0.4544, + "step": 16150 + }, + { + "epoch": 1.1, + "learning_rate": 3.2102551241421763e-05, + "loss": 0.4703, + "step": 16200 + }, + { + "epoch": 1.1, + "learning_rate": 3.204555299696769e-05, + "loss": 0.4626, + "step": 16250 + }, + { + "epoch": 1.1, + "learning_rate": 3.198855475251362e-05, + "loss": 0.452, + "step": 16300 + }, + { + "epoch": 1.11, + "learning_rate": 3.1931556508059554e-05, + "loss": 0.463, + "step": 16350 + }, + { + "epoch": 1.11, + "learning_rate": 3.187455826360548e-05, + "loss": 0.4837, + "step": 16400 + }, + { + "epoch": 1.11, + "learning_rate": 3.181756001915141e-05, + "loss": 0.4514, + "step": 16450 + }, + { + "epoch": 1.12, + "learning_rate": 3.176056177469734e-05, + "loss": 0.4509, + "step": 16500 + }, + { + "epoch": 1.12, + "learning_rate": 3.170356353024327e-05, + "loss": 0.4866, + "step": 16550 + }, + { + "epoch": 1.12, + "learning_rate": 3.16465652857892e-05, + "loss": 0.4715, + "step": 16600 + }, + { + "epoch": 1.13, + "learning_rate": 3.158956704133513e-05, + "loss": 0.4567, + "step": 16650 + }, + { + "epoch": 1.13, + "learning_rate": 3.153256879688106e-05, + "loss": 0.4849, + "step": 16700 + }, + { + "epoch": 1.13, + "learning_rate": 3.1475570552426986e-05, + "loss": 0.4728, + "step": 16750 + }, + { + "epoch": 1.14, + "learning_rate": 3.1418572307972914e-05, + "loss": 0.4786, + "step": 16800 + }, + { + "epoch": 1.14, + "learning_rate": 3.136157406351884e-05, + "loss": 0.4524, + "step": 16850 + }, + { + "epoch": 1.14, + "learning_rate": 3.1304575819064777e-05, + "loss": 0.4636, + "step": 16900 + }, + { + "epoch": 1.15, + "learning_rate": 3.1247577574610705e-05, + "loss": 0.4702, + "step": 16950 + }, + { + "epoch": 1.15, + "learning_rate": 3.119057933015663e-05, + "loss": 0.4745, + "step": 17000 + }, + { + "epoch": 1.15, + "learning_rate": 3.113358108570256e-05, + "loss": 0.4628, + "step": 17050 + }, + { + "epoch": 1.16, + "learning_rate": 3.107658284124849e-05, + "loss": 0.4564, + "step": 17100 + }, + { + "epoch": 1.16, + "learning_rate": 3.1019584596794424e-05, + "loss": 0.441, + "step": 17150 + }, + { + "epoch": 1.16, + "learning_rate": 3.096258635234035e-05, + "loss": 0.4533, + "step": 17200 + }, + { + "epoch": 1.17, + "learning_rate": 3.090558810788628e-05, + "loss": 0.4554, + "step": 17250 + }, + { + "epoch": 1.17, + "learning_rate": 3.084858986343221e-05, + "loss": 0.4478, + "step": 17300 + }, + { + "epoch": 1.17, + "learning_rate": 3.0791591618978136e-05, + "loss": 0.4402, + "step": 17350 + }, + { + "epoch": 1.18, + "learning_rate": 3.073459337452407e-05, + "loss": 0.4424, + "step": 17400 + }, + { + "epoch": 1.18, + "learning_rate": 3.067759513007e-05, + "loss": 0.442, + "step": 17450 + }, + { + "epoch": 1.18, + "learning_rate": 3.062059688561593e-05, + "loss": 0.4467, + "step": 17500 + }, + { + "epoch": 1.19, + "learning_rate": 3.0563598641161855e-05, + "loss": 0.4426, + "step": 17550 + }, + { + "epoch": 1.19, + "learning_rate": 3.0506600396707786e-05, + "loss": 0.4461, + "step": 17600 + }, + { + "epoch": 1.19, + "learning_rate": 3.0449602152253714e-05, + "loss": 0.4512, + "step": 17650 + }, + { + "epoch": 1.2, + "learning_rate": 3.0392603907799642e-05, + "loss": 0.4496, + "step": 17700 + }, + { + "epoch": 1.2, + "learning_rate": 3.0335605663345567e-05, + "loss": 0.4622, + "step": 17750 + }, + { + "epoch": 1.2, + "learning_rate": 3.02786074188915e-05, + "loss": 0.4519, + "step": 17800 + }, + { + "epoch": 1.21, + "learning_rate": 3.0221609174437427e-05, + "loss": 0.4418, + "step": 17850 + }, + { + "epoch": 1.21, + "learning_rate": 3.0164610929983355e-05, + "loss": 0.4574, + "step": 17900 + }, + { + "epoch": 1.21, + "learning_rate": 3.0107612685529286e-05, + "loss": 0.4453, + "step": 17950 + }, + { + "epoch": 1.22, + "learning_rate": 3.0050614441075214e-05, + "loss": 0.4571, + "step": 18000 + }, + { + "epoch": 1.22, + "learning_rate": 2.9993616196621142e-05, + "loss": 0.4594, + "step": 18050 + }, + { + "epoch": 1.22, + "learning_rate": 2.9936617952167074e-05, + "loss": 0.4406, + "step": 18100 + }, + { + "epoch": 1.23, + "learning_rate": 2.9879619707713e-05, + "loss": 0.4556, + "step": 18150 + }, + { + "epoch": 1.23, + "learning_rate": 2.9822621463258933e-05, + "loss": 0.4724, + "step": 18200 + }, + { + "epoch": 1.23, + "learning_rate": 2.976562321880486e-05, + "loss": 0.4933, + "step": 18250 + }, + { + "epoch": 1.24, + "learning_rate": 2.970862497435079e-05, + "loss": 0.469, + "step": 18300 + }, + { + "epoch": 1.24, + "learning_rate": 2.965162672989672e-05, + "loss": 0.4717, + "step": 18350 + }, + { + "epoch": 1.24, + "learning_rate": 2.959462848544265e-05, + "loss": 0.4661, + "step": 18400 + }, + { + "epoch": 1.25, + "learning_rate": 2.953763024098858e-05, + "loss": 0.4688, + "step": 18450 + }, + { + "epoch": 1.25, + "learning_rate": 2.9480631996534508e-05, + "loss": 0.4663, + "step": 18500 + }, + { + "epoch": 1.25, + "learning_rate": 2.9423633752080436e-05, + "loss": 0.4566, + "step": 18550 + }, + { + "epoch": 1.26, + "learning_rate": 2.9366635507626368e-05, + "loss": 0.4506, + "step": 18600 + }, + { + "epoch": 1.26, + "learning_rate": 2.9309637263172296e-05, + "loss": 0.474, + "step": 18650 + }, + { + "epoch": 1.26, + "learning_rate": 2.9252639018718224e-05, + "loss": 0.4725, + "step": 18700 + }, + { + "epoch": 1.27, + "learning_rate": 2.9195640774264155e-05, + "loss": 0.4634, + "step": 18750 + }, + { + "epoch": 1.27, + "learning_rate": 2.9138642529810083e-05, + "loss": 0.4481, + "step": 18800 + }, + { + "epoch": 1.27, + "learning_rate": 2.9081644285356015e-05, + "loss": 0.4626, + "step": 18850 + }, + { + "epoch": 1.28, + "learning_rate": 2.9024646040901943e-05, + "loss": 0.481, + "step": 18900 + }, + { + "epoch": 1.28, + "learning_rate": 2.896764779644787e-05, + "loss": 0.4722, + "step": 18950 + }, + { + "epoch": 1.28, + "learning_rate": 2.8910649551993802e-05, + "loss": 0.442, + "step": 19000 + }, + { + "epoch": 1.29, + "learning_rate": 2.885365130753973e-05, + "loss": 0.4898, + "step": 19050 + }, + { + "epoch": 1.29, + "learning_rate": 2.8796653063085662e-05, + "loss": 0.4528, + "step": 19100 + }, + { + "epoch": 1.3, + "learning_rate": 2.873965481863159e-05, + "loss": 0.4724, + "step": 19150 + }, + { + "epoch": 1.3, + "learning_rate": 2.8682656574177518e-05, + "loss": 0.4491, + "step": 19200 + }, + { + "epoch": 1.3, + "learning_rate": 2.862565832972345e-05, + "loss": 0.4485, + "step": 19250 + }, + { + "epoch": 1.31, + "learning_rate": 2.8568660085269377e-05, + "loss": 0.4482, + "step": 19300 + }, + { + "epoch": 1.31, + "learning_rate": 2.8511661840815305e-05, + "loss": 0.4829, + "step": 19350 + }, + { + "epoch": 1.31, + "learning_rate": 2.845466359636123e-05, + "loss": 0.4445, + "step": 19400 + }, + { + "epoch": 1.32, + "learning_rate": 2.839766535190716e-05, + "loss": 0.4405, + "step": 19450 + }, + { + "epoch": 1.32, + "learning_rate": 2.834066710745309e-05, + "loss": 0.4384, + "step": 19500 + }, + { + "epoch": 1.32, + "learning_rate": 2.8283668862999018e-05, + "loss": 0.4467, + "step": 19550 + }, + { + "epoch": 1.33, + "learning_rate": 2.822667061854495e-05, + "loss": 0.4562, + "step": 19600 + }, + { + "epoch": 1.33, + "learning_rate": 2.8169672374090877e-05, + "loss": 0.4699, + "step": 19650 + }, + { + "epoch": 1.33, + "learning_rate": 2.8112674129636805e-05, + "loss": 0.4678, + "step": 19700 + }, + { + "epoch": 1.34, + "learning_rate": 2.8055675885182737e-05, + "loss": 0.4582, + "step": 19750 + }, + { + "epoch": 1.34, + "learning_rate": 2.7998677640728665e-05, + "loss": 0.4573, + "step": 19800 + }, + { + "epoch": 1.34, + "learning_rate": 2.7941679396274596e-05, + "loss": 0.4693, + "step": 19850 + }, + { + "epoch": 1.35, + "learning_rate": 2.7884681151820524e-05, + "loss": 0.4623, + "step": 19900 + }, + { + "epoch": 1.35, + "learning_rate": 2.7827682907366452e-05, + "loss": 0.4467, + "step": 19950 + }, + { + "epoch": 1.35, + "learning_rate": 2.7770684662912384e-05, + "loss": 0.4454, + "step": 20000 + }, + { + "epoch": 1.36, + "learning_rate": 2.771368641845831e-05, + "loss": 0.4502, + "step": 20050 + }, + { + "epoch": 1.36, + "learning_rate": 2.7656688174004243e-05, + "loss": 0.4671, + "step": 20100 + }, + { + "epoch": 1.36, + "learning_rate": 2.759968992955017e-05, + "loss": 0.4675, + "step": 20150 + }, + { + "epoch": 1.37, + "learning_rate": 2.75426916850961e-05, + "loss": 0.4585, + "step": 20200 + }, + { + "epoch": 1.37, + "learning_rate": 2.748569344064203e-05, + "loss": 0.4725, + "step": 20250 + }, + { + "epoch": 1.37, + "learning_rate": 2.742869519618796e-05, + "loss": 0.4883, + "step": 20300 + }, + { + "epoch": 1.38, + "learning_rate": 2.7371696951733887e-05, + "loss": 0.4526, + "step": 20350 + }, + { + "epoch": 1.38, + "learning_rate": 2.7314698707279818e-05, + "loss": 0.432, + "step": 20400 + }, + { + "epoch": 1.38, + "learning_rate": 2.7257700462825746e-05, + "loss": 0.4647, + "step": 20450 + }, + { + "epoch": 1.39, + "learning_rate": 2.7200702218371678e-05, + "loss": 0.4803, + "step": 20500 + }, + { + "epoch": 1.39, + "learning_rate": 2.7143703973917606e-05, + "loss": 0.4536, + "step": 20550 + }, + { + "epoch": 1.39, + "learning_rate": 2.7086705729463534e-05, + "loss": 0.4601, + "step": 20600 + }, + { + "epoch": 1.4, + "learning_rate": 2.7029707485009465e-05, + "loss": 0.4664, + "step": 20650 + }, + { + "epoch": 1.4, + "learning_rate": 2.6972709240555393e-05, + "loss": 0.4715, + "step": 20700 + }, + { + "epoch": 1.4, + "learning_rate": 2.6915710996101325e-05, + "loss": 0.451, + "step": 20750 + }, + { + "epoch": 1.41, + "learning_rate": 2.6858712751647253e-05, + "loss": 0.4567, + "step": 20800 + }, + { + "epoch": 1.41, + "learning_rate": 2.680171450719318e-05, + "loss": 0.4773, + "step": 20850 + }, + { + "epoch": 1.41, + "learning_rate": 2.6744716262739112e-05, + "loss": 0.4746, + "step": 20900 + }, + { + "epoch": 1.42, + "learning_rate": 2.668771801828504e-05, + "loss": 0.4636, + "step": 20950 + }, + { + "epoch": 1.42, + "learning_rate": 2.663071977383097e-05, + "loss": 0.4715, + "step": 21000 + }, + { + "epoch": 1.42, + "learning_rate": 2.6573721529376893e-05, + "loss": 0.4631, + "step": 21050 + }, + { + "epoch": 1.43, + "learning_rate": 2.6516723284922825e-05, + "loss": 0.4626, + "step": 21100 + }, + { + "epoch": 1.43, + "learning_rate": 2.6459725040468753e-05, + "loss": 0.458, + "step": 21150 + }, + { + "epoch": 1.43, + "learning_rate": 2.640272679601468e-05, + "loss": 0.4568, + "step": 21200 + }, + { + "epoch": 1.44, + "learning_rate": 2.6345728551560612e-05, + "loss": 0.4642, + "step": 21250 + }, + { + "epoch": 1.44, + "learning_rate": 2.628873030710654e-05, + "loss": 0.4558, + "step": 21300 + }, + { + "epoch": 1.44, + "learning_rate": 2.6231732062652468e-05, + "loss": 0.4539, + "step": 21350 + }, + { + "epoch": 1.45, + "learning_rate": 2.61747338181984e-05, + "loss": 0.4756, + "step": 21400 + }, + { + "epoch": 1.45, + "learning_rate": 2.6117735573744328e-05, + "loss": 0.4646, + "step": 21450 + }, + { + "epoch": 1.45, + "learning_rate": 2.606073732929026e-05, + "loss": 0.4461, + "step": 21500 + }, + { + "epoch": 1.46, + "learning_rate": 2.6003739084836187e-05, + "loss": 0.4643, + "step": 21550 + }, + { + "epoch": 1.46, + "learning_rate": 2.5946740840382115e-05, + "loss": 0.4728, + "step": 21600 + }, + { + "epoch": 1.46, + "learning_rate": 2.5889742595928047e-05, + "loss": 0.4643, + "step": 21650 + }, + { + "epoch": 1.47, + "learning_rate": 2.5832744351473975e-05, + "loss": 0.4754, + "step": 21700 + }, + { + "epoch": 1.47, + "learning_rate": 2.5775746107019906e-05, + "loss": 0.5094, + "step": 21750 + }, + { + "epoch": 1.47, + "learning_rate": 2.5718747862565834e-05, + "loss": 0.548, + "step": 21800 + }, + { + "epoch": 1.48, + "learning_rate": 2.5661749618111762e-05, + "loss": 0.5629, + "step": 21850 + }, + { + "epoch": 1.48, + "learning_rate": 2.5604751373657694e-05, + "loss": 0.5656, + "step": 21900 + }, + { + "epoch": 1.48, + "learning_rate": 2.5547753129203622e-05, + "loss": 0.5313, + "step": 21950 + }, + { + "epoch": 1.49, + "learning_rate": 2.549075488474955e-05, + "loss": 0.5195, + "step": 22000 + }, + { + "epoch": 1.49, + "learning_rate": 2.543375664029548e-05, + "loss": 0.5275, + "step": 22050 + }, + { + "epoch": 1.49, + "learning_rate": 2.537675839584141e-05, + "loss": 0.5084, + "step": 22100 + }, + { + "epoch": 1.5, + "learning_rate": 2.531976015138734e-05, + "loss": 0.4969, + "step": 22150 + }, + { + "epoch": 1.5, + "learning_rate": 2.526276190693327e-05, + "loss": 0.4924, + "step": 22200 + }, + { + "epoch": 1.5, + "learning_rate": 2.5205763662479197e-05, + "loss": 0.5039, + "step": 22250 + }, + { + "epoch": 1.51, + "learning_rate": 2.5148765418025128e-05, + "loss": 0.4906, + "step": 22300 + }, + { + "epoch": 1.51, + "learning_rate": 2.5091767173571056e-05, + "loss": 0.4634, + "step": 22350 + }, + { + "epoch": 1.51, + "learning_rate": 2.5034768929116988e-05, + "loss": 0.4807, + "step": 22400 + }, + { + "epoch": 1.52, + "learning_rate": 2.4977770684662912e-05, + "loss": 0.4635, + "step": 22450 + }, + { + "epoch": 1.52, + "learning_rate": 2.492077244020884e-05, + "loss": 0.4851, + "step": 22500 + }, + { + "epoch": 1.52, + "learning_rate": 2.4863774195754772e-05, + "loss": 0.4647, + "step": 22550 + }, + { + "epoch": 1.53, + "learning_rate": 2.48067759513007e-05, + "loss": 0.4764, + "step": 22600 + }, + { + "epoch": 1.53, + "learning_rate": 2.474977770684663e-05, + "loss": 0.4688, + "step": 22650 + }, + { + "epoch": 1.54, + "learning_rate": 2.469277946239256e-05, + "loss": 0.4592, + "step": 22700 + }, + { + "epoch": 1.54, + "learning_rate": 2.4635781217938488e-05, + "loss": 0.4809, + "step": 22750 + }, + { + "epoch": 1.54, + "learning_rate": 2.457878297348442e-05, + "loss": 0.4754, + "step": 22800 + }, + { + "epoch": 1.55, + "learning_rate": 2.4521784729030347e-05, + "loss": 0.4727, + "step": 22850 + }, + { + "epoch": 1.55, + "learning_rate": 2.446478648457628e-05, + "loss": 0.4643, + "step": 22900 + }, + { + "epoch": 1.55, + "learning_rate": 2.4407788240122207e-05, + "loss": 0.4628, + "step": 22950 + }, + { + "epoch": 1.56, + "learning_rate": 2.4350789995668135e-05, + "loss": 0.4587, + "step": 23000 + }, + { + "epoch": 1.56, + "learning_rate": 2.4293791751214066e-05, + "loss": 0.4665, + "step": 23050 + }, + { + "epoch": 1.56, + "learning_rate": 2.423679350675999e-05, + "loss": 0.4624, + "step": 23100 + }, + { + "epoch": 1.57, + "learning_rate": 2.4179795262305922e-05, + "loss": 0.4667, + "step": 23150 + }, + { + "epoch": 1.57, + "learning_rate": 2.412279701785185e-05, + "loss": 0.4898, + "step": 23200 + }, + { + "epoch": 1.57, + "learning_rate": 2.4065798773397778e-05, + "loss": 0.4808, + "step": 23250 + }, + { + "epoch": 1.58, + "learning_rate": 2.400880052894371e-05, + "loss": 0.4781, + "step": 23300 + }, + { + "epoch": 1.58, + "learning_rate": 2.3951802284489638e-05, + "loss": 0.4926, + "step": 23350 + }, + { + "epoch": 1.58, + "learning_rate": 2.389480404003557e-05, + "loss": 0.4804, + "step": 23400 + }, + { + "epoch": 1.59, + "learning_rate": 2.3837805795581497e-05, + "loss": 0.4645, + "step": 23450 + }, + { + "epoch": 1.59, + "learning_rate": 2.3780807551127425e-05, + "loss": 0.4867, + "step": 23500 + }, + { + "epoch": 1.59, + "learning_rate": 2.3723809306673357e-05, + "loss": 0.4763, + "step": 23550 + }, + { + "epoch": 1.6, + "learning_rate": 2.3666811062219285e-05, + "loss": 0.4924, + "step": 23600 + }, + { + "epoch": 1.6, + "learning_rate": 2.3609812817765213e-05, + "loss": 0.4916, + "step": 23650 + }, + { + "epoch": 1.6, + "learning_rate": 2.3552814573311144e-05, + "loss": 0.4834, + "step": 23700 + }, + { + "epoch": 1.61, + "learning_rate": 2.3495816328857072e-05, + "loss": 0.4842, + "step": 23750 + }, + { + "epoch": 1.61, + "learning_rate": 2.3438818084403004e-05, + "loss": 0.4999, + "step": 23800 + }, + { + "epoch": 1.61, + "learning_rate": 2.3381819839948932e-05, + "loss": 0.4782, + "step": 23850 + }, + { + "epoch": 1.62, + "learning_rate": 2.332482159549486e-05, + "loss": 0.4867, + "step": 23900 + }, + { + "epoch": 1.62, + "learning_rate": 2.3267823351040788e-05, + "loss": 0.473, + "step": 23950 + }, + { + "epoch": 1.62, + "learning_rate": 2.3210825106586716e-05, + "loss": 0.4925, + "step": 24000 + }, + { + "epoch": 1.63, + "learning_rate": 2.3153826862132647e-05, + "loss": 0.4853, + "step": 24050 + }, + { + "epoch": 1.63, + "learning_rate": 2.3096828617678575e-05, + "loss": 0.4928, + "step": 24100 + }, + { + "epoch": 1.63, + "learning_rate": 2.3039830373224504e-05, + "loss": 0.4842, + "step": 24150 + }, + { + "epoch": 1.64, + "learning_rate": 2.2982832128770435e-05, + "loss": 0.4831, + "step": 24200 + }, + { + "epoch": 1.64, + "learning_rate": 2.2925833884316363e-05, + "loss": 0.4822, + "step": 24250 + }, + { + "epoch": 1.64, + "learning_rate": 2.2868835639862294e-05, + "loss": 0.4623, + "step": 24300 + }, + { + "epoch": 1.65, + "learning_rate": 2.2811837395408223e-05, + "loss": 0.4582, + "step": 24350 + }, + { + "epoch": 1.65, + "learning_rate": 2.275483915095415e-05, + "loss": 0.4681, + "step": 24400 + }, + { + "epoch": 1.65, + "learning_rate": 2.2697840906500082e-05, + "loss": 0.467, + "step": 24450 + }, + { + "epoch": 1.66, + "learning_rate": 2.264084266204601e-05, + "loss": 0.4718, + "step": 24500 + }, + { + "epoch": 1.66, + "learning_rate": 2.258384441759194e-05, + "loss": 0.4712, + "step": 24550 + }, + { + "epoch": 1.66, + "learning_rate": 2.252684617313787e-05, + "loss": 0.472, + "step": 24600 + }, + { + "epoch": 1.67, + "learning_rate": 2.2469847928683798e-05, + "loss": 0.4705, + "step": 24650 + }, + { + "epoch": 1.67, + "learning_rate": 2.241284968422973e-05, + "loss": 0.4553, + "step": 24700 + }, + { + "epoch": 1.67, + "learning_rate": 2.2355851439775654e-05, + "loss": 0.4804, + "step": 24750 + }, + { + "epoch": 1.68, + "learning_rate": 2.2298853195321585e-05, + "loss": 0.4814, + "step": 24800 + }, + { + "epoch": 1.68, + "learning_rate": 2.2241854950867513e-05, + "loss": 0.4678, + "step": 24850 + }, + { + "epoch": 1.68, + "learning_rate": 2.218485670641344e-05, + "loss": 0.4675, + "step": 24900 + }, + { + "epoch": 1.69, + "learning_rate": 2.2127858461959373e-05, + "loss": 0.4655, + "step": 24950 + }, + { + "epoch": 1.69, + "learning_rate": 2.20708602175053e-05, + "loss": 0.4687, + "step": 25000 + }, + { + "epoch": 1.69, + "learning_rate": 2.2013861973051232e-05, + "loss": 0.4589, + "step": 25050 + }, + { + "epoch": 1.7, + "learning_rate": 2.195686372859716e-05, + "loss": 0.4671, + "step": 25100 + }, + { + "epoch": 1.7, + "learning_rate": 2.189986548414309e-05, + "loss": 0.4679, + "step": 25150 + }, + { + "epoch": 1.7, + "learning_rate": 2.184286723968902e-05, + "loss": 0.4652, + "step": 25200 + }, + { + "epoch": 1.71, + "learning_rate": 2.1785868995234948e-05, + "loss": 0.4703, + "step": 25250 + }, + { + "epoch": 1.71, + "learning_rate": 2.1728870750780876e-05, + "loss": 0.4699, + "step": 25300 + }, + { + "epoch": 1.71, + "learning_rate": 2.1671872506326807e-05, + "loss": 0.4767, + "step": 25350 + }, + { + "epoch": 1.72, + "learning_rate": 2.1614874261872735e-05, + "loss": 0.4657, + "step": 25400 + }, + { + "epoch": 1.72, + "learning_rate": 2.1557876017418667e-05, + "loss": 0.4697, + "step": 25450 + }, + { + "epoch": 1.72, + "learning_rate": 2.1500877772964595e-05, + "loss": 0.4573, + "step": 25500 + }, + { + "epoch": 1.73, + "learning_rate": 2.1443879528510523e-05, + "loss": 0.4732, + "step": 25550 + }, + { + "epoch": 1.73, + "learning_rate": 2.138688128405645e-05, + "loss": 0.4723, + "step": 25600 + }, + { + "epoch": 1.73, + "learning_rate": 2.132988303960238e-05, + "loss": 0.483, + "step": 25650 + }, + { + "epoch": 1.74, + "learning_rate": 2.127288479514831e-05, + "loss": 0.4727, + "step": 25700 + }, + { + "epoch": 1.74, + "learning_rate": 2.121588655069424e-05, + "loss": 0.4577, + "step": 25750 + }, + { + "epoch": 1.74, + "learning_rate": 2.115888830624017e-05, + "loss": 0.4557, + "step": 25800 + }, + { + "epoch": 1.75, + "learning_rate": 2.1101890061786098e-05, + "loss": 0.46, + "step": 25850 + }, + { + "epoch": 1.75, + "learning_rate": 2.1044891817332026e-05, + "loss": 0.4819, + "step": 25900 + }, + { + "epoch": 1.75, + "learning_rate": 2.0987893572877958e-05, + "loss": 0.4792, + "step": 25950 + }, + { + "epoch": 1.76, + "learning_rate": 2.0930895328423886e-05, + "loss": 0.4655, + "step": 26000 + }, + { + "epoch": 1.76, + "learning_rate": 2.0873897083969814e-05, + "loss": 0.4756, + "step": 26050 + }, + { + "epoch": 1.77, + "learning_rate": 2.0816898839515745e-05, + "loss": 0.4788, + "step": 26100 + }, + { + "epoch": 1.77, + "learning_rate": 2.0759900595061673e-05, + "loss": 0.4684, + "step": 26150 + }, + { + "epoch": 1.77, + "learning_rate": 2.0702902350607605e-05, + "loss": 0.4458, + "step": 26200 + }, + { + "epoch": 1.78, + "learning_rate": 2.0645904106153533e-05, + "loss": 0.4838, + "step": 26250 + }, + { + "epoch": 1.78, + "learning_rate": 2.058890586169946e-05, + "loss": 0.4629, + "step": 26300 + }, + { + "epoch": 1.78, + "learning_rate": 2.0531907617245392e-05, + "loss": 0.471, + "step": 26350 + }, + { + "epoch": 1.79, + "learning_rate": 2.0474909372791317e-05, + "loss": 0.4667, + "step": 26400 + }, + { + "epoch": 1.79, + "learning_rate": 2.0417911128337248e-05, + "loss": 0.4684, + "step": 26450 + }, + { + "epoch": 1.79, + "learning_rate": 2.0360912883883176e-05, + "loss": 0.4879, + "step": 26500 + }, + { + "epoch": 1.8, + "learning_rate": 2.0303914639429104e-05, + "loss": 0.4789, + "step": 26550 + }, + { + "epoch": 1.8, + "learning_rate": 2.0246916394975036e-05, + "loss": 0.4743, + "step": 26600 + }, + { + "epoch": 1.8, + "learning_rate": 2.0189918150520964e-05, + "loss": 0.4621, + "step": 26650 + }, + { + "epoch": 1.81, + "learning_rate": 2.0132919906066895e-05, + "loss": 0.4463, + "step": 26700 + }, + { + "epoch": 1.81, + "learning_rate": 2.0075921661612823e-05, + "loss": 0.4662, + "step": 26750 + }, + { + "epoch": 1.81, + "learning_rate": 2.001892341715875e-05, + "loss": 0.4968, + "step": 26800 + }, + { + "epoch": 1.82, + "learning_rate": 1.9961925172704683e-05, + "loss": 0.4652, + "step": 26850 + }, + { + "epoch": 1.82, + "learning_rate": 1.990492692825061e-05, + "loss": 0.4772, + "step": 26900 + }, + { + "epoch": 1.82, + "learning_rate": 1.984792868379654e-05, + "loss": 0.4855, + "step": 26950 + }, + { + "epoch": 1.83, + "learning_rate": 1.979093043934247e-05, + "loss": 0.4771, + "step": 27000 + }, + { + "epoch": 1.83, + "learning_rate": 1.97339321948884e-05, + "loss": 0.4758, + "step": 27050 + }, + { + "epoch": 1.83, + "learning_rate": 1.967693395043433e-05, + "loss": 0.4901, + "step": 27100 + }, + { + "epoch": 1.84, + "learning_rate": 1.9619935705980258e-05, + "loss": 0.4627, + "step": 27150 + }, + { + "epoch": 1.84, + "learning_rate": 1.9562937461526186e-05, + "loss": 0.4656, + "step": 27200 + }, + { + "epoch": 1.84, + "learning_rate": 1.9505939217072114e-05, + "loss": 0.4754, + "step": 27250 + }, + { + "epoch": 1.85, + "learning_rate": 1.9448940972618042e-05, + "loss": 0.4963, + "step": 27300 + }, + { + "epoch": 1.85, + "learning_rate": 1.9391942728163973e-05, + "loss": 0.4665, + "step": 27350 + }, + { + "epoch": 1.85, + "learning_rate": 1.93349444837099e-05, + "loss": 0.4733, + "step": 27400 + }, + { + "epoch": 1.86, + "learning_rate": 1.9277946239255833e-05, + "loss": 0.4678, + "step": 27450 + }, + { + "epoch": 1.86, + "learning_rate": 1.922094799480176e-05, + "loss": 0.4889, + "step": 27500 + }, + { + "epoch": 1.86, + "learning_rate": 1.916394975034769e-05, + "loss": 0.4806, + "step": 27550 + }, + { + "epoch": 1.87, + "learning_rate": 1.910695150589362e-05, + "loss": 0.4547, + "step": 27600 + }, + { + "epoch": 1.87, + "learning_rate": 1.904995326143955e-05, + "loss": 0.492, + "step": 27650 + }, + { + "epoch": 1.87, + "learning_rate": 1.8992955016985477e-05, + "loss": 0.4799, + "step": 27700 + }, + { + "epoch": 1.88, + "learning_rate": 1.8935956772531408e-05, + "loss": 0.4741, + "step": 27750 + }, + { + "epoch": 1.88, + "learning_rate": 1.8878958528077336e-05, + "loss": 0.4816, + "step": 27800 + }, + { + "epoch": 1.88, + "learning_rate": 1.8821960283623268e-05, + "loss": 0.4761, + "step": 27850 + }, + { + "epoch": 1.89, + "learning_rate": 1.8764962039169196e-05, + "loss": 0.4596, + "step": 27900 + }, + { + "epoch": 1.89, + "learning_rate": 1.8707963794715124e-05, + "loss": 0.4823, + "step": 27950 + }, + { + "epoch": 1.89, + "learning_rate": 1.8650965550261055e-05, + "loss": 0.4787, + "step": 28000 + }, + { + "epoch": 1.9, + "learning_rate": 1.859396730580698e-05, + "loss": 0.464, + "step": 28050 + }, + { + "epoch": 1.9, + "learning_rate": 1.853696906135291e-05, + "loss": 0.4784, + "step": 28100 + }, + { + "epoch": 1.9, + "learning_rate": 1.847997081689884e-05, + "loss": 0.4931, + "step": 28150 + }, + { + "epoch": 1.91, + "learning_rate": 1.8422972572444767e-05, + "loss": 0.4663, + "step": 28200 + }, + { + "epoch": 1.91, + "learning_rate": 1.83659743279907e-05, + "loss": 0.468, + "step": 28250 + }, + { + "epoch": 1.91, + "learning_rate": 1.8308976083536627e-05, + "loss": 0.4927, + "step": 28300 + }, + { + "epoch": 1.92, + "learning_rate": 1.8251977839082558e-05, + "loss": 0.4759, + "step": 28350 + }, + { + "epoch": 1.92, + "learning_rate": 1.8194979594628486e-05, + "loss": 0.4773, + "step": 28400 + }, + { + "epoch": 1.92, + "learning_rate": 1.8137981350174414e-05, + "loss": 0.4849, + "step": 28450 + }, + { + "epoch": 1.93, + "learning_rate": 1.8080983105720346e-05, + "loss": 0.4894, + "step": 28500 + }, + { + "epoch": 1.93, + "learning_rate": 1.8023984861266274e-05, + "loss": 0.4584, + "step": 28550 + }, + { + "epoch": 1.93, + "learning_rate": 1.7966986616812205e-05, + "loss": 0.4739, + "step": 28600 + }, + { + "epoch": 1.94, + "learning_rate": 1.7909988372358133e-05, + "loss": 0.4971, + "step": 28650 + }, + { + "epoch": 1.94, + "learning_rate": 1.785299012790406e-05, + "loss": 0.4737, + "step": 28700 + }, + { + "epoch": 1.94, + "learning_rate": 1.7795991883449993e-05, + "loss": 0.4674, + "step": 28750 + }, + { + "epoch": 1.95, + "learning_rate": 1.773899363899592e-05, + "loss": 0.5095, + "step": 28800 + }, + { + "epoch": 1.95, + "learning_rate": 1.768199539454185e-05, + "loss": 0.6674, + "step": 28850 + }, + { + "epoch": 1.95, + "learning_rate": 1.7624997150087777e-05, + "loss": 0.683, + "step": 28900 + }, + { + "epoch": 1.96, + "learning_rate": 1.7567998905633705e-05, + "loss": 0.554, + "step": 28950 + }, + { + "epoch": 1.96, + "learning_rate": 1.7511000661179637e-05, + "loss": 0.529, + "step": 29000 + }, + { + "epoch": 1.96, + "learning_rate": 1.7454002416725565e-05, + "loss": 0.5206, + "step": 29050 + }, + { + "epoch": 1.97, + "learning_rate": 1.7397004172271496e-05, + "loss": 0.5223, + "step": 29100 + }, + { + "epoch": 1.97, + "learning_rate": 1.7340005927817424e-05, + "loss": 0.5134, + "step": 29150 + }, + { + "epoch": 1.97, + "learning_rate": 1.7283007683363352e-05, + "loss": 0.5048, + "step": 29200 + }, + { + "epoch": 1.98, + "learning_rate": 1.7226009438909284e-05, + "loss": 0.5268, + "step": 29250 + }, + { + "epoch": 1.98, + "learning_rate": 1.716901119445521e-05, + "loss": 0.5021, + "step": 29300 + }, + { + "epoch": 1.98, + "learning_rate": 1.711201295000114e-05, + "loss": 0.507, + "step": 29350 + }, + { + "epoch": 1.99, + "learning_rate": 1.705501470554707e-05, + "loss": 0.5111, + "step": 29400 + }, + { + "epoch": 1.99, + "learning_rate": 1.6998016461093e-05, + "loss": 0.5057, + "step": 29450 + }, + { + "epoch": 1.99, + "learning_rate": 1.694101821663893e-05, + "loss": 0.5187, + "step": 29500 + }, + { + "epoch": 2.0, + "learning_rate": 1.688401997218486e-05, + "loss": 0.5258, + "step": 29550 + }, + { + "epoch": 2.0, + "learning_rate": 1.6827021727730787e-05, + "loss": 0.5441, + "step": 29600 + }, + { + "epoch": 2.01, + "learning_rate": 1.6770023483276718e-05, + "loss": 0.513, + "step": 29650 + }, + { + "epoch": 2.01, + "learning_rate": 1.6713025238822643e-05, + "loss": 0.5211, + "step": 29700 + }, + { + "epoch": 2.01, + "learning_rate": 1.6656026994368574e-05, + "loss": 0.5033, + "step": 29750 + }, + { + "epoch": 2.02, + "learning_rate": 1.6599028749914502e-05, + "loss": 0.5205, + "step": 29800 + }, + { + "epoch": 2.02, + "learning_rate": 1.654203050546043e-05, + "loss": 0.5044, + "step": 29850 + }, + { + "epoch": 2.02, + "learning_rate": 1.6485032261006362e-05, + "loss": 0.5123, + "step": 29900 + }, + { + "epoch": 2.03, + "learning_rate": 1.642803401655229e-05, + "loss": 0.5146, + "step": 29950 + }, + { + "epoch": 2.03, + "learning_rate": 1.637103577209822e-05, + "loss": 0.5034, + "step": 30000 + }, + { + "epoch": 2.03, + "learning_rate": 1.631403752764415e-05, + "loss": 0.5002, + "step": 30050 + }, + { + "epoch": 2.04, + "learning_rate": 1.6257039283190077e-05, + "loss": 0.5213, + "step": 30100 + }, + { + "epoch": 2.04, + "learning_rate": 1.620004103873601e-05, + "loss": 0.5083, + "step": 30150 + }, + { + "epoch": 2.04, + "learning_rate": 1.6143042794281937e-05, + "loss": 0.5096, + "step": 30200 + }, + { + "epoch": 2.05, + "learning_rate": 1.608604454982787e-05, + "loss": 0.5147, + "step": 30250 + }, + { + "epoch": 2.05, + "learning_rate": 1.6029046305373796e-05, + "loss": 0.4939, + "step": 30300 + }, + { + "epoch": 2.05, + "learning_rate": 1.5972048060919724e-05, + "loss": 0.5027, + "step": 30350 + }, + { + "epoch": 2.06, + "learning_rate": 1.5915049816465656e-05, + "loss": 0.5152, + "step": 30400 + }, + { + "epoch": 2.06, + "learning_rate": 1.5858051572011584e-05, + "loss": 0.517, + "step": 30450 + }, + { + "epoch": 2.06, + "learning_rate": 1.5801053327557512e-05, + "loss": 0.4968, + "step": 30500 + }, + { + "epoch": 2.07, + "learning_rate": 1.574405508310344e-05, + "loss": 0.5201, + "step": 30550 + }, + { + "epoch": 2.07, + "learning_rate": 1.5687056838649368e-05, + "loss": 0.5068, + "step": 30600 + }, + { + "epoch": 2.07, + "learning_rate": 1.56300585941953e-05, + "loss": 0.4969, + "step": 30650 + }, + { + "epoch": 2.08, + "learning_rate": 1.5573060349741228e-05, + "loss": 0.5006, + "step": 30700 + }, + { + "epoch": 2.08, + "learning_rate": 1.551606210528716e-05, + "loss": 0.5208, + "step": 30750 + }, + { + "epoch": 2.08, + "learning_rate": 1.5459063860833087e-05, + "loss": 0.5175, + "step": 30800 + }, + { + "epoch": 2.09, + "learning_rate": 1.5402065616379015e-05, + "loss": 0.4906, + "step": 30850 + }, + { + "epoch": 2.09, + "learning_rate": 1.5345067371924947e-05, + "loss": 0.5021, + "step": 30900 + }, + { + "epoch": 2.09, + "learning_rate": 1.5288069127470875e-05, + "loss": 0.5058, + "step": 30950 + }, + { + "epoch": 2.1, + "learning_rate": 1.5231070883016804e-05, + "loss": 0.5035, + "step": 31000 + }, + { + "epoch": 2.1, + "learning_rate": 1.5174072638562734e-05, + "loss": 0.4971, + "step": 31050 + }, + { + "epoch": 2.1, + "learning_rate": 1.5117074394108662e-05, + "loss": 0.5042, + "step": 31100 + }, + { + "epoch": 2.11, + "learning_rate": 1.5060076149654592e-05, + "loss": 0.5126, + "step": 31150 + }, + { + "epoch": 2.11, + "learning_rate": 1.5003077905200522e-05, + "loss": 0.5091, + "step": 31200 + }, + { + "epoch": 2.11, + "learning_rate": 1.4946079660746451e-05, + "loss": 0.5054, + "step": 31250 + }, + { + "epoch": 2.12, + "learning_rate": 1.488908141629238e-05, + "loss": 0.5025, + "step": 31300 + }, + { + "epoch": 2.12, + "learning_rate": 1.4832083171838308e-05, + "loss": 0.5099, + "step": 31350 + }, + { + "epoch": 2.12, + "learning_rate": 1.4775084927384236e-05, + "loss": 0.4915, + "step": 31400 + }, + { + "epoch": 2.13, + "learning_rate": 1.4718086682930165e-05, + "loss": 0.5073, + "step": 31450 + }, + { + "epoch": 2.13, + "learning_rate": 1.4661088438476095e-05, + "loss": 0.5031, + "step": 31500 + }, + { + "epoch": 2.13, + "learning_rate": 1.4604090194022025e-05, + "loss": 0.5181, + "step": 31550 + }, + { + "epoch": 2.14, + "learning_rate": 1.4547091949567953e-05, + "loss": 0.5257, + "step": 31600 + }, + { + "epoch": 2.14, + "learning_rate": 1.4490093705113883e-05, + "loss": 0.5142, + "step": 31650 + }, + { + "epoch": 2.14, + "learning_rate": 1.4433095460659812e-05, + "loss": 0.5039, + "step": 31700 + }, + { + "epoch": 2.15, + "learning_rate": 1.4376097216205742e-05, + "loss": 0.5099, + "step": 31750 + }, + { + "epoch": 2.15, + "learning_rate": 1.431909897175167e-05, + "loss": 0.5162, + "step": 31800 + }, + { + "epoch": 2.15, + "learning_rate": 1.42621007272976e-05, + "loss": 0.505, + "step": 31850 + }, + { + "epoch": 2.16, + "learning_rate": 1.420510248284353e-05, + "loss": 0.503, + "step": 31900 + }, + { + "epoch": 2.16, + "learning_rate": 1.414810423838946e-05, + "loss": 0.5164, + "step": 31950 + }, + { + "epoch": 2.16, + "learning_rate": 1.409110599393539e-05, + "loss": 0.5147, + "step": 32000 + }, + { + "epoch": 2.17, + "learning_rate": 1.4034107749481317e-05, + "loss": 0.4865, + "step": 32050 + }, + { + "epoch": 2.17, + "learning_rate": 1.3977109505027247e-05, + "loss": 0.5074, + "step": 32100 + }, + { + "epoch": 2.17, + "learning_rate": 1.3920111260573173e-05, + "loss": 0.4984, + "step": 32150 + }, + { + "epoch": 2.18, + "learning_rate": 1.3863113016119103e-05, + "loss": 0.4858, + "step": 32200 + }, + { + "epoch": 2.18, + "learning_rate": 1.3806114771665033e-05, + "loss": 0.5167, + "step": 32250 + }, + { + "epoch": 2.18, + "learning_rate": 1.3749116527210963e-05, + "loss": 0.5001, + "step": 32300 + }, + { + "epoch": 2.19, + "learning_rate": 1.369211828275689e-05, + "loss": 0.5178, + "step": 32350 + }, + { + "epoch": 2.19, + "learning_rate": 1.363512003830282e-05, + "loss": 0.5179, + "step": 32400 + }, + { + "epoch": 2.19, + "learning_rate": 1.357812179384875e-05, + "loss": 0.4965, + "step": 32450 + }, + { + "epoch": 2.2, + "learning_rate": 1.352112354939468e-05, + "loss": 0.503, + "step": 32500 + }, + { + "epoch": 2.2, + "learning_rate": 1.3464125304940608e-05, + "loss": 0.5151, + "step": 32550 + }, + { + "epoch": 2.2, + "learning_rate": 1.3407127060486538e-05, + "loss": 0.4917, + "step": 32600 + }, + { + "epoch": 2.21, + "learning_rate": 1.3350128816032467e-05, + "loss": 0.4839, + "step": 32650 + }, + { + "epoch": 2.21, + "learning_rate": 1.3293130571578397e-05, + "loss": 0.5002, + "step": 32700 + }, + { + "epoch": 2.21, + "learning_rate": 1.3236132327124325e-05, + "loss": 0.5082, + "step": 32750 + }, + { + "epoch": 2.22, + "learning_rate": 1.3179134082670255e-05, + "loss": 0.5221, + "step": 32800 + }, + { + "epoch": 2.22, + "learning_rate": 1.3122135838216185e-05, + "loss": 0.5093, + "step": 32850 + }, + { + "epoch": 2.22, + "learning_rate": 1.3065137593762114e-05, + "loss": 0.5144, + "step": 32900 + }, + { + "epoch": 2.23, + "learning_rate": 1.300813934930804e-05, + "loss": 0.4986, + "step": 32950 + }, + { + "epoch": 2.23, + "learning_rate": 1.295114110485397e-05, + "loss": 0.5137, + "step": 33000 + }, + { + "epoch": 2.24, + "learning_rate": 1.2894142860399899e-05, + "loss": 0.5076, + "step": 33050 + }, + { + "epoch": 2.24, + "learning_rate": 1.2837144615945828e-05, + "loss": 0.5125, + "step": 33100 + }, + { + "epoch": 2.24, + "learning_rate": 1.2780146371491758e-05, + "loss": 0.4967, + "step": 33150 + }, + { + "epoch": 2.25, + "learning_rate": 1.2723148127037688e-05, + "loss": 0.5228, + "step": 33200 + }, + { + "epoch": 2.25, + "learning_rate": 1.2666149882583616e-05, + "loss": 0.4989, + "step": 33250 + }, + { + "epoch": 2.25, + "learning_rate": 1.2609151638129546e-05, + "loss": 0.5216, + "step": 33300 + }, + { + "epoch": 2.26, + "learning_rate": 1.2552153393675475e-05, + "loss": 0.5039, + "step": 33350 + }, + { + "epoch": 2.26, + "learning_rate": 1.2495155149221405e-05, + "loss": 0.5053, + "step": 33400 + }, + { + "epoch": 2.26, + "learning_rate": 1.2438156904767335e-05, + "loss": 0.5013, + "step": 33450 + }, + { + "epoch": 2.27, + "learning_rate": 1.2381158660313263e-05, + "loss": 0.513, + "step": 33500 + }, + { + "epoch": 2.27, + "learning_rate": 1.2324160415859193e-05, + "loss": 0.4902, + "step": 33550 + }, + { + "epoch": 2.27, + "learning_rate": 1.226716217140512e-05, + "loss": 0.519, + "step": 33600 + }, + { + "epoch": 2.28, + "learning_rate": 1.221016392695105e-05, + "loss": 0.4956, + "step": 33650 + }, + { + "epoch": 2.28, + "learning_rate": 1.215316568249698e-05, + "loss": 0.5088, + "step": 33700 + }, + { + "epoch": 2.28, + "learning_rate": 1.2096167438042908e-05, + "loss": 0.5174, + "step": 33750 + }, + { + "epoch": 2.29, + "learning_rate": 1.2039169193588838e-05, + "loss": 0.5183, + "step": 33800 + }, + { + "epoch": 2.29, + "learning_rate": 1.1982170949134768e-05, + "loss": 0.4934, + "step": 33850 + }, + { + "epoch": 2.29, + "learning_rate": 1.1925172704680698e-05, + "loss": 0.5109, + "step": 33900 + }, + { + "epoch": 2.3, + "learning_rate": 1.1868174460226626e-05, + "loss": 0.5163, + "step": 33950 + }, + { + "epoch": 2.3, + "learning_rate": 1.1811176215772554e-05, + "loss": 0.5088, + "step": 34000 + }, + { + "epoch": 2.3, + "learning_rate": 1.1754177971318483e-05, + "loss": 0.5143, + "step": 34050 + }, + { + "epoch": 2.31, + "learning_rate": 1.1697179726864413e-05, + "loss": 0.5087, + "step": 34100 + }, + { + "epoch": 2.31, + "learning_rate": 1.1640181482410343e-05, + "loss": 0.5253, + "step": 34150 + }, + { + "epoch": 2.31, + "learning_rate": 1.1583183237956271e-05, + "loss": 0.5104, + "step": 34200 + }, + { + "epoch": 2.32, + "learning_rate": 1.15261849935022e-05, + "loss": 0.513, + "step": 34250 + }, + { + "epoch": 2.32, + "learning_rate": 1.146918674904813e-05, + "loss": 0.5103, + "step": 34300 + }, + { + "epoch": 2.32, + "learning_rate": 1.141218850459406e-05, + "loss": 0.4975, + "step": 34350 + }, + { + "epoch": 2.33, + "learning_rate": 1.1355190260139988e-05, + "loss": 0.5092, + "step": 34400 + }, + { + "epoch": 2.33, + "learning_rate": 1.1298192015685916e-05, + "loss": 0.5011, + "step": 34450 + }, + { + "epoch": 2.33, + "learning_rate": 1.1241193771231846e-05, + "loss": 0.5027, + "step": 34500 + }, + { + "epoch": 2.34, + "learning_rate": 1.1184195526777776e-05, + "loss": 0.5034, + "step": 34550 + }, + { + "epoch": 2.34, + "learning_rate": 1.1127197282323706e-05, + "loss": 0.4982, + "step": 34600 + }, + { + "epoch": 2.34, + "learning_rate": 1.1070199037869634e-05, + "loss": 0.5037, + "step": 34650 + }, + { + "epoch": 2.35, + "learning_rate": 1.1013200793415563e-05, + "loss": 0.4958, + "step": 34700 + }, + { + "epoch": 2.35, + "learning_rate": 1.0956202548961493e-05, + "loss": 0.5199, + "step": 34750 + }, + { + "epoch": 2.35, + "learning_rate": 1.0899204304507421e-05, + "loss": 0.5108, + "step": 34800 + }, + { + "epoch": 2.36, + "learning_rate": 1.0842206060053351e-05, + "loss": 0.505, + "step": 34850 + }, + { + "epoch": 2.36, + "learning_rate": 1.0785207815599279e-05, + "loss": 0.5053, + "step": 34900 + }, + { + "epoch": 2.36, + "learning_rate": 1.0728209571145209e-05, + "loss": 0.5072, + "step": 34950 + }, + { + "epoch": 2.37, + "learning_rate": 1.0671211326691138e-05, + "loss": 0.523, + "step": 35000 + }, + { + "epoch": 2.37, + "learning_rate": 1.0614213082237068e-05, + "loss": 0.5121, + "step": 35050 + }, + { + "epoch": 2.37, + "learning_rate": 1.0557214837782998e-05, + "loss": 0.5072, + "step": 35100 + }, + { + "epoch": 2.38, + "learning_rate": 1.0500216593328926e-05, + "loss": 0.4977, + "step": 35150 + }, + { + "epoch": 2.38, + "learning_rate": 1.0443218348874856e-05, + "loss": 0.5165, + "step": 35200 + }, + { + "epoch": 2.38, + "learning_rate": 1.0386220104420784e-05, + "loss": 0.5042, + "step": 35250 + }, + { + "epoch": 2.39, + "learning_rate": 1.0329221859966714e-05, + "loss": 0.516, + "step": 35300 + }, + { + "epoch": 2.39, + "learning_rate": 1.0272223615512643e-05, + "loss": 0.4964, + "step": 35350 + }, + { + "epoch": 2.39, + "learning_rate": 1.0215225371058571e-05, + "loss": 0.4903, + "step": 35400 + }, + { + "epoch": 2.4, + "learning_rate": 1.0158227126604501e-05, + "loss": 0.5009, + "step": 35450 + }, + { + "epoch": 2.4, + "learning_rate": 1.010122888215043e-05, + "loss": 0.5015, + "step": 35500 + }, + { + "epoch": 2.4, + "learning_rate": 1.004423063769636e-05, + "loss": 0.4986, + "step": 35550 + }, + { + "epoch": 2.41, + "learning_rate": 9.987232393242289e-06, + "loss": 0.5018, + "step": 35600 + }, + { + "epoch": 2.41, + "learning_rate": 9.930234148788217e-06, + "loss": 0.5197, + "step": 35650 + }, + { + "epoch": 2.41, + "learning_rate": 9.873235904334146e-06, + "loss": 0.5129, + "step": 35700 + }, + { + "epoch": 2.42, + "learning_rate": 9.816237659880076e-06, + "loss": 0.5091, + "step": 35750 + }, + { + "epoch": 2.42, + "learning_rate": 9.759239415426006e-06, + "loss": 0.5197, + "step": 35800 + }, + { + "epoch": 2.42, + "learning_rate": 9.702241170971934e-06, + "loss": 0.4794, + "step": 35850 + }, + { + "epoch": 2.43, + "learning_rate": 9.645242926517864e-06, + "loss": 0.5027, + "step": 35900 + }, + { + "epoch": 2.43, + "learning_rate": 9.588244682063793e-06, + "loss": 0.5051, + "step": 35950 + }, + { + "epoch": 2.43, + "learning_rate": 9.531246437609723e-06, + "loss": 0.5007, + "step": 36000 + }, + { + "epoch": 2.44, + "learning_rate": 9.474248193155651e-06, + "loss": 0.5028, + "step": 36050 + }, + { + "epoch": 2.44, + "learning_rate": 9.41724994870158e-06, + "loss": 0.5173, + "step": 36100 + }, + { + "epoch": 2.44, + "learning_rate": 9.360251704247509e-06, + "loss": 0.4957, + "step": 36150 + }, + { + "epoch": 2.45, + "learning_rate": 9.303253459793439e-06, + "loss": 0.4869, + "step": 36200 + }, + { + "epoch": 2.45, + "learning_rate": 9.246255215339369e-06, + "loss": 0.5006, + "step": 36250 + }, + { + "epoch": 2.45, + "learning_rate": 9.189256970885297e-06, + "loss": 0.5063, + "step": 36300 + }, + { + "epoch": 2.46, + "learning_rate": 9.132258726431226e-06, + "loss": 0.4872, + "step": 36350 + }, + { + "epoch": 2.46, + "learning_rate": 9.075260481977156e-06, + "loss": 0.5157, + "step": 36400 + }, + { + "epoch": 2.47, + "learning_rate": 9.018262237523084e-06, + "loss": 0.4927, + "step": 36450 + }, + { + "epoch": 2.47, + "learning_rate": 8.961263993069014e-06, + "loss": 0.5193, + "step": 36500 + }, + { + "epoch": 2.47, + "learning_rate": 8.904265748614942e-06, + "loss": 0.4993, + "step": 36550 + }, + { + "epoch": 2.48, + "learning_rate": 8.847267504160872e-06, + "loss": 0.5108, + "step": 36600 + }, + { + "epoch": 2.48, + "learning_rate": 8.790269259706801e-06, + "loss": 0.4985, + "step": 36650 + }, + { + "epoch": 2.48, + "learning_rate": 8.733271015252731e-06, + "loss": 0.5015, + "step": 36700 + }, + { + "epoch": 2.49, + "learning_rate": 8.676272770798661e-06, + "loss": 0.4881, + "step": 36750 + }, + { + "epoch": 2.49, + "learning_rate": 8.619274526344589e-06, + "loss": 0.4971, + "step": 36800 + }, + { + "epoch": 2.49, + "learning_rate": 8.562276281890517e-06, + "loss": 0.4989, + "step": 36850 + }, + { + "epoch": 2.5, + "learning_rate": 8.505278037436447e-06, + "loss": 0.505, + "step": 36900 + }, + { + "epoch": 2.5, + "learning_rate": 8.448279792982377e-06, + "loss": 0.5069, + "step": 36950 + }, + { + "epoch": 2.5, + "learning_rate": 8.391281548528306e-06, + "loss": 0.5175, + "step": 37000 + }, + { + "epoch": 2.51, + "learning_rate": 8.334283304074234e-06, + "loss": 0.4933, + "step": 37050 + }, + { + "epoch": 2.51, + "learning_rate": 8.277285059620164e-06, + "loss": 0.5073, + "step": 37100 + }, + { + "epoch": 2.51, + "learning_rate": 8.220286815166094e-06, + "loss": 0.5014, + "step": 37150 + }, + { + "epoch": 2.52, + "learning_rate": 8.163288570712024e-06, + "loss": 0.5197, + "step": 37200 + }, + { + "epoch": 2.52, + "learning_rate": 8.106290326257952e-06, + "loss": 0.5035, + "step": 37250 + }, + { + "epoch": 2.52, + "learning_rate": 8.04929208180388e-06, + "loss": 0.4902, + "step": 37300 + }, + { + "epoch": 2.53, + "learning_rate": 7.99229383734981e-06, + "loss": 0.5123, + "step": 37350 + }, + { + "epoch": 2.53, + "learning_rate": 7.93529559289574e-06, + "loss": 0.5182, + "step": 37400 + }, + { + "epoch": 2.53, + "learning_rate": 7.878297348441669e-06, + "loss": 0.5093, + "step": 37450 + }, + { + "epoch": 2.54, + "learning_rate": 7.821299103987597e-06, + "loss": 0.4936, + "step": 37500 + }, + { + "epoch": 2.54, + "learning_rate": 7.764300859533527e-06, + "loss": 0.4883, + "step": 37550 + }, + { + "epoch": 2.54, + "learning_rate": 7.707302615079457e-06, + "loss": 0.4968, + "step": 37600 + }, + { + "epoch": 2.55, + "learning_rate": 7.650304370625386e-06, + "loss": 0.4925, + "step": 37650 + }, + { + "epoch": 2.55, + "learning_rate": 7.5933061261713135e-06, + "loss": 0.5, + "step": 37700 + }, + { + "epoch": 2.55, + "learning_rate": 7.536307881717243e-06, + "loss": 0.5009, + "step": 37750 + }, + { + "epoch": 2.56, + "learning_rate": 7.479309637263172e-06, + "loss": 0.5161, + "step": 37800 + }, + { + "epoch": 2.56, + "learning_rate": 7.422311392809102e-06, + "loss": 0.5119, + "step": 37850 + }, + { + "epoch": 2.56, + "learning_rate": 7.365313148355031e-06, + "loss": 0.4972, + "step": 37900 + }, + { + "epoch": 2.57, + "learning_rate": 7.3083149039009605e-06, + "loss": 0.5116, + "step": 37950 + }, + { + "epoch": 2.57, + "learning_rate": 7.251316659446889e-06, + "loss": 0.5006, + "step": 38000 + }, + { + "epoch": 2.57, + "learning_rate": 7.194318414992819e-06, + "loss": 0.5207, + "step": 38050 + }, + { + "epoch": 2.58, + "learning_rate": 7.137320170538747e-06, + "loss": 0.505, + "step": 38100 + }, + { + "epoch": 2.58, + "learning_rate": 7.080321926084676e-06, + "loss": 0.5112, + "step": 38150 + }, + { + "epoch": 2.58, + "learning_rate": 7.023323681630606e-06, + "loss": 0.5169, + "step": 38200 + }, + { + "epoch": 2.59, + "learning_rate": 6.966325437176536e-06, + "loss": 0.5127, + "step": 38250 + }, + { + "epoch": 2.59, + "learning_rate": 6.9093271927224645e-06, + "loss": 0.5065, + "step": 38300 + }, + { + "epoch": 2.59, + "learning_rate": 6.852328948268394e-06, + "loss": 0.4879, + "step": 38350 + }, + { + "epoch": 2.6, + "learning_rate": 6.795330703814323e-06, + "loss": 0.5281, + "step": 38400 + }, + { + "epoch": 2.6, + "learning_rate": 6.738332459360253e-06, + "loss": 0.5051, + "step": 38450 + }, + { + "epoch": 2.6, + "learning_rate": 6.681334214906181e-06, + "loss": 0.5217, + "step": 38500 + }, + { + "epoch": 2.61, + "learning_rate": 6.62433597045211e-06, + "loss": 0.4942, + "step": 38550 + }, + { + "epoch": 2.61, + "learning_rate": 6.56733772599804e-06, + "loss": 0.5241, + "step": 38600 + }, + { + "epoch": 2.61, + "learning_rate": 6.5103394815439685e-06, + "loss": 0.4991, + "step": 38650 + }, + { + "epoch": 2.62, + "learning_rate": 6.453341237089898e-06, + "loss": 0.5034, + "step": 38700 + }, + { + "epoch": 2.62, + "learning_rate": 6.396342992635827e-06, + "loss": 0.5008, + "step": 38750 + }, + { + "epoch": 2.62, + "learning_rate": 6.339344748181757e-06, + "loss": 0.5183, + "step": 38800 + }, + { + "epoch": 2.63, + "learning_rate": 6.282346503727686e-06, + "loss": 0.5006, + "step": 38850 + }, + { + "epoch": 2.63, + "learning_rate": 6.225348259273615e-06, + "loss": 0.4936, + "step": 38900 + }, + { + "epoch": 2.63, + "learning_rate": 6.1683500148195444e-06, + "loss": 0.4758, + "step": 38950 + }, + { + "epoch": 2.64, + "learning_rate": 6.111351770365473e-06, + "loss": 0.4654, + "step": 39000 + }, + { + "epoch": 2.64, + "learning_rate": 6.054353525911402e-06, + "loss": 0.4584, + "step": 39050 + }, + { + "epoch": 2.64, + "learning_rate": 5.997355281457331e-06, + "loss": 0.4929, + "step": 39100 + }, + { + "epoch": 2.65, + "learning_rate": 5.940357037003261e-06, + "loss": 0.4793, + "step": 39150 + }, + { + "epoch": 2.65, + "learning_rate": 5.88335879254919e-06, + "loss": 0.4625, + "step": 39200 + }, + { + "epoch": 2.65, + "learning_rate": 5.826360548095119e-06, + "loss": 0.462, + "step": 39250 + }, + { + "epoch": 2.66, + "learning_rate": 5.7693623036410484e-06, + "loss": 0.4598, + "step": 39300 + }, + { + "epoch": 2.66, + "learning_rate": 5.712364059186977e-06, + "loss": 0.4778, + "step": 39350 + }, + { + "epoch": 2.66, + "learning_rate": 5.655365814732907e-06, + "loss": 0.4707, + "step": 39400 + }, + { + "epoch": 2.67, + "learning_rate": 5.598367570278835e-06, + "loss": 0.4468, + "step": 39450 + }, + { + "epoch": 2.67, + "learning_rate": 5.541369325824765e-06, + "loss": 0.454, + "step": 39500 + }, + { + "epoch": 2.67, + "learning_rate": 5.484371081370694e-06, + "loss": 0.4454, + "step": 39550 + }, + { + "epoch": 2.68, + "learning_rate": 5.4273728369166235e-06, + "loss": 0.4493, + "step": 39600 + }, + { + "epoch": 2.68, + "learning_rate": 5.3703745924625524e-06, + "loss": 0.451, + "step": 39650 + }, + { + "epoch": 2.68, + "learning_rate": 5.313376348008481e-06, + "loss": 0.4515, + "step": 39700 + }, + { + "epoch": 2.69, + "learning_rate": 5.256378103554411e-06, + "loss": 0.4714, + "step": 39750 + }, + { + "epoch": 2.69, + "learning_rate": 5.19937985910034e-06, + "loss": 0.4658, + "step": 39800 + }, + { + "epoch": 2.69, + "learning_rate": 5.142381614646269e-06, + "loss": 0.4694, + "step": 39850 + }, + { + "epoch": 2.7, + "learning_rate": 5.085383370192199e-06, + "loss": 0.4487, + "step": 39900 + }, + { + "epoch": 2.7, + "learning_rate": 5.0283851257381275e-06, + "loss": 0.4528, + "step": 39950 + }, + { + "epoch": 2.71, + "learning_rate": 4.971386881284057e-06, + "loss": 0.4515, + "step": 40000 + }, + { + "epoch": 2.71, + "learning_rate": 4.914388636829985e-06, + "loss": 0.4878, + "step": 40050 + }, + { + "epoch": 2.71, + "learning_rate": 4.857390392375915e-06, + "loss": 0.4668, + "step": 40100 + }, + { + "epoch": 2.72, + "learning_rate": 4.800392147921844e-06, + "loss": 0.4651, + "step": 40150 + }, + { + "epoch": 2.72, + "learning_rate": 4.743393903467774e-06, + "loss": 0.4722, + "step": 40200 + }, + { + "epoch": 2.72, + "learning_rate": 4.686395659013703e-06, + "loss": 0.47, + "step": 40250 + }, + { + "epoch": 2.73, + "learning_rate": 4.6293974145596315e-06, + "loss": 0.4629, + "step": 40300 + }, + { + "epoch": 2.73, + "learning_rate": 4.572399170105561e-06, + "loss": 0.4534, + "step": 40350 + }, + { + "epoch": 2.73, + "learning_rate": 4.51540092565149e-06, + "loss": 0.4816, + "step": 40400 + }, + { + "epoch": 2.74, + "learning_rate": 4.458402681197419e-06, + "loss": 0.4601, + "step": 40450 + }, + { + "epoch": 2.74, + "learning_rate": 4.401404436743348e-06, + "loss": 0.4642, + "step": 40500 + }, + { + "epoch": 2.74, + "learning_rate": 4.344406192289278e-06, + "loss": 0.4699, + "step": 40550 + }, + { + "epoch": 2.75, + "learning_rate": 4.2874079478352075e-06, + "loss": 0.4459, + "step": 40600 + }, + { + "epoch": 2.75, + "learning_rate": 4.2304097033811355e-06, + "loss": 0.4494, + "step": 40650 + }, + { + "epoch": 2.75, + "learning_rate": 4.173411458927065e-06, + "loss": 0.4565, + "step": 40700 + }, + { + "epoch": 2.76, + "learning_rate": 4.116413214472994e-06, + "loss": 0.4567, + "step": 40750 + }, + { + "epoch": 2.76, + "learning_rate": 4.059414970018924e-06, + "loss": 0.4602, + "step": 40800 + }, + { + "epoch": 2.76, + "learning_rate": 4.002416725564853e-06, + "loss": 0.4396, + "step": 40850 + }, + { + "epoch": 2.77, + "learning_rate": 3.945418481110782e-06, + "loss": 0.459, + "step": 40900 + }, + { + "epoch": 2.77, + "learning_rate": 3.8884202366567115e-06, + "loss": 0.4686, + "step": 40950 + }, + { + "epoch": 2.77, + "learning_rate": 3.83142199220264e-06, + "loss": 0.462, + "step": 41000 + }, + { + "epoch": 2.78, + "learning_rate": 3.7744237477485697e-06, + "loss": 0.4658, + "step": 41050 + }, + { + "epoch": 2.78, + "learning_rate": 3.7174255032944986e-06, + "loss": 0.4596, + "step": 41100 + }, + { + "epoch": 2.78, + "learning_rate": 3.660427258840428e-06, + "loss": 0.4557, + "step": 41150 + }, + { + "epoch": 2.79, + "learning_rate": 3.6034290143863572e-06, + "loss": 0.4635, + "step": 41200 + }, + { + "epoch": 2.79, + "learning_rate": 3.5464307699322866e-06, + "loss": 0.4548, + "step": 41250 + }, + { + "epoch": 2.79, + "learning_rate": 3.489432525478215e-06, + "loss": 0.4472, + "step": 41300 + }, + { + "epoch": 2.8, + "learning_rate": 3.4324342810241448e-06, + "loss": 0.4563, + "step": 41350 + }, + { + "epoch": 2.8, + "learning_rate": 3.375436036570074e-06, + "loss": 0.4541, + "step": 41400 + }, + { + "epoch": 2.8, + "learning_rate": 3.3184377921160034e-06, + "loss": 0.4619, + "step": 41450 + }, + { + "epoch": 2.81, + "learning_rate": 3.261439547661932e-06, + "loss": 0.4267, + "step": 41500 + }, + { + "epoch": 2.81, + "learning_rate": 3.2044413032078612e-06, + "loss": 0.4374, + "step": 41550 + }, + { + "epoch": 2.81, + "learning_rate": 3.1474430587537906e-06, + "loss": 0.4332, + "step": 41600 + }, + { + "epoch": 2.82, + "learning_rate": 3.0904448142997195e-06, + "loss": 0.4487, + "step": 41650 + }, + { + "epoch": 2.82, + "learning_rate": 3.033446569845649e-06, + "loss": 0.4467, + "step": 41700 + }, + { + "epoch": 2.82, + "learning_rate": 2.976448325391578e-06, + "loss": 0.4488, + "step": 41750 + }, + { + "epoch": 2.83, + "learning_rate": 2.9194500809375074e-06, + "loss": 0.4473, + "step": 41800 + }, + { + "epoch": 2.83, + "learning_rate": 2.8624518364834363e-06, + "loss": 0.4497, + "step": 41850 + }, + { + "epoch": 2.83, + "learning_rate": 2.8054535920293657e-06, + "loss": 0.4458, + "step": 41900 + }, + { + "epoch": 2.84, + "learning_rate": 2.7484553475752946e-06, + "loss": 0.4496, + "step": 41950 + }, + { + "epoch": 2.84, + "learning_rate": 2.691457103121224e-06, + "loss": 0.4375, + "step": 42000 + }, + { + "epoch": 2.84, + "learning_rate": 2.634458858667153e-06, + "loss": 0.4403, + "step": 42050 + }, + { + "epoch": 2.85, + "learning_rate": 2.5774606142130825e-06, + "loss": 0.4317, + "step": 42100 + }, + { + "epoch": 2.85, + "learning_rate": 2.5204623697590114e-06, + "loss": 0.4473, + "step": 42150 + }, + { + "epoch": 2.85, + "learning_rate": 2.4634641253049407e-06, + "loss": 0.4407, + "step": 42200 + }, + { + "epoch": 2.86, + "learning_rate": 2.4064658808508696e-06, + "loss": 0.414, + "step": 42250 + }, + { + "epoch": 2.86, + "learning_rate": 2.349467636396799e-06, + "loss": 0.4204, + "step": 42300 + }, + { + "epoch": 2.86, + "learning_rate": 2.2924693919427283e-06, + "loss": 0.4546, + "step": 42350 + }, + { + "epoch": 2.87, + "learning_rate": 2.2354711474886576e-06, + "loss": 0.4494, + "step": 42400 + }, + { + "epoch": 2.87, + "learning_rate": 2.1784729030345865e-06, + "loss": 0.4347, + "step": 42450 + }, + { + "epoch": 2.87, + "learning_rate": 2.121474658580516e-06, + "loss": 0.461, + "step": 42500 + }, + { + "epoch": 2.88, + "learning_rate": 2.0644764141264447e-06, + "loss": 0.4579, + "step": 42550 + }, + { + "epoch": 2.88, + "learning_rate": 2.007478169672374e-06, + "loss": 0.4155, + "step": 42600 + }, + { + "epoch": 2.88, + "learning_rate": 1.9504799252183034e-06, + "loss": 0.4498, + "step": 42650 + }, + { + "epoch": 2.89, + "learning_rate": 1.8934816807642325e-06, + "loss": 0.4504, + "step": 42700 + }, + { + "epoch": 2.89, + "learning_rate": 1.8364834363101616e-06, + "loss": 0.4387, + "step": 42750 + }, + { + "epoch": 2.89, + "learning_rate": 1.779485191856091e-06, + "loss": 0.4304, + "step": 42800 + }, + { + "epoch": 2.9, + "learning_rate": 1.7224869474020203e-06, + "loss": 0.4286, + "step": 42850 + }, + { + "epoch": 2.9, + "learning_rate": 1.6654887029479494e-06, + "loss": 0.4177, + "step": 42900 + }, + { + "epoch": 2.9, + "learning_rate": 1.6084904584938787e-06, + "loss": 0.4518, + "step": 42950 + }, + { + "epoch": 2.91, + "learning_rate": 1.5514922140398076e-06, + "loss": 0.4462, + "step": 43000 + }, + { + "epoch": 2.91, + "learning_rate": 1.4944939695857367e-06, + "loss": 0.432, + "step": 43050 + }, + { + "epoch": 2.91, + "learning_rate": 1.437495725131666e-06, + "loss": 0.4284, + "step": 43100 + }, + { + "epoch": 2.92, + "learning_rate": 1.3804974806775951e-06, + "loss": 0.4183, + "step": 43150 + }, + { + "epoch": 2.92, + "learning_rate": 1.3234992362235243e-06, + "loss": 0.4254, + "step": 43200 + }, + { + "epoch": 2.92, + "learning_rate": 1.2665009917694536e-06, + "loss": 0.4459, + "step": 43250 + }, + { + "epoch": 2.93, + "learning_rate": 1.2095027473153827e-06, + "loss": 0.433, + "step": 43300 + }, + { + "epoch": 2.93, + "learning_rate": 1.1525045028613118e-06, + "loss": 0.4376, + "step": 43350 + }, + { + "epoch": 2.94, + "learning_rate": 1.0955062584072411e-06, + "loss": 0.4389, + "step": 43400 + }, + { + "epoch": 2.94, + "learning_rate": 1.0385080139531702e-06, + "loss": 0.4332, + "step": 43450 + }, + { + "epoch": 2.94, + "learning_rate": 9.815097694990994e-07, + "loss": 0.4354, + "step": 43500 + }, + { + "epoch": 2.95, + "learning_rate": 9.245115250450286e-07, + "loss": 0.4511, + "step": 43550 + }, + { + "epoch": 2.95, + "learning_rate": 8.675132805909578e-07, + "loss": 0.4149, + "step": 43600 + }, + { + "epoch": 2.95, + "learning_rate": 8.105150361368869e-07, + "loss": 0.4322, + "step": 43650 + }, + { + "epoch": 2.96, + "learning_rate": 7.535167916828162e-07, + "loss": 0.4128, + "step": 43700 + }, + { + "epoch": 2.96, + "learning_rate": 6.965185472287453e-07, + "loss": 0.4194, + "step": 43750 + }, + { + "epoch": 2.96, + "learning_rate": 6.395203027746746e-07, + "loss": 0.4325, + "step": 43800 + }, + { + "epoch": 2.97, + "learning_rate": 5.825220583206038e-07, + "loss": 0.4287, + "step": 43850 + }, + { + "epoch": 2.97, + "learning_rate": 5.255238138665329e-07, + "loss": 0.4353, + "step": 43900 + }, + { + "epoch": 2.97, + "learning_rate": 4.685255694124621e-07, + "loss": 0.4207, + "step": 43950 + }, + { + "epoch": 2.98, + "learning_rate": 4.1152732495839127e-07, + "loss": 0.4465, + "step": 44000 + }, + { + "epoch": 2.98, + "learning_rate": 3.545290805043205e-07, + "loss": 0.4238, + "step": 44050 + }, + { + "epoch": 2.98, + "learning_rate": 2.9753083605024965e-07, + "loss": 0.4226, + "step": 44100 + }, + { + "epoch": 2.99, + "learning_rate": 2.4053259159617887e-07, + "loss": 0.4465, + "step": 44150 + }, + { + "epoch": 2.99, + "learning_rate": 1.8353434714210804e-07, + "loss": 0.4094, + "step": 44200 + }, + { + "epoch": 2.99, + "learning_rate": 1.265361026880372e-07, + "loss": 0.4491, + "step": 44250 + }, + { + "epoch": 3.0, + "learning_rate": 6.95378582339664e-08, + "loss": 0.4208, + "step": 44300 + }, + { + "epoch": 3.0, + "learning_rate": 1.253961377989558e-08, + "loss": 0.4375, + "step": 44350 + } + ], + "max_steps": 44361, + "num_train_epochs": 3, + "total_flos": 1.77824607798456e+17, + "trial_name": null, + "trial_params": null +}