diff --git "a/trainer_state.json" "b/trainer_state.json" new file mode 100644--- /dev/null +++ "b/trainer_state.json" @@ -0,0 +1,22672 @@ +{ + "best_metric": 2.7920210361480713, + "best_model_checkpoint": "model_v1_complete_training_wt_init_48_mini/checkpoint-1830000", + "epoch": 15.088621802749513, + "global_step": 1841687, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 5.000000000000001e-07, + "loss": 10.2892, + "step": 500 + }, + { + "epoch": 0.01, + "learning_rate": 1.0000000000000002e-06, + "loss": 8.8433, + "step": 1000 + }, + { + "epoch": 0.01, + "learning_rate": 1.5e-06, + "loss": 8.1625, + "step": 1500 + }, + { + "epoch": 0.02, + "learning_rate": 2.0000000000000003e-06, + "loss": 7.85, + "step": 2000 + }, + { + "epoch": 0.02, + "learning_rate": 2.5e-06, + "loss": 7.6776, + "step": 2500 + }, + { + "epoch": 0.02, + "learning_rate": 3e-06, + "loss": 7.5065, + "step": 3000 + }, + { + "epoch": 0.03, + "learning_rate": 3.5e-06, + "loss": 7.3363, + "step": 3500 + }, + { + "epoch": 0.03, + "learning_rate": 4.000000000000001e-06, + "loss": 7.2137, + "step": 4000 + }, + { + "epoch": 0.04, + "learning_rate": 4.5e-06, + "loss": 7.1039, + "step": 4500 + }, + { + "epoch": 0.04, + "learning_rate": 5e-06, + "loss": 7.0197, + "step": 5000 + }, + { + "epoch": 0.05, + "learning_rate": 5.500000000000001e-06, + "loss": 6.9294, + "step": 5500 + }, + { + "epoch": 0.05, + "learning_rate": 6e-06, + "loss": 6.854, + "step": 6000 + }, + { + "epoch": 0.05, + "learning_rate": 6.5000000000000004e-06, + "loss": 6.7942, + "step": 6500 + }, + { + "epoch": 0.06, + "learning_rate": 7e-06, + "loss": 6.7328, + "step": 7000 + }, + { + "epoch": 0.06, + "learning_rate": 7.500000000000001e-06, + "loss": 6.685, + "step": 7500 + }, + { + "epoch": 0.07, + "learning_rate": 8.000000000000001e-06, + "loss": 6.6331, + "step": 8000 + }, + { + "epoch": 0.07, + "learning_rate": 8.5e-06, + "loss": 6.5921, + "step": 8500 + }, + { + "epoch": 0.07, + "learning_rate": 9e-06, + "loss": 6.5616, + "step": 9000 + }, + { + "epoch": 0.08, + "learning_rate": 9.5e-06, + "loss": 6.5268, + "step": 9500 + }, + { + "epoch": 0.08, + "learning_rate": 1e-05, + "loss": 6.5012, + "step": 10000 + }, + { + "epoch": 0.09, + "learning_rate": 9.998356047280082e-06, + "loss": 6.4694, + "step": 10500 + }, + { + "epoch": 0.09, + "learning_rate": 9.996712094560161e-06, + "loss": 6.4482, + "step": 11000 + }, + { + "epoch": 0.09, + "learning_rate": 9.995068141840242e-06, + "loss": 6.4289, + "step": 11500 + }, + { + "epoch": 0.1, + "learning_rate": 9.993424189120321e-06, + "loss": 6.4084, + "step": 12000 + }, + { + "epoch": 0.1, + "learning_rate": 9.991780236400402e-06, + "loss": 6.3823, + "step": 12500 + }, + { + "epoch": 0.11, + "learning_rate": 9.990136283680483e-06, + "loss": 6.3696, + "step": 13000 + }, + { + "epoch": 0.11, + "learning_rate": 9.988492330960563e-06, + "loss": 6.359, + "step": 13500 + }, + { + "epoch": 0.11, + "learning_rate": 9.986848378240642e-06, + "loss": 6.3447, + "step": 14000 + }, + { + "epoch": 0.12, + "learning_rate": 9.985204425520723e-06, + "loss": 6.327, + "step": 14500 + }, + { + "epoch": 0.12, + "learning_rate": 9.983560472800802e-06, + "loss": 6.3197, + "step": 15000 + }, + { + "epoch": 0.13, + "learning_rate": 9.981916520080883e-06, + "loss": 6.3012, + "step": 15500 + }, + { + "epoch": 0.13, + "learning_rate": 9.980272567360964e-06, + "loss": 6.2978, + "step": 16000 + }, + { + "epoch": 0.14, + "learning_rate": 9.978628614641043e-06, + "loss": 6.2945, + "step": 16500 + }, + { + "epoch": 0.14, + "learning_rate": 9.976984661921124e-06, + "loss": 6.2789, + "step": 17000 + }, + { + "epoch": 0.14, + "learning_rate": 9.975340709201205e-06, + "loss": 6.2788, + "step": 17500 + }, + { + "epoch": 0.15, + "learning_rate": 9.973696756481285e-06, + "loss": 6.2633, + "step": 18000 + }, + { + "epoch": 0.15, + "learning_rate": 9.972052803761366e-06, + "loss": 6.2471, + "step": 18500 + }, + { + "epoch": 0.16, + "learning_rate": 9.970408851041445e-06, + "loss": 6.1923, + "step": 19000 + }, + { + "epoch": 0.16, + "learning_rate": 9.968764898321524e-06, + "loss": 6.1695, + "step": 19500 + }, + { + "epoch": 0.16, + "learning_rate": 9.967120945601605e-06, + "loss": 6.1645, + "step": 20000 + }, + { + "epoch": 0.17, + "learning_rate": 9.965476992881686e-06, + "loss": 6.1372, + "step": 20500 + }, + { + "epoch": 0.17, + "learning_rate": 9.963833040161765e-06, + "loss": 6.1184, + "step": 21000 + }, + { + "epoch": 0.18, + "learning_rate": 9.962189087441846e-06, + "loss": 6.1077, + "step": 21500 + }, + { + "epoch": 0.18, + "learning_rate": 9.960545134721926e-06, + "loss": 6.0858, + "step": 22000 + }, + { + "epoch": 0.18, + "learning_rate": 9.958901182002007e-06, + "loss": 6.0807, + "step": 22500 + }, + { + "epoch": 0.19, + "learning_rate": 9.957257229282087e-06, + "loss": 6.0578, + "step": 23000 + }, + { + "epoch": 0.19, + "learning_rate": 9.955613276562167e-06, + "loss": 6.0571, + "step": 23500 + }, + { + "epoch": 0.2, + "learning_rate": 9.953969323842248e-06, + "loss": 6.0397, + "step": 24000 + }, + { + "epoch": 0.2, + "learning_rate": 9.952325371122327e-06, + "loss": 6.0305, + "step": 24500 + }, + { + "epoch": 0.2, + "learning_rate": 9.950681418402408e-06, + "loss": 6.0192, + "step": 25000 + }, + { + "epoch": 0.21, + "learning_rate": 9.949037465682487e-06, + "loss": 6.003, + "step": 25500 + }, + { + "epoch": 0.21, + "learning_rate": 9.947393512962568e-06, + "loss": 5.9987, + "step": 26000 + }, + { + "epoch": 0.22, + "learning_rate": 9.945749560242648e-06, + "loss": 5.9946, + "step": 26500 + }, + { + "epoch": 0.22, + "learning_rate": 9.944105607522729e-06, + "loss": 5.9855, + "step": 27000 + }, + { + "epoch": 0.23, + "learning_rate": 9.942461654802808e-06, + "loss": 5.9723, + "step": 27500 + }, + { + "epoch": 0.23, + "learning_rate": 9.940817702082889e-06, + "loss": 5.9701, + "step": 28000 + }, + { + "epoch": 0.23, + "learning_rate": 9.93917374936297e-06, + "loss": 5.9636, + "step": 28500 + }, + { + "epoch": 0.24, + "learning_rate": 9.937529796643049e-06, + "loss": 5.9503, + "step": 29000 + }, + { + "epoch": 0.24, + "learning_rate": 9.93588584392313e-06, + "loss": 5.9404, + "step": 29500 + }, + { + "epoch": 0.25, + "learning_rate": 9.934241891203211e-06, + "loss": 5.9411, + "step": 30000 + }, + { + "epoch": 0.25, + "eval_accuracy": 0.1518340698223809, + "eval_loss": 5.883250713348389, + "eval_runtime": 404.4584, + "eval_samples_per_second": 762.392, + "eval_steps_per_second": 15.885, + "step": 30000 + }, + { + "epoch": 0.25, + "learning_rate": 9.93259793848329e-06, + "loss": 5.9333, + "step": 30500 + }, + { + "epoch": 0.25, + "learning_rate": 9.930953985763371e-06, + "loss": 5.9235, + "step": 31000 + }, + { + "epoch": 0.26, + "learning_rate": 9.92931003304345e-06, + "loss": 5.9169, + "step": 31500 + }, + { + "epoch": 0.26, + "learning_rate": 9.92766608032353e-06, + "loss": 5.9136, + "step": 32000 + }, + { + "epoch": 0.27, + "learning_rate": 9.92602212760361e-06, + "loss": 5.9025, + "step": 32500 + }, + { + "epoch": 0.27, + "learning_rate": 9.924378174883692e-06, + "loss": 5.9008, + "step": 33000 + }, + { + "epoch": 0.27, + "learning_rate": 9.922734222163771e-06, + "loss": 5.8908, + "step": 33500 + }, + { + "epoch": 0.28, + "learning_rate": 9.921090269443852e-06, + "loss": 5.8871, + "step": 34000 + }, + { + "epoch": 0.28, + "learning_rate": 9.919446316723931e-06, + "loss": 5.8785, + "step": 34500 + }, + { + "epoch": 0.29, + "learning_rate": 9.917802364004012e-06, + "loss": 5.877, + "step": 35000 + }, + { + "epoch": 0.29, + "learning_rate": 9.916158411284093e-06, + "loss": 5.8796, + "step": 35500 + }, + { + "epoch": 0.29, + "learning_rate": 9.914514458564172e-06, + "loss": 5.866, + "step": 36000 + }, + { + "epoch": 0.3, + "learning_rate": 9.912870505844253e-06, + "loss": 5.8613, + "step": 36500 + }, + { + "epoch": 0.3, + "learning_rate": 9.911226553124333e-06, + "loss": 5.8509, + "step": 37000 + }, + { + "epoch": 0.31, + "learning_rate": 9.909582600404412e-06, + "loss": 5.8453, + "step": 37500 + }, + { + "epoch": 0.31, + "learning_rate": 9.907938647684493e-06, + "loss": 5.843, + "step": 38000 + }, + { + "epoch": 0.32, + "learning_rate": 9.906294694964574e-06, + "loss": 5.8326, + "step": 38500 + }, + { + "epoch": 0.32, + "learning_rate": 9.904650742244653e-06, + "loss": 5.8313, + "step": 39000 + }, + { + "epoch": 0.32, + "learning_rate": 9.903006789524734e-06, + "loss": 5.8303, + "step": 39500 + }, + { + "epoch": 0.33, + "learning_rate": 9.901362836804813e-06, + "loss": 5.825, + "step": 40000 + }, + { + "epoch": 0.33, + "learning_rate": 9.899718884084894e-06, + "loss": 5.809, + "step": 40500 + }, + { + "epoch": 0.34, + "learning_rate": 9.898074931364975e-06, + "loss": 5.8117, + "step": 41000 + }, + { + "epoch": 0.34, + "learning_rate": 9.896430978645055e-06, + "loss": 5.8035, + "step": 41500 + }, + { + "epoch": 0.34, + "learning_rate": 9.894787025925136e-06, + "loss": 5.7973, + "step": 42000 + }, + { + "epoch": 0.35, + "learning_rate": 9.893143073205217e-06, + "loss": 5.7966, + "step": 42500 + }, + { + "epoch": 0.35, + "learning_rate": 9.891499120485296e-06, + "loss": 5.789, + "step": 43000 + }, + { + "epoch": 0.36, + "learning_rate": 9.889855167765375e-06, + "loss": 5.7827, + "step": 43500 + }, + { + "epoch": 0.36, + "learning_rate": 9.888211215045456e-06, + "loss": 5.7801, + "step": 44000 + }, + { + "epoch": 0.36, + "learning_rate": 9.886567262325535e-06, + "loss": 5.7761, + "step": 44500 + }, + { + "epoch": 0.37, + "learning_rate": 9.884923309605616e-06, + "loss": 5.7702, + "step": 45000 + }, + { + "epoch": 0.37, + "learning_rate": 9.883279356885697e-06, + "loss": 5.7638, + "step": 45500 + }, + { + "epoch": 0.38, + "learning_rate": 9.881635404165777e-06, + "loss": 5.7549, + "step": 46000 + }, + { + "epoch": 0.38, + "learning_rate": 9.879991451445858e-06, + "loss": 5.7496, + "step": 46500 + }, + { + "epoch": 0.39, + "learning_rate": 9.878347498725937e-06, + "loss": 5.7451, + "step": 47000 + }, + { + "epoch": 0.39, + "learning_rate": 9.876703546006018e-06, + "loss": 5.7447, + "step": 47500 + }, + { + "epoch": 0.39, + "learning_rate": 9.875059593286099e-06, + "loss": 5.7451, + "step": 48000 + }, + { + "epoch": 0.4, + "learning_rate": 9.873415640566178e-06, + "loss": 5.7393, + "step": 48500 + }, + { + "epoch": 0.4, + "learning_rate": 9.87177168784626e-06, + "loss": 5.7319, + "step": 49000 + }, + { + "epoch": 0.41, + "learning_rate": 9.870127735126338e-06, + "loss": 5.73, + "step": 49500 + }, + { + "epoch": 0.41, + "learning_rate": 9.868483782406418e-06, + "loss": 5.7289, + "step": 50000 + }, + { + "epoch": 0.41, + "learning_rate": 9.866839829686499e-06, + "loss": 5.7158, + "step": 50500 + }, + { + "epoch": 0.42, + "learning_rate": 9.86519587696658e-06, + "loss": 5.713, + "step": 51000 + }, + { + "epoch": 0.42, + "learning_rate": 9.863551924246659e-06, + "loss": 5.7082, + "step": 51500 + }, + { + "epoch": 0.43, + "learning_rate": 9.86190797152674e-06, + "loss": 5.704, + "step": 52000 + }, + { + "epoch": 0.43, + "learning_rate": 9.86026401880682e-06, + "loss": 5.7021, + "step": 52500 + }, + { + "epoch": 0.43, + "learning_rate": 9.8586200660869e-06, + "loss": 5.7041, + "step": 53000 + }, + { + "epoch": 0.44, + "learning_rate": 9.856976113366981e-06, + "loss": 5.6927, + "step": 53500 + }, + { + "epoch": 0.44, + "learning_rate": 9.85533216064706e-06, + "loss": 5.6896, + "step": 54000 + }, + { + "epoch": 0.45, + "learning_rate": 9.853688207927141e-06, + "loss": 5.6803, + "step": 54500 + }, + { + "epoch": 0.45, + "learning_rate": 9.85204425520722e-06, + "loss": 5.6817, + "step": 55000 + }, + { + "epoch": 0.45, + "learning_rate": 9.8504003024873e-06, + "loss": 5.6833, + "step": 55500 + }, + { + "epoch": 0.46, + "learning_rate": 9.848756349767381e-06, + "loss": 5.6681, + "step": 56000 + }, + { + "epoch": 0.46, + "learning_rate": 9.847112397047462e-06, + "loss": 5.6702, + "step": 56500 + }, + { + "epoch": 0.47, + "learning_rate": 9.845468444327541e-06, + "loss": 5.6569, + "step": 57000 + }, + { + "epoch": 0.47, + "learning_rate": 9.843824491607622e-06, + "loss": 5.6653, + "step": 57500 + }, + { + "epoch": 0.48, + "learning_rate": 9.842180538887703e-06, + "loss": 5.6525, + "step": 58000 + }, + { + "epoch": 0.48, + "learning_rate": 9.840536586167782e-06, + "loss": 5.6518, + "step": 58500 + }, + { + "epoch": 0.48, + "learning_rate": 9.838892633447863e-06, + "loss": 5.6497, + "step": 59000 + }, + { + "epoch": 0.49, + "learning_rate": 9.837248680727943e-06, + "loss": 5.6454, + "step": 59500 + }, + { + "epoch": 0.49, + "learning_rate": 9.835604728008024e-06, + "loss": 5.6408, + "step": 60000 + }, + { + "epoch": 0.49, + "eval_accuracy": 0.19079809901308453, + "eval_loss": 5.526524543762207, + "eval_runtime": 411.2166, + "eval_samples_per_second": 749.863, + "eval_steps_per_second": 15.624, + "step": 60000 + }, + { + "epoch": 0.5, + "learning_rate": 9.833960775288105e-06, + "loss": 5.6208, + "step": 60500 + }, + { + "epoch": 0.5, + "learning_rate": 9.832316822568184e-06, + "loss": 5.5933, + "step": 61000 + }, + { + "epoch": 0.5, + "learning_rate": 9.830672869848265e-06, + "loss": 5.5664, + "step": 61500 + }, + { + "epoch": 0.51, + "learning_rate": 9.829028917128344e-06, + "loss": 5.5416, + "step": 62000 + }, + { + "epoch": 0.51, + "learning_rate": 9.827384964408423e-06, + "loss": 5.5315, + "step": 62500 + }, + { + "epoch": 0.52, + "learning_rate": 9.825741011688504e-06, + "loss": 5.5227, + "step": 63000 + }, + { + "epoch": 0.52, + "learning_rate": 9.824097058968585e-06, + "loss": 5.4887, + "step": 63500 + }, + { + "epoch": 0.52, + "learning_rate": 9.822453106248665e-06, + "loss": 5.4846, + "step": 64000 + }, + { + "epoch": 0.53, + "learning_rate": 9.820809153528746e-06, + "loss": 5.4683, + "step": 64500 + }, + { + "epoch": 0.53, + "learning_rate": 9.819165200808825e-06, + "loss": 5.4631, + "step": 65000 + }, + { + "epoch": 0.54, + "learning_rate": 9.817521248088906e-06, + "loss": 5.4465, + "step": 65500 + }, + { + "epoch": 0.54, + "learning_rate": 9.815877295368987e-06, + "loss": 5.4323, + "step": 66000 + }, + { + "epoch": 0.54, + "learning_rate": 9.814233342649066e-06, + "loss": 5.415, + "step": 66500 + }, + { + "epoch": 0.55, + "learning_rate": 9.812589389929147e-06, + "loss": 5.4046, + "step": 67000 + }, + { + "epoch": 0.55, + "learning_rate": 9.810945437209226e-06, + "loss": 5.3932, + "step": 67500 + }, + { + "epoch": 0.56, + "learning_rate": 9.809301484489306e-06, + "loss": 5.3858, + "step": 68000 + }, + { + "epoch": 0.56, + "learning_rate": 9.807657531769387e-06, + "loss": 5.3667, + "step": 68500 + }, + { + "epoch": 0.57, + "learning_rate": 9.806013579049468e-06, + "loss": 5.3536, + "step": 69000 + }, + { + "epoch": 0.57, + "learning_rate": 9.804369626329547e-06, + "loss": 5.3413, + "step": 69500 + }, + { + "epoch": 0.57, + "learning_rate": 9.802725673609628e-06, + "loss": 5.327, + "step": 70000 + }, + { + "epoch": 0.58, + "learning_rate": 9.801081720889709e-06, + "loss": 5.3028, + "step": 70500 + }, + { + "epoch": 0.58, + "learning_rate": 9.799437768169788e-06, + "loss": 5.2809, + "step": 71000 + }, + { + "epoch": 0.59, + "learning_rate": 9.797793815449869e-06, + "loss": 5.2594, + "step": 71500 + }, + { + "epoch": 0.59, + "learning_rate": 9.796149862729948e-06, + "loss": 5.2433, + "step": 72000 + }, + { + "epoch": 0.59, + "learning_rate": 9.79450591001003e-06, + "loss": 5.2326, + "step": 72500 + }, + { + "epoch": 0.6, + "learning_rate": 9.79286195729011e-06, + "loss": 5.2018, + "step": 73000 + }, + { + "epoch": 0.6, + "learning_rate": 9.79121800457019e-06, + "loss": 5.1855, + "step": 73500 + }, + { + "epoch": 0.61, + "learning_rate": 9.789574051850269e-06, + "loss": 5.1629, + "step": 74000 + }, + { + "epoch": 0.61, + "learning_rate": 9.78793009913035e-06, + "loss": 5.1439, + "step": 74500 + }, + { + "epoch": 0.61, + "learning_rate": 9.786286146410429e-06, + "loss": 5.1354, + "step": 75000 + }, + { + "epoch": 0.62, + "learning_rate": 9.78464219369051e-06, + "loss": 5.1111, + "step": 75500 + }, + { + "epoch": 0.62, + "learning_rate": 9.782998240970591e-06, + "loss": 5.0945, + "step": 76000 + }, + { + "epoch": 0.63, + "learning_rate": 9.78135428825067e-06, + "loss": 5.0785, + "step": 76500 + }, + { + "epoch": 0.63, + "learning_rate": 9.779710335530751e-06, + "loss": 5.0583, + "step": 77000 + }, + { + "epoch": 0.63, + "learning_rate": 9.77806638281083e-06, + "loss": 5.0422, + "step": 77500 + }, + { + "epoch": 0.64, + "learning_rate": 9.776422430090912e-06, + "loss": 5.0259, + "step": 78000 + }, + { + "epoch": 0.64, + "learning_rate": 9.774778477370993e-06, + "loss": 5.0104, + "step": 78500 + }, + { + "epoch": 0.65, + "learning_rate": 9.773134524651072e-06, + "loss": 4.9894, + "step": 79000 + }, + { + "epoch": 0.65, + "learning_rate": 9.771490571931153e-06, + "loss": 4.9678, + "step": 79500 + }, + { + "epoch": 0.66, + "learning_rate": 9.769846619211232e-06, + "loss": 4.9547, + "step": 80000 + }, + { + "epoch": 0.66, + "learning_rate": 9.768202666491311e-06, + "loss": 4.9365, + "step": 80500 + }, + { + "epoch": 0.66, + "learning_rate": 9.766558713771392e-06, + "loss": 4.9222, + "step": 81000 + }, + { + "epoch": 0.67, + "learning_rate": 9.764914761051473e-06, + "loss": 4.9002, + "step": 81500 + }, + { + "epoch": 0.67, + "learning_rate": 9.763270808331553e-06, + "loss": 4.8928, + "step": 82000 + }, + { + "epoch": 0.68, + "learning_rate": 9.761626855611634e-06, + "loss": 4.866, + "step": 82500 + }, + { + "epoch": 0.68, + "learning_rate": 9.759982902891713e-06, + "loss": 4.8529, + "step": 83000 + }, + { + "epoch": 0.68, + "learning_rate": 9.758338950171794e-06, + "loss": 4.841, + "step": 83500 + }, + { + "epoch": 0.69, + "learning_rate": 9.756694997451875e-06, + "loss": 4.8105, + "step": 84000 + }, + { + "epoch": 0.69, + "learning_rate": 9.755051044731954e-06, + "loss": 4.7817, + "step": 84500 + }, + { + "epoch": 0.7, + "learning_rate": 9.753407092012035e-06, + "loss": 4.7561, + "step": 85000 + }, + { + "epoch": 0.7, + "learning_rate": 9.751763139292116e-06, + "loss": 4.7291, + "step": 85500 + }, + { + "epoch": 0.7, + "learning_rate": 9.750119186572195e-06, + "loss": 4.6946, + "step": 86000 + }, + { + "epoch": 0.71, + "learning_rate": 9.748475233852275e-06, + "loss": 4.6735, + "step": 86500 + }, + { + "epoch": 0.71, + "learning_rate": 9.746831281132356e-06, + "loss": 4.6509, + "step": 87000 + }, + { + "epoch": 0.72, + "learning_rate": 9.745187328412435e-06, + "loss": 4.6292, + "step": 87500 + }, + { + "epoch": 0.72, + "learning_rate": 9.743543375692516e-06, + "loss": 4.6109, + "step": 88000 + }, + { + "epoch": 0.73, + "learning_rate": 9.741899422972597e-06, + "loss": 4.5882, + "step": 88500 + }, + { + "epoch": 0.73, + "learning_rate": 9.740255470252676e-06, + "loss": 4.573, + "step": 89000 + }, + { + "epoch": 0.73, + "learning_rate": 9.738611517532757e-06, + "loss": 4.5576, + "step": 89500 + }, + { + "epoch": 0.74, + "learning_rate": 9.736967564812836e-06, + "loss": 4.5385, + "step": 90000 + }, + { + "epoch": 0.74, + "eval_accuracy": 0.31378937363065973, + "eval_loss": 4.313334941864014, + "eval_runtime": 406.1908, + "eval_samples_per_second": 759.141, + "eval_steps_per_second": 15.818, + "step": 90000 + }, + { + "epoch": 0.74, + "learning_rate": 9.735323612092917e-06, + "loss": 4.5261, + "step": 90500 + }, + { + "epoch": 0.75, + "learning_rate": 9.733679659372998e-06, + "loss": 4.5152, + "step": 91000 + }, + { + "epoch": 0.75, + "learning_rate": 9.732035706653078e-06, + "loss": 4.5018, + "step": 91500 + }, + { + "epoch": 0.75, + "learning_rate": 9.730391753933157e-06, + "loss": 4.4877, + "step": 92000 + }, + { + "epoch": 0.76, + "learning_rate": 9.728747801213238e-06, + "loss": 4.4749, + "step": 92500 + }, + { + "epoch": 0.76, + "learning_rate": 9.727103848493317e-06, + "loss": 4.4638, + "step": 93000 + }, + { + "epoch": 0.77, + "learning_rate": 9.725459895773398e-06, + "loss": 4.4451, + "step": 93500 + }, + { + "epoch": 0.77, + "learning_rate": 9.723815943053479e-06, + "loss": 4.437, + "step": 94000 + }, + { + "epoch": 0.77, + "learning_rate": 9.722171990333558e-06, + "loss": 4.433, + "step": 94500 + }, + { + "epoch": 0.78, + "learning_rate": 9.72052803761364e-06, + "loss": 4.4104, + "step": 95000 + }, + { + "epoch": 0.78, + "learning_rate": 9.718884084893719e-06, + "loss": 4.4126, + "step": 95500 + }, + { + "epoch": 0.79, + "learning_rate": 9.7172401321738e-06, + "loss": 4.4033, + "step": 96000 + }, + { + "epoch": 0.79, + "learning_rate": 9.71559617945388e-06, + "loss": 4.3898, + "step": 96500 + }, + { + "epoch": 0.79, + "learning_rate": 9.71395222673396e-06, + "loss": 4.3815, + "step": 97000 + }, + { + "epoch": 0.8, + "learning_rate": 9.71230827401404e-06, + "loss": 4.3669, + "step": 97500 + }, + { + "epoch": 0.8, + "learning_rate": 9.71066432129412e-06, + "loss": 4.359, + "step": 98000 + }, + { + "epoch": 0.81, + "learning_rate": 9.709020368574201e-06, + "loss": 4.3495, + "step": 98500 + }, + { + "epoch": 0.81, + "learning_rate": 9.70737641585428e-06, + "loss": 4.3459, + "step": 99000 + }, + { + "epoch": 0.82, + "learning_rate": 9.705732463134361e-06, + "loss": 4.3349, + "step": 99500 + }, + { + "epoch": 0.82, + "learning_rate": 9.70408851041444e-06, + "loss": 4.3202, + "step": 100000 + }, + { + "epoch": 0.82, + "learning_rate": 9.702444557694522e-06, + "loss": 4.32, + "step": 100500 + }, + { + "epoch": 0.83, + "learning_rate": 9.700800604974602e-06, + "loss": 4.3064, + "step": 101000 + }, + { + "epoch": 0.83, + "learning_rate": 9.699156652254682e-06, + "loss": 4.3046, + "step": 101500 + }, + { + "epoch": 0.84, + "learning_rate": 9.697512699534763e-06, + "loss": 4.2962, + "step": 102000 + }, + { + "epoch": 0.84, + "learning_rate": 9.695868746814842e-06, + "loss": 4.2827, + "step": 102500 + }, + { + "epoch": 0.84, + "learning_rate": 9.694224794094923e-06, + "loss": 4.2798, + "step": 103000 + }, + { + "epoch": 0.85, + "learning_rate": 9.692580841375004e-06, + "loss": 4.2678, + "step": 103500 + }, + { + "epoch": 0.85, + "learning_rate": 9.690936888655083e-06, + "loss": 4.2663, + "step": 104000 + }, + { + "epoch": 0.86, + "learning_rate": 9.689292935935163e-06, + "loss": 4.2641, + "step": 104500 + }, + { + "epoch": 0.86, + "learning_rate": 9.687648983215243e-06, + "loss": 4.2555, + "step": 105000 + }, + { + "epoch": 0.86, + "learning_rate": 9.686005030495323e-06, + "loss": 4.2571, + "step": 105500 + }, + { + "epoch": 0.87, + "learning_rate": 9.684361077775404e-06, + "loss": 4.2354, + "step": 106000 + }, + { + "epoch": 0.87, + "learning_rate": 9.682717125055485e-06, + "loss": 4.2328, + "step": 106500 + }, + { + "epoch": 0.88, + "learning_rate": 9.681073172335564e-06, + "loss": 4.2232, + "step": 107000 + }, + { + "epoch": 0.88, + "learning_rate": 9.679429219615645e-06, + "loss": 4.2218, + "step": 107500 + }, + { + "epoch": 0.88, + "learning_rate": 9.677785266895724e-06, + "loss": 4.2115, + "step": 108000 + }, + { + "epoch": 0.89, + "learning_rate": 9.676141314175805e-06, + "loss": 4.2085, + "step": 108500 + }, + { + "epoch": 0.89, + "learning_rate": 9.674497361455886e-06, + "loss": 4.1947, + "step": 109000 + }, + { + "epoch": 0.9, + "learning_rate": 9.672853408735965e-06, + "loss": 4.1982, + "step": 109500 + }, + { + "epoch": 0.9, + "learning_rate": 9.671209456016045e-06, + "loss": 4.1942, + "step": 110000 + }, + { + "epoch": 0.91, + "learning_rate": 9.669565503296126e-06, + "loss": 4.1869, + "step": 110500 + }, + { + "epoch": 0.91, + "learning_rate": 9.667921550576205e-06, + "loss": 4.1827, + "step": 111000 + }, + { + "epoch": 0.91, + "learning_rate": 9.666277597856286e-06, + "loss": 4.174, + "step": 111500 + }, + { + "epoch": 0.92, + "learning_rate": 9.664633645136367e-06, + "loss": 4.1719, + "step": 112000 + }, + { + "epoch": 0.92, + "learning_rate": 9.662989692416446e-06, + "loss": 4.1692, + "step": 112500 + }, + { + "epoch": 0.93, + "learning_rate": 9.661345739696527e-06, + "loss": 4.1603, + "step": 113000 + }, + { + "epoch": 0.93, + "learning_rate": 9.659701786976608e-06, + "loss": 4.1583, + "step": 113500 + }, + { + "epoch": 0.93, + "learning_rate": 9.658057834256687e-06, + "loss": 4.1563, + "step": 114000 + }, + { + "epoch": 0.94, + "learning_rate": 9.656413881536768e-06, + "loss": 4.1527, + "step": 114500 + }, + { + "epoch": 0.94, + "learning_rate": 9.654769928816848e-06, + "loss": 4.1418, + "step": 115000 + }, + { + "epoch": 0.95, + "learning_rate": 9.653125976096929e-06, + "loss": 4.1333, + "step": 115500 + }, + { + "epoch": 0.95, + "learning_rate": 9.65148202337701e-06, + "loss": 4.1356, + "step": 116000 + }, + { + "epoch": 0.95, + "learning_rate": 9.649838070657089e-06, + "loss": 4.1236, + "step": 116500 + }, + { + "epoch": 0.96, + "learning_rate": 9.648194117937168e-06, + "loss": 4.126, + "step": 117000 + }, + { + "epoch": 0.96, + "learning_rate": 9.64655016521725e-06, + "loss": 4.1232, + "step": 117500 + }, + { + "epoch": 0.97, + "learning_rate": 9.644906212497328e-06, + "loss": 4.1148, + "step": 118000 + }, + { + "epoch": 0.97, + "learning_rate": 9.64326225977741e-06, + "loss": 4.1131, + "step": 118500 + }, + { + "epoch": 0.97, + "learning_rate": 9.64161830705749e-06, + "loss": 4.1212, + "step": 119000 + }, + { + "epoch": 0.98, + "learning_rate": 9.63997435433757e-06, + "loss": 4.1011, + "step": 119500 + }, + { + "epoch": 0.98, + "learning_rate": 9.63833040161765e-06, + "loss": 4.1015, + "step": 120000 + }, + { + "epoch": 0.98, + "eval_accuracy": 0.35834025167851113, + "eval_loss": 3.899629831314087, + "eval_runtime": 406.9878, + "eval_samples_per_second": 757.654, + "eval_steps_per_second": 15.787, + "step": 120000 + }, + { + "epoch": 0.99, + "learning_rate": 9.63668644889773e-06, + "loss": 4.0954, + "step": 120500 + }, + { + "epoch": 0.99, + "learning_rate": 9.635042496177811e-06, + "loss": 4.0893, + "step": 121000 + }, + { + "epoch": 1.0, + "learning_rate": 9.633398543457892e-06, + "loss": 4.0861, + "step": 121500 + }, + { + "epoch": 1.0, + "learning_rate": 9.631754590737971e-06, + "loss": 4.0811, + "step": 122000 + }, + { + "epoch": 1.0, + "learning_rate": 9.63011063801805e-06, + "loss": 4.0771, + "step": 122500 + }, + { + "epoch": 1.01, + "learning_rate": 9.628466685298131e-06, + "loss": 4.0736, + "step": 123000 + }, + { + "epoch": 1.01, + "learning_rate": 9.62682273257821e-06, + "loss": 4.0716, + "step": 123500 + }, + { + "epoch": 1.02, + "learning_rate": 9.625178779858292e-06, + "loss": 4.064, + "step": 124000 + }, + { + "epoch": 1.02, + "learning_rate": 9.623534827138373e-06, + "loss": 4.0587, + "step": 124500 + }, + { + "epoch": 1.02, + "learning_rate": 9.621890874418452e-06, + "loss": 4.056, + "step": 125000 + }, + { + "epoch": 1.03, + "learning_rate": 9.620246921698533e-06, + "loss": 4.0621, + "step": 125500 + }, + { + "epoch": 1.03, + "learning_rate": 9.618602968978614e-06, + "loss": 4.0473, + "step": 126000 + }, + { + "epoch": 1.04, + "learning_rate": 9.616959016258693e-06, + "loss": 4.0481, + "step": 126500 + }, + { + "epoch": 1.04, + "learning_rate": 9.615315063538774e-06, + "loss": 4.0496, + "step": 127000 + }, + { + "epoch": 1.04, + "learning_rate": 9.613671110818853e-06, + "loss": 4.041, + "step": 127500 + }, + { + "epoch": 1.05, + "learning_rate": 9.612027158098934e-06, + "loss": 4.0395, + "step": 128000 + }, + { + "epoch": 1.05, + "learning_rate": 9.610383205379014e-06, + "loss": 4.0357, + "step": 128500 + }, + { + "epoch": 1.06, + "learning_rate": 9.608739252659095e-06, + "loss": 4.0339, + "step": 129000 + }, + { + "epoch": 1.06, + "learning_rate": 9.607095299939174e-06, + "loss": 4.0269, + "step": 129500 + }, + { + "epoch": 1.07, + "learning_rate": 9.605451347219255e-06, + "loss": 4.0204, + "step": 130000 + }, + { + "epoch": 1.07, + "learning_rate": 9.603807394499334e-06, + "loss": 4.0235, + "step": 130500 + }, + { + "epoch": 1.07, + "learning_rate": 9.602163441779415e-06, + "loss": 4.0276, + "step": 131000 + }, + { + "epoch": 1.08, + "learning_rate": 9.600519489059496e-06, + "loss": 4.0172, + "step": 131500 + }, + { + "epoch": 1.08, + "learning_rate": 9.598875536339575e-06, + "loss": 4.0097, + "step": 132000 + }, + { + "epoch": 1.09, + "learning_rate": 9.597231583619656e-06, + "loss": 4.0083, + "step": 132500 + }, + { + "epoch": 1.09, + "learning_rate": 9.595587630899736e-06, + "loss": 4.0063, + "step": 133000 + }, + { + "epoch": 1.09, + "learning_rate": 9.593943678179817e-06, + "loss": 4.0038, + "step": 133500 + }, + { + "epoch": 1.1, + "learning_rate": 9.592299725459898e-06, + "loss": 3.995, + "step": 134000 + }, + { + "epoch": 1.1, + "learning_rate": 9.590655772739977e-06, + "loss": 3.9927, + "step": 134500 + }, + { + "epoch": 1.11, + "learning_rate": 9.589011820020056e-06, + "loss": 3.9941, + "step": 135000 + }, + { + "epoch": 1.11, + "learning_rate": 9.587367867300137e-06, + "loss": 3.997, + "step": 135500 + }, + { + "epoch": 1.11, + "learning_rate": 9.585723914580216e-06, + "loss": 3.986, + "step": 136000 + }, + { + "epoch": 1.12, + "learning_rate": 9.584079961860297e-06, + "loss": 3.9807, + "step": 136500 + }, + { + "epoch": 1.12, + "learning_rate": 9.582436009140378e-06, + "loss": 3.9846, + "step": 137000 + }, + { + "epoch": 1.13, + "learning_rate": 9.580792056420458e-06, + "loss": 3.9712, + "step": 137500 + }, + { + "epoch": 1.13, + "learning_rate": 9.579148103700539e-06, + "loss": 3.9697, + "step": 138000 + }, + { + "epoch": 1.13, + "learning_rate": 9.57750415098062e-06, + "loss": 3.9665, + "step": 138500 + }, + { + "epoch": 1.14, + "learning_rate": 9.575860198260699e-06, + "loss": 3.9676, + "step": 139000 + }, + { + "epoch": 1.14, + "learning_rate": 9.57421624554078e-06, + "loss": 3.9659, + "step": 139500 + }, + { + "epoch": 1.15, + "learning_rate": 9.572572292820859e-06, + "loss": 3.9657, + "step": 140000 + }, + { + "epoch": 1.15, + "learning_rate": 9.570928340100938e-06, + "loss": 3.9616, + "step": 140500 + }, + { + "epoch": 1.16, + "learning_rate": 9.56928438738102e-06, + "loss": 3.95, + "step": 141000 + }, + { + "epoch": 1.16, + "learning_rate": 9.5676404346611e-06, + "loss": 3.9508, + "step": 141500 + }, + { + "epoch": 1.16, + "learning_rate": 9.56599648194118e-06, + "loss": 3.9479, + "step": 142000 + }, + { + "epoch": 1.17, + "learning_rate": 9.56435252922126e-06, + "loss": 3.9495, + "step": 142500 + }, + { + "epoch": 1.17, + "learning_rate": 9.56270857650134e-06, + "loss": 3.9443, + "step": 143000 + }, + { + "epoch": 1.18, + "learning_rate": 9.561064623781421e-06, + "loss": 3.9392, + "step": 143500 + }, + { + "epoch": 1.18, + "learning_rate": 9.559420671061502e-06, + "loss": 3.943, + "step": 144000 + }, + { + "epoch": 1.18, + "learning_rate": 9.557776718341581e-06, + "loss": 3.9397, + "step": 144500 + }, + { + "epoch": 1.19, + "learning_rate": 9.556132765621662e-06, + "loss": 3.9338, + "step": 145000 + }, + { + "epoch": 1.19, + "learning_rate": 9.554488812901741e-06, + "loss": 3.9284, + "step": 145500 + }, + { + "epoch": 1.2, + "learning_rate": 9.552844860181822e-06, + "loss": 3.9276, + "step": 146000 + }, + { + "epoch": 1.2, + "learning_rate": 9.551200907461902e-06, + "loss": 3.9332, + "step": 146500 + }, + { + "epoch": 1.2, + "learning_rate": 9.549556954741983e-06, + "loss": 3.9263, + "step": 147000 + }, + { + "epoch": 1.21, + "learning_rate": 9.547913002022062e-06, + "loss": 3.9198, + "step": 147500 + }, + { + "epoch": 1.21, + "learning_rate": 9.546269049302143e-06, + "loss": 3.9173, + "step": 148000 + }, + { + "epoch": 1.22, + "learning_rate": 9.544625096582222e-06, + "loss": 3.919, + "step": 148500 + }, + { + "epoch": 1.22, + "learning_rate": 9.542981143862303e-06, + "loss": 3.917, + "step": 149000 + }, + { + "epoch": 1.22, + "learning_rate": 9.541337191142384e-06, + "loss": 3.9189, + "step": 149500 + }, + { + "epoch": 1.23, + "learning_rate": 9.539693238422463e-06, + "loss": 3.9119, + "step": 150000 + }, + { + "epoch": 1.23, + "eval_accuracy": 0.3782837597418556, + "eval_loss": 3.7199175357818604, + "eval_runtime": 410.4253, + "eval_samples_per_second": 751.308, + "eval_steps_per_second": 15.654, + "step": 150000 + }, + { + "epoch": 1.23, + "learning_rate": 9.538049285702544e-06, + "loss": 3.9076, + "step": 150500 + }, + { + "epoch": 1.24, + "learning_rate": 9.536405332982625e-06, + "loss": 3.8992, + "step": 151000 + }, + { + "epoch": 1.24, + "learning_rate": 9.534761380262705e-06, + "loss": 3.8987, + "step": 151500 + }, + { + "epoch": 1.25, + "learning_rate": 9.533117427542786e-06, + "loss": 3.8942, + "step": 152000 + }, + { + "epoch": 1.25, + "learning_rate": 9.531473474822865e-06, + "loss": 3.9044, + "step": 152500 + }, + { + "epoch": 1.25, + "learning_rate": 9.529829522102944e-06, + "loss": 3.8889, + "step": 153000 + }, + { + "epoch": 1.26, + "learning_rate": 9.528185569383025e-06, + "loss": 3.8895, + "step": 153500 + }, + { + "epoch": 1.26, + "learning_rate": 9.526541616663106e-06, + "loss": 3.8905, + "step": 154000 + }, + { + "epoch": 1.27, + "learning_rate": 9.524897663943185e-06, + "loss": 3.8842, + "step": 154500 + }, + { + "epoch": 1.27, + "learning_rate": 9.523253711223266e-06, + "loss": 3.8843, + "step": 155000 + }, + { + "epoch": 1.27, + "learning_rate": 9.521609758503346e-06, + "loss": 3.8713, + "step": 155500 + }, + { + "epoch": 1.28, + "learning_rate": 9.519965805783427e-06, + "loss": 3.8828, + "step": 156000 + }, + { + "epoch": 1.28, + "learning_rate": 9.518321853063508e-06, + "loss": 3.8808, + "step": 156500 + }, + { + "epoch": 1.29, + "learning_rate": 9.516677900343587e-06, + "loss": 3.8747, + "step": 157000 + }, + { + "epoch": 1.29, + "learning_rate": 9.515033947623668e-06, + "loss": 3.8737, + "step": 157500 + }, + { + "epoch": 1.29, + "learning_rate": 9.513389994903747e-06, + "loss": 3.8709, + "step": 158000 + }, + { + "epoch": 1.3, + "learning_rate": 9.511746042183826e-06, + "loss": 3.8574, + "step": 158500 + }, + { + "epoch": 1.3, + "learning_rate": 9.510102089463907e-06, + "loss": 3.8695, + "step": 159000 + }, + { + "epoch": 1.31, + "learning_rate": 9.508458136743988e-06, + "loss": 3.8644, + "step": 159500 + }, + { + "epoch": 1.31, + "learning_rate": 9.506814184024068e-06, + "loss": 3.8627, + "step": 160000 + }, + { + "epoch": 1.31, + "learning_rate": 9.505170231304149e-06, + "loss": 3.8545, + "step": 160500 + }, + { + "epoch": 1.32, + "learning_rate": 9.503526278584228e-06, + "loss": 3.852, + "step": 161000 + }, + { + "epoch": 1.32, + "learning_rate": 9.501882325864309e-06, + "loss": 3.8451, + "step": 161500 + }, + { + "epoch": 1.33, + "learning_rate": 9.50023837314439e-06, + "loss": 3.8514, + "step": 162000 + }, + { + "epoch": 1.33, + "learning_rate": 9.498594420424469e-06, + "loss": 3.8516, + "step": 162500 + }, + { + "epoch": 1.34, + "learning_rate": 9.49695046770455e-06, + "loss": 3.8459, + "step": 163000 + }, + { + "epoch": 1.34, + "learning_rate": 9.495306514984631e-06, + "loss": 3.8456, + "step": 163500 + }, + { + "epoch": 1.34, + "learning_rate": 9.49366256226471e-06, + "loss": 3.8393, + "step": 164000 + }, + { + "epoch": 1.35, + "learning_rate": 9.492018609544791e-06, + "loss": 3.8407, + "step": 164500 + }, + { + "epoch": 1.35, + "learning_rate": 9.49037465682487e-06, + "loss": 3.8337, + "step": 165000 + }, + { + "epoch": 1.36, + "learning_rate": 9.48873070410495e-06, + "loss": 3.8352, + "step": 165500 + }, + { + "epoch": 1.36, + "learning_rate": 9.48708675138503e-06, + "loss": 3.8305, + "step": 166000 + }, + { + "epoch": 1.36, + "learning_rate": 9.485442798665112e-06, + "loss": 3.8327, + "step": 166500 + }, + { + "epoch": 1.37, + "learning_rate": 9.483798845945191e-06, + "loss": 3.8242, + "step": 167000 + }, + { + "epoch": 1.37, + "learning_rate": 9.482154893225272e-06, + "loss": 3.8248, + "step": 167500 + }, + { + "epoch": 1.38, + "learning_rate": 9.480510940505351e-06, + "loss": 3.8208, + "step": 168000 + }, + { + "epoch": 1.38, + "learning_rate": 9.478866987785432e-06, + "loss": 3.8195, + "step": 168500 + }, + { + "epoch": 1.38, + "learning_rate": 9.477223035065513e-06, + "loss": 3.8262, + "step": 169000 + }, + { + "epoch": 1.39, + "learning_rate": 9.475579082345593e-06, + "loss": 3.8205, + "step": 169500 + }, + { + "epoch": 1.39, + "learning_rate": 9.473935129625673e-06, + "loss": 3.823, + "step": 170000 + }, + { + "epoch": 1.4, + "learning_rate": 9.472291176905753e-06, + "loss": 3.8127, + "step": 170500 + }, + { + "epoch": 1.4, + "learning_rate": 9.470647224185832e-06, + "loss": 3.813, + "step": 171000 + }, + { + "epoch": 1.41, + "learning_rate": 9.469003271465913e-06, + "loss": 3.8126, + "step": 171500 + }, + { + "epoch": 1.41, + "learning_rate": 9.467359318745994e-06, + "loss": 3.808, + "step": 172000 + }, + { + "epoch": 1.41, + "learning_rate": 9.465715366026073e-06, + "loss": 3.8051, + "step": 172500 + }, + { + "epoch": 1.42, + "learning_rate": 9.464071413306154e-06, + "loss": 3.8038, + "step": 173000 + }, + { + "epoch": 1.42, + "learning_rate": 9.462427460586234e-06, + "loss": 3.7961, + "step": 173500 + }, + { + "epoch": 1.43, + "learning_rate": 9.460783507866314e-06, + "loss": 3.7979, + "step": 174000 + }, + { + "epoch": 1.43, + "learning_rate": 9.459139555146395e-06, + "loss": 3.795, + "step": 174500 + }, + { + "epoch": 1.43, + "learning_rate": 9.457495602426475e-06, + "loss": 3.8004, + "step": 175000 + }, + { + "epoch": 1.44, + "learning_rate": 9.455851649706556e-06, + "loss": 3.7958, + "step": 175500 + }, + { + "epoch": 1.44, + "learning_rate": 9.454207696986637e-06, + "loss": 3.7909, + "step": 176000 + }, + { + "epoch": 1.45, + "learning_rate": 9.452563744266716e-06, + "loss": 3.8, + "step": 176500 + }, + { + "epoch": 1.45, + "learning_rate": 9.450919791546795e-06, + "loss": 3.7826, + "step": 177000 + }, + { + "epoch": 1.45, + "learning_rate": 9.449275838826876e-06, + "loss": 3.7857, + "step": 177500 + }, + { + "epoch": 1.46, + "learning_rate": 9.447631886106956e-06, + "loss": 3.7867, + "step": 178000 + }, + { + "epoch": 1.46, + "learning_rate": 9.445987933387036e-06, + "loss": 3.7935, + "step": 178500 + }, + { + "epoch": 1.47, + "learning_rate": 9.444343980667117e-06, + "loss": 3.7803, + "step": 179000 + }, + { + "epoch": 1.47, + "learning_rate": 9.442700027947197e-06, + "loss": 3.782, + "step": 179500 + }, + { + "epoch": 1.47, + "learning_rate": 9.441056075227278e-06, + "loss": 3.7832, + "step": 180000 + }, + { + "epoch": 1.47, + "eval_accuracy": 0.39196367058721415, + "eval_loss": 3.6038873195648193, + "eval_runtime": 407.692, + "eval_samples_per_second": 756.345, + "eval_steps_per_second": 15.759, + "step": 180000 + }, + { + "epoch": 1.48, + "learning_rate": 9.439412122507357e-06, + "loss": 3.775, + "step": 180500 + }, + { + "epoch": 1.48, + "learning_rate": 9.437768169787438e-06, + "loss": 3.7777, + "step": 181000 + }, + { + "epoch": 1.49, + "learning_rate": 9.436124217067519e-06, + "loss": 3.769, + "step": 181500 + }, + { + "epoch": 1.49, + "learning_rate": 9.434480264347598e-06, + "loss": 3.7712, + "step": 182000 + }, + { + "epoch": 1.5, + "learning_rate": 9.43283631162768e-06, + "loss": 3.7638, + "step": 182500 + }, + { + "epoch": 1.5, + "learning_rate": 9.431192358907758e-06, + "loss": 3.7686, + "step": 183000 + }, + { + "epoch": 1.5, + "learning_rate": 9.429548406187838e-06, + "loss": 3.7652, + "step": 183500 + }, + { + "epoch": 1.51, + "learning_rate": 9.427904453467919e-06, + "loss": 3.7649, + "step": 184000 + }, + { + "epoch": 1.51, + "learning_rate": 9.426260500748e-06, + "loss": 3.7608, + "step": 184500 + }, + { + "epoch": 1.52, + "learning_rate": 9.424616548028079e-06, + "loss": 3.762, + "step": 185000 + }, + { + "epoch": 1.52, + "learning_rate": 9.42297259530816e-06, + "loss": 3.7551, + "step": 185500 + }, + { + "epoch": 1.52, + "learning_rate": 9.42132864258824e-06, + "loss": 3.7545, + "step": 186000 + }, + { + "epoch": 1.53, + "learning_rate": 9.41968468986832e-06, + "loss": 3.7556, + "step": 186500 + }, + { + "epoch": 1.53, + "learning_rate": 9.418040737148401e-06, + "loss": 3.7507, + "step": 187000 + }, + { + "epoch": 1.54, + "learning_rate": 9.41639678442848e-06, + "loss": 3.7467, + "step": 187500 + }, + { + "epoch": 1.54, + "learning_rate": 9.414752831708561e-06, + "loss": 3.7507, + "step": 188000 + }, + { + "epoch": 1.54, + "learning_rate": 9.413108878988642e-06, + "loss": 3.7384, + "step": 188500 + }, + { + "epoch": 1.55, + "learning_rate": 9.41146492626872e-06, + "loss": 3.7473, + "step": 189000 + }, + { + "epoch": 1.55, + "learning_rate": 9.409820973548801e-06, + "loss": 3.7429, + "step": 189500 + }, + { + "epoch": 1.56, + "learning_rate": 9.408177020828882e-06, + "loss": 3.7427, + "step": 190000 + }, + { + "epoch": 1.56, + "learning_rate": 9.406533068108961e-06, + "loss": 3.74, + "step": 190500 + }, + { + "epoch": 1.56, + "learning_rate": 9.404889115389042e-06, + "loss": 3.7361, + "step": 191000 + }, + { + "epoch": 1.57, + "learning_rate": 9.403245162669123e-06, + "loss": 3.7427, + "step": 191500 + }, + { + "epoch": 1.57, + "learning_rate": 9.401601209949202e-06, + "loss": 3.7388, + "step": 192000 + }, + { + "epoch": 1.58, + "learning_rate": 9.399957257229283e-06, + "loss": 3.7349, + "step": 192500 + }, + { + "epoch": 1.58, + "learning_rate": 9.398313304509363e-06, + "loss": 3.7302, + "step": 193000 + }, + { + "epoch": 1.59, + "learning_rate": 9.396669351789444e-06, + "loss": 3.7408, + "step": 193500 + }, + { + "epoch": 1.59, + "learning_rate": 9.395025399069525e-06, + "loss": 3.7239, + "step": 194000 + }, + { + "epoch": 1.59, + "learning_rate": 9.393381446349604e-06, + "loss": 3.7258, + "step": 194500 + }, + { + "epoch": 1.6, + "learning_rate": 9.391737493629683e-06, + "loss": 3.7149, + "step": 195000 + }, + { + "epoch": 1.6, + "learning_rate": 9.390093540909764e-06, + "loss": 3.7213, + "step": 195500 + }, + { + "epoch": 1.61, + "learning_rate": 9.388449588189843e-06, + "loss": 3.7218, + "step": 196000 + }, + { + "epoch": 1.61, + "learning_rate": 9.386805635469924e-06, + "loss": 3.7175, + "step": 196500 + }, + { + "epoch": 1.61, + "learning_rate": 9.385161682750005e-06, + "loss": 3.7088, + "step": 197000 + }, + { + "epoch": 1.62, + "learning_rate": 9.383517730030085e-06, + "loss": 3.7147, + "step": 197500 + }, + { + "epoch": 1.62, + "learning_rate": 9.381873777310166e-06, + "loss": 3.7194, + "step": 198000 + }, + { + "epoch": 1.63, + "learning_rate": 9.380229824590245e-06, + "loss": 3.7048, + "step": 198500 + }, + { + "epoch": 1.63, + "learning_rate": 9.378585871870326e-06, + "loss": 3.7022, + "step": 199000 + }, + { + "epoch": 1.63, + "learning_rate": 9.376941919150407e-06, + "loss": 3.6975, + "step": 199500 + }, + { + "epoch": 1.64, + "learning_rate": 9.375297966430486e-06, + "loss": 3.6994, + "step": 200000 + }, + { + "epoch": 1.64, + "learning_rate": 9.373654013710567e-06, + "loss": 3.6963, + "step": 200500 + }, + { + "epoch": 1.65, + "learning_rate": 9.372010060990646e-06, + "loss": 3.6973, + "step": 201000 + }, + { + "epoch": 1.65, + "learning_rate": 9.370366108270726e-06, + "loss": 3.6949, + "step": 201500 + }, + { + "epoch": 1.65, + "learning_rate": 9.368722155550807e-06, + "loss": 3.6867, + "step": 202000 + }, + { + "epoch": 1.66, + "learning_rate": 9.367078202830888e-06, + "loss": 3.6865, + "step": 202500 + }, + { + "epoch": 1.66, + "learning_rate": 9.365434250110967e-06, + "loss": 3.6862, + "step": 203000 + }, + { + "epoch": 1.67, + "learning_rate": 9.363790297391048e-06, + "loss": 3.682, + "step": 203500 + }, + { + "epoch": 1.67, + "learning_rate": 9.362146344671129e-06, + "loss": 3.6823, + "step": 204000 + }, + { + "epoch": 1.68, + "learning_rate": 9.360502391951208e-06, + "loss": 3.6839, + "step": 204500 + }, + { + "epoch": 1.68, + "learning_rate": 9.358858439231289e-06, + "loss": 3.6768, + "step": 205000 + }, + { + "epoch": 1.68, + "learning_rate": 9.357214486511368e-06, + "loss": 3.6763, + "step": 205500 + }, + { + "epoch": 1.69, + "learning_rate": 9.35557053379145e-06, + "loss": 3.6763, + "step": 206000 + }, + { + "epoch": 1.69, + "learning_rate": 9.35392658107153e-06, + "loss": 3.6788, + "step": 206500 + }, + { + "epoch": 1.7, + "learning_rate": 9.35228262835161e-06, + "loss": 3.6731, + "step": 207000 + }, + { + "epoch": 1.7, + "learning_rate": 9.350638675631689e-06, + "loss": 3.6703, + "step": 207500 + }, + { + "epoch": 1.7, + "learning_rate": 9.34899472291177e-06, + "loss": 3.6735, + "step": 208000 + }, + { + "epoch": 1.71, + "learning_rate": 9.347350770191849e-06, + "loss": 3.6659, + "step": 208500 + }, + { + "epoch": 1.71, + "learning_rate": 9.34570681747193e-06, + "loss": 3.6618, + "step": 209000 + }, + { + "epoch": 1.72, + "learning_rate": 9.344062864752011e-06, + "loss": 3.66, + "step": 209500 + }, + { + "epoch": 1.72, + "learning_rate": 9.34241891203209e-06, + "loss": 3.6686, + "step": 210000 + }, + { + "epoch": 1.72, + "eval_accuracy": 0.4032999985682802, + "eval_loss": 3.505682945251465, + "eval_runtime": 407.8472, + "eval_samples_per_second": 756.058, + "eval_steps_per_second": 15.753, + "step": 210000 + }, + { + "epoch": 1.72, + "learning_rate": 9.340774959312171e-06, + "loss": 3.6608, + "step": 210500 + }, + { + "epoch": 1.73, + "learning_rate": 9.33913100659225e-06, + "loss": 3.6528, + "step": 211000 + }, + { + "epoch": 1.73, + "learning_rate": 9.337487053872332e-06, + "loss": 3.6607, + "step": 211500 + }, + { + "epoch": 1.74, + "learning_rate": 9.335843101152413e-06, + "loss": 3.6561, + "step": 212000 + }, + { + "epoch": 1.74, + "learning_rate": 9.334199148432492e-06, + "loss": 3.6495, + "step": 212500 + }, + { + "epoch": 1.75, + "learning_rate": 9.332555195712573e-06, + "loss": 3.6438, + "step": 213000 + }, + { + "epoch": 1.75, + "learning_rate": 9.330911242992652e-06, + "loss": 3.6483, + "step": 213500 + }, + { + "epoch": 1.75, + "learning_rate": 9.329267290272731e-06, + "loss": 3.6504, + "step": 214000 + }, + { + "epoch": 1.76, + "learning_rate": 9.327623337552812e-06, + "loss": 3.6394, + "step": 214500 + }, + { + "epoch": 1.76, + "learning_rate": 9.325979384832893e-06, + "loss": 3.6413, + "step": 215000 + }, + { + "epoch": 1.77, + "learning_rate": 9.324335432112973e-06, + "loss": 3.6425, + "step": 215500 + }, + { + "epoch": 1.77, + "learning_rate": 9.322691479393054e-06, + "loss": 3.6369, + "step": 216000 + }, + { + "epoch": 1.77, + "learning_rate": 9.321047526673135e-06, + "loss": 3.6366, + "step": 216500 + }, + { + "epoch": 1.78, + "learning_rate": 9.319403573953214e-06, + "loss": 3.637, + "step": 217000 + }, + { + "epoch": 1.78, + "learning_rate": 9.317759621233295e-06, + "loss": 3.638, + "step": 217500 + }, + { + "epoch": 1.79, + "learning_rate": 9.316115668513374e-06, + "loss": 3.6389, + "step": 218000 + }, + { + "epoch": 1.79, + "learning_rate": 9.314471715793455e-06, + "loss": 3.6397, + "step": 218500 + }, + { + "epoch": 1.79, + "learning_rate": 9.312827763073536e-06, + "loss": 3.6262, + "step": 219000 + }, + { + "epoch": 1.8, + "learning_rate": 9.311183810353615e-06, + "loss": 3.6296, + "step": 219500 + }, + { + "epoch": 1.8, + "learning_rate": 9.309539857633695e-06, + "loss": 3.6228, + "step": 220000 + }, + { + "epoch": 1.81, + "learning_rate": 9.307895904913776e-06, + "loss": 3.6257, + "step": 220500 + }, + { + "epoch": 1.81, + "learning_rate": 9.306251952193855e-06, + "loss": 3.6242, + "step": 221000 + }, + { + "epoch": 1.81, + "learning_rate": 9.304607999473936e-06, + "loss": 3.6263, + "step": 221500 + }, + { + "epoch": 1.82, + "learning_rate": 9.302964046754017e-06, + "loss": 3.6204, + "step": 222000 + }, + { + "epoch": 1.82, + "learning_rate": 9.301320094034096e-06, + "loss": 3.6092, + "step": 222500 + }, + { + "epoch": 1.83, + "learning_rate": 9.299676141314177e-06, + "loss": 3.6146, + "step": 223000 + }, + { + "epoch": 1.83, + "learning_rate": 9.298032188594256e-06, + "loss": 3.6233, + "step": 223500 + }, + { + "epoch": 1.84, + "learning_rate": 9.296388235874337e-06, + "loss": 3.62, + "step": 224000 + }, + { + "epoch": 1.84, + "learning_rate": 9.294744283154418e-06, + "loss": 3.6191, + "step": 224500 + }, + { + "epoch": 1.84, + "learning_rate": 9.293100330434498e-06, + "loss": 3.616, + "step": 225000 + }, + { + "epoch": 1.85, + "learning_rate": 9.291456377714577e-06, + "loss": 3.6147, + "step": 225500 + }, + { + "epoch": 1.85, + "learning_rate": 9.289812424994658e-06, + "loss": 3.6147, + "step": 226000 + }, + { + "epoch": 1.86, + "learning_rate": 9.288168472274737e-06, + "loss": 3.6144, + "step": 226500 + }, + { + "epoch": 1.86, + "learning_rate": 9.286524519554818e-06, + "loss": 3.6047, + "step": 227000 + }, + { + "epoch": 1.86, + "learning_rate": 9.284880566834899e-06, + "loss": 3.6106, + "step": 227500 + }, + { + "epoch": 1.87, + "learning_rate": 9.283236614114978e-06, + "loss": 3.6059, + "step": 228000 + }, + { + "epoch": 1.87, + "learning_rate": 9.28159266139506e-06, + "loss": 3.6122, + "step": 228500 + }, + { + "epoch": 1.88, + "learning_rate": 9.279948708675139e-06, + "loss": 3.6052, + "step": 229000 + }, + { + "epoch": 1.88, + "learning_rate": 9.27830475595522e-06, + "loss": 3.6047, + "step": 229500 + }, + { + "epoch": 1.88, + "learning_rate": 9.2766608032353e-06, + "loss": 3.6021, + "step": 230000 + }, + { + "epoch": 1.89, + "learning_rate": 9.27501685051538e-06, + "loss": 3.6023, + "step": 230500 + }, + { + "epoch": 1.89, + "learning_rate": 9.27337289779546e-06, + "loss": 3.6019, + "step": 231000 + }, + { + "epoch": 1.9, + "learning_rate": 9.27172894507554e-06, + "loss": 3.595, + "step": 231500 + }, + { + "epoch": 1.9, + "learning_rate": 9.270084992355621e-06, + "loss": 3.5902, + "step": 232000 + }, + { + "epoch": 1.9, + "learning_rate": 9.2684410396357e-06, + "loss": 3.5964, + "step": 232500 + }, + { + "epoch": 1.91, + "learning_rate": 9.266797086915781e-06, + "loss": 3.5953, + "step": 233000 + }, + { + "epoch": 1.91, + "learning_rate": 9.26515313419586e-06, + "loss": 3.5934, + "step": 233500 + }, + { + "epoch": 1.92, + "learning_rate": 9.263509181475942e-06, + "loss": 3.5851, + "step": 234000 + }, + { + "epoch": 1.92, + "learning_rate": 9.261865228756023e-06, + "loss": 3.5861, + "step": 234500 + }, + { + "epoch": 1.93, + "learning_rate": 9.260221276036102e-06, + "loss": 3.5849, + "step": 235000 + }, + { + "epoch": 1.93, + "learning_rate": 9.258577323316183e-06, + "loss": 3.5883, + "step": 235500 + }, + { + "epoch": 1.93, + "learning_rate": 9.256933370596262e-06, + "loss": 3.5822, + "step": 236000 + }, + { + "epoch": 1.94, + "learning_rate": 9.255289417876343e-06, + "loss": 3.5742, + "step": 236500 + }, + { + "epoch": 1.94, + "learning_rate": 9.253645465156424e-06, + "loss": 3.5809, + "step": 237000 + }, + { + "epoch": 1.95, + "learning_rate": 9.252001512436503e-06, + "loss": 3.5805, + "step": 237500 + }, + { + "epoch": 1.95, + "learning_rate": 9.250357559716583e-06, + "loss": 3.5813, + "step": 238000 + }, + { + "epoch": 1.95, + "learning_rate": 9.248713606996664e-06, + "loss": 3.5793, + "step": 238500 + }, + { + "epoch": 1.96, + "learning_rate": 9.247069654276743e-06, + "loss": 3.5819, + "step": 239000 + }, + { + "epoch": 1.96, + "learning_rate": 9.245425701556824e-06, + "loss": 3.5771, + "step": 239500 + }, + { + "epoch": 1.97, + "learning_rate": 9.243781748836905e-06, + "loss": 3.5793, + "step": 240000 + }, + { + "epoch": 1.97, + "eval_accuracy": 0.4137328604632164, + "eval_loss": 3.4226527214050293, + "eval_runtime": 405.6879, + "eval_samples_per_second": 760.082, + "eval_steps_per_second": 15.837, + "step": 240000 + }, + { + "epoch": 1.97, + "learning_rate": 9.242137796116984e-06, + "loss": 3.5746, + "step": 240500 + }, + { + "epoch": 1.97, + "learning_rate": 9.240493843397065e-06, + "loss": 3.571, + "step": 241000 + }, + { + "epoch": 1.98, + "learning_rate": 9.238849890677144e-06, + "loss": 3.5759, + "step": 241500 + }, + { + "epoch": 1.98, + "learning_rate": 9.237205937957225e-06, + "loss": 3.5681, + "step": 242000 + }, + { + "epoch": 1.99, + "learning_rate": 9.235561985237306e-06, + "loss": 3.5718, + "step": 242500 + }, + { + "epoch": 1.99, + "learning_rate": 9.233918032517386e-06, + "loss": 3.5656, + "step": 243000 + }, + { + "epoch": 1.99, + "learning_rate": 9.232274079797465e-06, + "loss": 3.5635, + "step": 243500 + }, + { + "epoch": 2.0, + "learning_rate": 9.230630127077546e-06, + "loss": 3.5619, + "step": 244000 + }, + { + "epoch": 2.0, + "learning_rate": 9.228986174357627e-06, + "loss": 3.5596, + "step": 244500 + }, + { + "epoch": 2.01, + "learning_rate": 9.227342221637706e-06, + "loss": 3.5599, + "step": 245000 + }, + { + "epoch": 2.01, + "learning_rate": 9.225698268917787e-06, + "loss": 3.57, + "step": 245500 + }, + { + "epoch": 2.02, + "learning_rate": 9.224054316197866e-06, + "loss": 3.5608, + "step": 246000 + }, + { + "epoch": 2.02, + "learning_rate": 9.222410363477947e-06, + "loss": 3.5601, + "step": 246500 + }, + { + "epoch": 2.02, + "learning_rate": 9.220766410758028e-06, + "loss": 3.562, + "step": 247000 + }, + { + "epoch": 2.03, + "learning_rate": 9.219122458038107e-06, + "loss": 3.5569, + "step": 247500 + }, + { + "epoch": 2.03, + "learning_rate": 9.217478505318188e-06, + "loss": 3.5563, + "step": 248000 + }, + { + "epoch": 2.04, + "learning_rate": 9.215834552598268e-06, + "loss": 3.5634, + "step": 248500 + }, + { + "epoch": 2.04, + "learning_rate": 9.214190599878349e-06, + "loss": 3.55, + "step": 249000 + }, + { + "epoch": 2.04, + "learning_rate": 9.212546647158428e-06, + "loss": 3.5459, + "step": 249500 + }, + { + "epoch": 2.05, + "learning_rate": 9.210902694438509e-06, + "loss": 3.5478, + "step": 250000 + }, + { + "epoch": 2.05, + "learning_rate": 9.209258741718588e-06, + "loss": 3.5526, + "step": 250500 + }, + { + "epoch": 2.06, + "learning_rate": 9.20761478899867e-06, + "loss": 3.5476, + "step": 251000 + }, + { + "epoch": 2.06, + "learning_rate": 9.205970836278748e-06, + "loss": 3.5567, + "step": 251500 + }, + { + "epoch": 2.06, + "learning_rate": 9.20432688355883e-06, + "loss": 3.5423, + "step": 252000 + }, + { + "epoch": 2.07, + "learning_rate": 9.20268293083891e-06, + "loss": 3.5487, + "step": 252500 + }, + { + "epoch": 2.07, + "learning_rate": 9.20103897811899e-06, + "loss": 3.5401, + "step": 253000 + }, + { + "epoch": 2.08, + "learning_rate": 9.19939502539907e-06, + "loss": 3.5503, + "step": 253500 + }, + { + "epoch": 2.08, + "learning_rate": 9.19775107267915e-06, + "loss": 3.5503, + "step": 254000 + }, + { + "epoch": 2.09, + "learning_rate": 9.196107119959231e-06, + "loss": 3.5316, + "step": 254500 + }, + { + "epoch": 2.09, + "learning_rate": 9.194463167239312e-06, + "loss": 3.541, + "step": 255000 + }, + { + "epoch": 2.09, + "learning_rate": 9.192819214519391e-06, + "loss": 3.5368, + "step": 255500 + }, + { + "epoch": 2.1, + "learning_rate": 9.19117526179947e-06, + "loss": 3.5351, + "step": 256000 + }, + { + "epoch": 2.1, + "learning_rate": 9.189531309079551e-06, + "loss": 3.537, + "step": 256500 + }, + { + "epoch": 2.11, + "learning_rate": 9.18788735635963e-06, + "loss": 3.536, + "step": 257000 + }, + { + "epoch": 2.11, + "learning_rate": 9.186243403639712e-06, + "loss": 3.5331, + "step": 257500 + }, + { + "epoch": 2.11, + "learning_rate": 9.184599450919793e-06, + "loss": 3.5408, + "step": 258000 + }, + { + "epoch": 2.12, + "learning_rate": 9.182955498199872e-06, + "loss": 3.5391, + "step": 258500 + }, + { + "epoch": 2.12, + "learning_rate": 9.181311545479953e-06, + "loss": 3.5339, + "step": 259000 + }, + { + "epoch": 2.13, + "learning_rate": 9.179667592760034e-06, + "loss": 3.535, + "step": 259500 + }, + { + "epoch": 2.13, + "learning_rate": 9.178023640040113e-06, + "loss": 3.5261, + "step": 260000 + }, + { + "epoch": 2.13, + "learning_rate": 9.176379687320194e-06, + "loss": 3.5266, + "step": 260500 + }, + { + "epoch": 2.14, + "learning_rate": 9.174735734600273e-06, + "loss": 3.5283, + "step": 261000 + }, + { + "epoch": 2.14, + "learning_rate": 9.173091781880353e-06, + "loss": 3.5348, + "step": 261500 + }, + { + "epoch": 2.15, + "learning_rate": 9.171447829160434e-06, + "loss": 3.5231, + "step": 262000 + }, + { + "epoch": 2.15, + "learning_rate": 9.169803876440515e-06, + "loss": 3.5138, + "step": 262500 + }, + { + "epoch": 2.15, + "learning_rate": 9.168159923720594e-06, + "loss": 3.5306, + "step": 263000 + }, + { + "epoch": 2.16, + "learning_rate": 9.166515971000675e-06, + "loss": 3.5224, + "step": 263500 + }, + { + "epoch": 2.16, + "learning_rate": 9.164872018280754e-06, + "loss": 3.5279, + "step": 264000 + }, + { + "epoch": 2.17, + "learning_rate": 9.163228065560835e-06, + "loss": 3.5207, + "step": 264500 + }, + { + "epoch": 2.17, + "learning_rate": 9.161584112840916e-06, + "loss": 3.5213, + "step": 265000 + }, + { + "epoch": 2.18, + "learning_rate": 9.159940160120995e-06, + "loss": 3.5152, + "step": 265500 + }, + { + "epoch": 2.18, + "learning_rate": 9.158296207401076e-06, + "loss": 3.5148, + "step": 266000 + }, + { + "epoch": 2.18, + "learning_rate": 9.156652254681156e-06, + "loss": 3.5185, + "step": 266500 + }, + { + "epoch": 2.19, + "learning_rate": 9.155008301961237e-06, + "loss": 3.5155, + "step": 267000 + }, + { + "epoch": 2.19, + "learning_rate": 9.153364349241318e-06, + "loss": 3.5192, + "step": 267500 + }, + { + "epoch": 2.2, + "learning_rate": 9.151720396521397e-06, + "loss": 3.5185, + "step": 268000 + }, + { + "epoch": 2.2, + "learning_rate": 9.150076443801476e-06, + "loss": 3.518, + "step": 268500 + }, + { + "epoch": 2.2, + "learning_rate": 9.148432491081557e-06, + "loss": 3.5159, + "step": 269000 + }, + { + "epoch": 2.21, + "learning_rate": 9.146788538361636e-06, + "loss": 3.5211, + "step": 269500 + }, + { + "epoch": 2.21, + "learning_rate": 9.145144585641717e-06, + "loss": 3.5128, + "step": 270000 + }, + { + "epoch": 2.21, + "eval_accuracy": 0.42085813479275763, + "eval_loss": 3.364504814147949, + "eval_runtime": 411.9164, + "eval_samples_per_second": 748.589, + "eval_steps_per_second": 15.598, + "step": 270000 + }, + { + "epoch": 2.22, + "learning_rate": 9.143500632921798e-06, + "loss": 3.5162, + "step": 270500 + }, + { + "epoch": 2.22, + "learning_rate": 9.141856680201878e-06, + "loss": 3.5144, + "step": 271000 + }, + { + "epoch": 2.22, + "learning_rate": 9.140212727481959e-06, + "loss": 3.5095, + "step": 271500 + }, + { + "epoch": 2.23, + "learning_rate": 9.13856877476204e-06, + "loss": 3.502, + "step": 272000 + }, + { + "epoch": 2.23, + "learning_rate": 9.136924822042119e-06, + "loss": 3.503, + "step": 272500 + }, + { + "epoch": 2.24, + "learning_rate": 9.1352808693222e-06, + "loss": 3.5011, + "step": 273000 + }, + { + "epoch": 2.24, + "learning_rate": 9.133636916602279e-06, + "loss": 3.505, + "step": 273500 + }, + { + "epoch": 2.24, + "learning_rate": 9.131992963882358e-06, + "loss": 3.5071, + "step": 274000 + }, + { + "epoch": 2.25, + "learning_rate": 9.13034901116244e-06, + "loss": 3.5018, + "step": 274500 + }, + { + "epoch": 2.25, + "learning_rate": 9.12870505844252e-06, + "loss": 3.5058, + "step": 275000 + }, + { + "epoch": 2.26, + "learning_rate": 9.1270611057226e-06, + "loss": 3.5056, + "step": 275500 + }, + { + "epoch": 2.26, + "learning_rate": 9.12541715300268e-06, + "loss": 3.5085, + "step": 276000 + }, + { + "epoch": 2.27, + "learning_rate": 9.12377320028276e-06, + "loss": 3.5111, + "step": 276500 + }, + { + "epoch": 2.27, + "learning_rate": 9.122129247562841e-06, + "loss": 3.4963, + "step": 277000 + }, + { + "epoch": 2.27, + "learning_rate": 9.120485294842922e-06, + "loss": 3.5001, + "step": 277500 + }, + { + "epoch": 2.28, + "learning_rate": 9.118841342123001e-06, + "loss": 3.5004, + "step": 278000 + }, + { + "epoch": 2.28, + "learning_rate": 9.117197389403082e-06, + "loss": 3.5027, + "step": 278500 + }, + { + "epoch": 2.29, + "learning_rate": 9.115553436683161e-06, + "loss": 3.4984, + "step": 279000 + }, + { + "epoch": 2.29, + "learning_rate": 9.113909483963242e-06, + "loss": 3.5006, + "step": 279500 + }, + { + "epoch": 2.29, + "learning_rate": 9.112265531243322e-06, + "loss": 3.4952, + "step": 280000 + }, + { + "epoch": 2.3, + "learning_rate": 9.110621578523403e-06, + "loss": 3.4953, + "step": 280500 + }, + { + "epoch": 2.3, + "learning_rate": 9.108977625803482e-06, + "loss": 3.4951, + "step": 281000 + }, + { + "epoch": 2.31, + "learning_rate": 9.107333673083563e-06, + "loss": 3.4984, + "step": 281500 + }, + { + "epoch": 2.31, + "learning_rate": 9.105689720363642e-06, + "loss": 3.4849, + "step": 282000 + }, + { + "epoch": 2.31, + "learning_rate": 9.104045767643723e-06, + "loss": 3.4901, + "step": 282500 + }, + { + "epoch": 2.32, + "learning_rate": 9.102401814923804e-06, + "loss": 3.4884, + "step": 283000 + }, + { + "epoch": 2.32, + "learning_rate": 9.100757862203883e-06, + "loss": 3.4937, + "step": 283500 + }, + { + "epoch": 2.33, + "learning_rate": 9.099113909483964e-06, + "loss": 3.4915, + "step": 284000 + }, + { + "epoch": 2.33, + "learning_rate": 9.097469956764045e-06, + "loss": 3.4922, + "step": 284500 + }, + { + "epoch": 2.33, + "learning_rate": 9.095826004044125e-06, + "loss": 3.4892, + "step": 285000 + }, + { + "epoch": 2.34, + "learning_rate": 9.094182051324206e-06, + "loss": 3.4837, + "step": 285500 + }, + { + "epoch": 2.34, + "learning_rate": 9.092538098604285e-06, + "loss": 3.4821, + "step": 286000 + }, + { + "epoch": 2.35, + "learning_rate": 9.090894145884364e-06, + "loss": 3.4836, + "step": 286500 + }, + { + "epoch": 2.35, + "learning_rate": 9.089250193164445e-06, + "loss": 3.4826, + "step": 287000 + }, + { + "epoch": 2.36, + "learning_rate": 9.087606240444526e-06, + "loss": 3.477, + "step": 287500 + }, + { + "epoch": 2.36, + "learning_rate": 9.085962287724605e-06, + "loss": 3.485, + "step": 288000 + }, + { + "epoch": 2.36, + "learning_rate": 9.084318335004686e-06, + "loss": 3.4778, + "step": 288500 + }, + { + "epoch": 2.37, + "learning_rate": 9.082674382284766e-06, + "loss": 3.49, + "step": 289000 + }, + { + "epoch": 2.37, + "learning_rate": 9.081030429564847e-06, + "loss": 3.4814, + "step": 289500 + }, + { + "epoch": 2.38, + "learning_rate": 9.079386476844928e-06, + "loss": 3.4752, + "step": 290000 + }, + { + "epoch": 2.38, + "learning_rate": 9.077742524125007e-06, + "loss": 3.4825, + "step": 290500 + }, + { + "epoch": 2.38, + "learning_rate": 9.076098571405088e-06, + "loss": 3.4784, + "step": 291000 + }, + { + "epoch": 2.39, + "learning_rate": 9.074454618685167e-06, + "loss": 3.4804, + "step": 291500 + }, + { + "epoch": 2.39, + "learning_rate": 9.072810665965246e-06, + "loss": 3.4815, + "step": 292000 + }, + { + "epoch": 2.4, + "learning_rate": 9.071166713245327e-06, + "loss": 3.4706, + "step": 292500 + }, + { + "epoch": 2.4, + "learning_rate": 9.069522760525408e-06, + "loss": 3.4753, + "step": 293000 + }, + { + "epoch": 2.4, + "learning_rate": 9.067878807805488e-06, + "loss": 3.4782, + "step": 293500 + }, + { + "epoch": 2.41, + "learning_rate": 9.066234855085569e-06, + "loss": 3.4705, + "step": 294000 + }, + { + "epoch": 2.41, + "learning_rate": 9.064590902365648e-06, + "loss": 3.4739, + "step": 294500 + }, + { + "epoch": 2.42, + "learning_rate": 9.062946949645729e-06, + "loss": 3.4786, + "step": 295000 + }, + { + "epoch": 2.42, + "learning_rate": 9.06130299692581e-06, + "loss": 3.4724, + "step": 295500 + }, + { + "epoch": 2.43, + "learning_rate": 9.059659044205889e-06, + "loss": 3.4684, + "step": 296000 + }, + { + "epoch": 2.43, + "learning_rate": 9.05801509148597e-06, + "loss": 3.4717, + "step": 296500 + }, + { + "epoch": 2.43, + "learning_rate": 9.056371138766051e-06, + "loss": 3.4704, + "step": 297000 + }, + { + "epoch": 2.44, + "learning_rate": 9.05472718604613e-06, + "loss": 3.4656, + "step": 297500 + }, + { + "epoch": 2.44, + "learning_rate": 9.05308323332621e-06, + "loss": 3.4666, + "step": 298000 + }, + { + "epoch": 2.45, + "learning_rate": 9.05143928060629e-06, + "loss": 3.4695, + "step": 298500 + }, + { + "epoch": 2.45, + "learning_rate": 9.04979532788637e-06, + "loss": 3.4686, + "step": 299000 + }, + { + "epoch": 2.45, + "learning_rate": 9.04815137516645e-06, + "loss": 3.4652, + "step": 299500 + }, + { + "epoch": 2.46, + "learning_rate": 9.046507422446532e-06, + "loss": 3.4597, + "step": 300000 + }, + { + "epoch": 2.46, + "eval_accuracy": 0.42611833634349283, + "eval_loss": 3.321903944015503, + "eval_runtime": 409.6321, + "eval_samples_per_second": 752.763, + "eval_steps_per_second": 15.685, + "step": 300000 + }, + { + "epoch": 2.46, + "learning_rate": 9.044863469726611e-06, + "loss": 3.4601, + "step": 300500 + }, + { + "epoch": 2.47, + "learning_rate": 9.043219517006692e-06, + "loss": 3.4717, + "step": 301000 + }, + { + "epoch": 2.47, + "learning_rate": 9.041575564286771e-06, + "loss": 3.463, + "step": 301500 + }, + { + "epoch": 2.47, + "learning_rate": 9.039931611566852e-06, + "loss": 3.463, + "step": 302000 + }, + { + "epoch": 2.48, + "learning_rate": 9.038287658846933e-06, + "loss": 3.4575, + "step": 302500 + }, + { + "epoch": 2.48, + "learning_rate": 9.036643706127013e-06, + "loss": 3.4648, + "step": 303000 + }, + { + "epoch": 2.49, + "learning_rate": 9.034999753407094e-06, + "loss": 3.4625, + "step": 303500 + }, + { + "epoch": 2.49, + "learning_rate": 9.033355800687173e-06, + "loss": 3.4562, + "step": 304000 + }, + { + "epoch": 2.49, + "learning_rate": 9.031711847967252e-06, + "loss": 3.4587, + "step": 304500 + }, + { + "epoch": 2.5, + "learning_rate": 9.030067895247333e-06, + "loss": 3.4486, + "step": 305000 + }, + { + "epoch": 2.5, + "learning_rate": 9.028423942527414e-06, + "loss": 3.4609, + "step": 305500 + }, + { + "epoch": 2.51, + "learning_rate": 9.026779989807493e-06, + "loss": 3.4502, + "step": 306000 + }, + { + "epoch": 2.51, + "learning_rate": 9.025136037087574e-06, + "loss": 3.4515, + "step": 306500 + }, + { + "epoch": 2.52, + "learning_rate": 9.023492084367654e-06, + "loss": 3.4536, + "step": 307000 + }, + { + "epoch": 2.52, + "learning_rate": 9.021848131647735e-06, + "loss": 3.4527, + "step": 307500 + }, + { + "epoch": 2.52, + "learning_rate": 9.020204178927815e-06, + "loss": 3.4506, + "step": 308000 + }, + { + "epoch": 2.53, + "learning_rate": 9.018560226207895e-06, + "loss": 3.4553, + "step": 308500 + }, + { + "epoch": 2.53, + "learning_rate": 9.016916273487976e-06, + "loss": 3.457, + "step": 309000 + }, + { + "epoch": 2.54, + "learning_rate": 9.015272320768057e-06, + "loss": 3.4517, + "step": 309500 + }, + { + "epoch": 2.54, + "learning_rate": 9.013628368048134e-06, + "loss": 3.4471, + "step": 310000 + }, + { + "epoch": 2.54, + "learning_rate": 9.011984415328215e-06, + "loss": 3.4469, + "step": 310500 + }, + { + "epoch": 2.55, + "learning_rate": 9.010340462608296e-06, + "loss": 3.4488, + "step": 311000 + }, + { + "epoch": 2.55, + "learning_rate": 9.008696509888376e-06, + "loss": 3.4416, + "step": 311500 + }, + { + "epoch": 2.56, + "learning_rate": 9.007052557168457e-06, + "loss": 3.4526, + "step": 312000 + }, + { + "epoch": 2.56, + "learning_rate": 9.005408604448537e-06, + "loss": 3.4508, + "step": 312500 + }, + { + "epoch": 2.56, + "learning_rate": 9.003764651728617e-06, + "loss": 3.4583, + "step": 313000 + }, + { + "epoch": 2.57, + "learning_rate": 9.002120699008698e-06, + "loss": 3.4571, + "step": 313500 + }, + { + "epoch": 2.57, + "learning_rate": 9.000476746288777e-06, + "loss": 3.454, + "step": 314000 + }, + { + "epoch": 2.58, + "learning_rate": 8.998832793568858e-06, + "loss": 3.4457, + "step": 314500 + }, + { + "epoch": 2.58, + "learning_rate": 8.997188840848939e-06, + "loss": 3.4385, + "step": 315000 + }, + { + "epoch": 2.58, + "learning_rate": 8.995544888129018e-06, + "loss": 3.4465, + "step": 315500 + }, + { + "epoch": 2.59, + "learning_rate": 8.9939009354091e-06, + "loss": 3.4435, + "step": 316000 + }, + { + "epoch": 2.59, + "learning_rate": 8.992256982689178e-06, + "loss": 3.4489, + "step": 316500 + }, + { + "epoch": 2.6, + "learning_rate": 8.990613029969258e-06, + "loss": 3.438, + "step": 317000 + }, + { + "epoch": 2.6, + "learning_rate": 8.988969077249339e-06, + "loss": 3.4412, + "step": 317500 + }, + { + "epoch": 2.61, + "learning_rate": 8.98732512452942e-06, + "loss": 3.4394, + "step": 318000 + }, + { + "epoch": 2.61, + "learning_rate": 8.985681171809499e-06, + "loss": 3.4455, + "step": 318500 + }, + { + "epoch": 2.61, + "learning_rate": 8.98403721908958e-06, + "loss": 3.4398, + "step": 319000 + }, + { + "epoch": 2.62, + "learning_rate": 8.98239326636966e-06, + "loss": 3.4448, + "step": 319500 + }, + { + "epoch": 2.62, + "learning_rate": 8.98074931364974e-06, + "loss": 3.4345, + "step": 320000 + }, + { + "epoch": 2.63, + "learning_rate": 8.979105360929821e-06, + "loss": 3.4359, + "step": 320500 + }, + { + "epoch": 2.63, + "learning_rate": 8.9774614082099e-06, + "loss": 3.4498, + "step": 321000 + }, + { + "epoch": 2.63, + "learning_rate": 8.975817455489981e-06, + "loss": 3.4363, + "step": 321500 + }, + { + "epoch": 2.64, + "learning_rate": 8.974173502770062e-06, + "loss": 3.4381, + "step": 322000 + }, + { + "epoch": 2.64, + "learning_rate": 8.97252955005014e-06, + "loss": 3.4334, + "step": 322500 + }, + { + "epoch": 2.65, + "learning_rate": 8.970885597330221e-06, + "loss": 3.4354, + "step": 323000 + }, + { + "epoch": 2.65, + "learning_rate": 8.969241644610302e-06, + "loss": 3.4338, + "step": 323500 + }, + { + "epoch": 2.65, + "learning_rate": 8.967597691890381e-06, + "loss": 3.4305, + "step": 324000 + }, + { + "epoch": 2.66, + "learning_rate": 8.965953739170462e-06, + "loss": 3.4322, + "step": 324500 + }, + { + "epoch": 2.66, + "learning_rate": 8.964309786450543e-06, + "loss": 3.4363, + "step": 325000 + }, + { + "epoch": 2.67, + "learning_rate": 8.962665833730622e-06, + "loss": 3.4398, + "step": 325500 + }, + { + "epoch": 2.67, + "learning_rate": 8.961021881010703e-06, + "loss": 3.4376, + "step": 326000 + }, + { + "epoch": 2.67, + "learning_rate": 8.959377928290783e-06, + "loss": 3.4268, + "step": 326500 + }, + { + "epoch": 2.68, + "learning_rate": 8.957733975570864e-06, + "loss": 3.4415, + "step": 327000 + }, + { + "epoch": 2.68, + "learning_rate": 8.956090022850945e-06, + "loss": 3.4253, + "step": 327500 + }, + { + "epoch": 2.69, + "learning_rate": 8.954446070131024e-06, + "loss": 3.4335, + "step": 328000 + }, + { + "epoch": 2.69, + "learning_rate": 8.952802117411103e-06, + "loss": 3.4273, + "step": 328500 + }, + { + "epoch": 2.7, + "learning_rate": 8.951158164691184e-06, + "loss": 3.4334, + "step": 329000 + }, + { + "epoch": 2.7, + "learning_rate": 8.949514211971263e-06, + "loss": 3.4288, + "step": 329500 + }, + { + "epoch": 2.7, + "learning_rate": 8.947870259251344e-06, + "loss": 3.4263, + "step": 330000 + }, + { + "epoch": 2.7, + "eval_accuracy": 0.43119620464233216, + "eval_loss": 3.284120559692383, + "eval_runtime": 409.6426, + "eval_samples_per_second": 752.744, + "eval_steps_per_second": 15.684, + "step": 330000 + }, + { + "epoch": 2.71, + "learning_rate": 8.946226306531425e-06, + "loss": 3.4205, + "step": 330500 + }, + { + "epoch": 2.71, + "learning_rate": 8.944582353811505e-06, + "loss": 3.4233, + "step": 331000 + }, + { + "epoch": 2.72, + "learning_rate": 8.942938401091586e-06, + "loss": 3.4275, + "step": 331500 + }, + { + "epoch": 2.72, + "learning_rate": 8.941294448371665e-06, + "loss": 3.4317, + "step": 332000 + }, + { + "epoch": 2.72, + "learning_rate": 8.939650495651746e-06, + "loss": 3.4239, + "step": 332500 + }, + { + "epoch": 2.73, + "learning_rate": 8.938006542931827e-06, + "loss": 3.4237, + "step": 333000 + }, + { + "epoch": 2.73, + "learning_rate": 8.936362590211906e-06, + "loss": 3.4192, + "step": 333500 + }, + { + "epoch": 2.74, + "learning_rate": 8.934718637491987e-06, + "loss": 3.4193, + "step": 334000 + }, + { + "epoch": 2.74, + "learning_rate": 8.933074684772066e-06, + "loss": 3.4232, + "step": 334500 + }, + { + "epoch": 2.74, + "learning_rate": 8.931430732052146e-06, + "loss": 3.422, + "step": 335000 + }, + { + "epoch": 2.75, + "learning_rate": 8.929786779332227e-06, + "loss": 3.4168, + "step": 335500 + }, + { + "epoch": 2.75, + "learning_rate": 8.928142826612308e-06, + "loss": 3.4203, + "step": 336000 + }, + { + "epoch": 2.76, + "learning_rate": 8.926498873892387e-06, + "loss": 3.4188, + "step": 336500 + }, + { + "epoch": 2.76, + "learning_rate": 8.924854921172468e-06, + "loss": 3.4202, + "step": 337000 + }, + { + "epoch": 2.77, + "learning_rate": 8.923210968452549e-06, + "loss": 3.4217, + "step": 337500 + }, + { + "epoch": 2.77, + "learning_rate": 8.921567015732628e-06, + "loss": 3.4185, + "step": 338000 + }, + { + "epoch": 2.77, + "learning_rate": 8.919923063012709e-06, + "loss": 3.4202, + "step": 338500 + }, + { + "epoch": 2.78, + "learning_rate": 8.918279110292788e-06, + "loss": 3.4172, + "step": 339000 + }, + { + "epoch": 2.78, + "learning_rate": 8.91663515757287e-06, + "loss": 3.4221, + "step": 339500 + }, + { + "epoch": 2.79, + "learning_rate": 8.91499120485295e-06, + "loss": 3.4136, + "step": 340000 + }, + { + "epoch": 2.79, + "learning_rate": 8.91334725213303e-06, + "loss": 3.4209, + "step": 340500 + }, + { + "epoch": 2.79, + "learning_rate": 8.911703299413109e-06, + "loss": 3.4182, + "step": 341000 + }, + { + "epoch": 2.8, + "learning_rate": 8.91005934669319e-06, + "loss": 3.4152, + "step": 341500 + }, + { + "epoch": 2.8, + "learning_rate": 8.90841539397327e-06, + "loss": 3.4124, + "step": 342000 + }, + { + "epoch": 2.81, + "learning_rate": 8.90677144125335e-06, + "loss": 3.408, + "step": 342500 + }, + { + "epoch": 2.81, + "learning_rate": 8.905127488533431e-06, + "loss": 3.4112, + "step": 343000 + }, + { + "epoch": 2.81, + "learning_rate": 8.90348353581351e-06, + "loss": 3.4144, + "step": 343500 + }, + { + "epoch": 2.82, + "learning_rate": 8.901839583093591e-06, + "loss": 3.4173, + "step": 344000 + }, + { + "epoch": 2.82, + "learning_rate": 8.90019563037367e-06, + "loss": 3.4107, + "step": 344500 + }, + { + "epoch": 2.83, + "learning_rate": 8.898551677653752e-06, + "loss": 3.4094, + "step": 345000 + }, + { + "epoch": 2.83, + "learning_rate": 8.896907724933833e-06, + "loss": 3.4165, + "step": 345500 + }, + { + "epoch": 2.83, + "learning_rate": 8.895263772213912e-06, + "loss": 3.4163, + "step": 346000 + }, + { + "epoch": 2.84, + "learning_rate": 8.893619819493991e-06, + "loss": 3.4158, + "step": 346500 + }, + { + "epoch": 2.84, + "learning_rate": 8.891975866774072e-06, + "loss": 3.4166, + "step": 347000 + }, + { + "epoch": 2.85, + "learning_rate": 8.890331914054151e-06, + "loss": 3.4059, + "step": 347500 + }, + { + "epoch": 2.85, + "learning_rate": 8.888687961334232e-06, + "loss": 3.4079, + "step": 348000 + }, + { + "epoch": 2.86, + "learning_rate": 8.887044008614313e-06, + "loss": 3.4116, + "step": 348500 + }, + { + "epoch": 2.86, + "learning_rate": 8.885400055894393e-06, + "loss": 3.4078, + "step": 349000 + }, + { + "epoch": 2.86, + "learning_rate": 8.883756103174474e-06, + "loss": 3.4109, + "step": 349500 + }, + { + "epoch": 2.87, + "learning_rate": 8.882112150454555e-06, + "loss": 3.4022, + "step": 350000 + }, + { + "epoch": 2.87, + "learning_rate": 8.880468197734634e-06, + "loss": 3.4004, + "step": 350500 + }, + { + "epoch": 2.88, + "learning_rate": 8.878824245014715e-06, + "loss": 3.4024, + "step": 351000 + }, + { + "epoch": 2.88, + "learning_rate": 8.877180292294794e-06, + "loss": 3.4056, + "step": 351500 + }, + { + "epoch": 2.88, + "learning_rate": 8.875536339574875e-06, + "loss": 3.3985, + "step": 352000 + }, + { + "epoch": 2.89, + "learning_rate": 8.873892386854954e-06, + "loss": 3.3945, + "step": 352500 + }, + { + "epoch": 2.89, + "learning_rate": 8.872248434135035e-06, + "loss": 3.3978, + "step": 353000 + }, + { + "epoch": 2.9, + "learning_rate": 8.870604481415115e-06, + "loss": 3.4052, + "step": 353500 + }, + { + "epoch": 2.9, + "learning_rate": 8.868960528695196e-06, + "loss": 3.4048, + "step": 354000 + }, + { + "epoch": 2.9, + "learning_rate": 8.867316575975275e-06, + "loss": 3.4055, + "step": 354500 + }, + { + "epoch": 2.91, + "learning_rate": 8.865672623255356e-06, + "loss": 3.4025, + "step": 355000 + }, + { + "epoch": 2.91, + "learning_rate": 8.864028670535437e-06, + "loss": 3.3976, + "step": 355500 + }, + { + "epoch": 2.92, + "learning_rate": 8.862384717815516e-06, + "loss": 3.3962, + "step": 356000 + }, + { + "epoch": 2.92, + "learning_rate": 8.860740765095597e-06, + "loss": 3.3927, + "step": 356500 + }, + { + "epoch": 2.92, + "learning_rate": 8.859096812375676e-06, + "loss": 3.4024, + "step": 357000 + }, + { + "epoch": 2.93, + "learning_rate": 8.857452859655757e-06, + "loss": 3.3929, + "step": 357500 + }, + { + "epoch": 2.93, + "learning_rate": 8.855808906935838e-06, + "loss": 3.3911, + "step": 358000 + }, + { + "epoch": 2.94, + "learning_rate": 8.854164954215918e-06, + "loss": 3.3976, + "step": 358500 + }, + { + "epoch": 2.94, + "learning_rate": 8.852521001495997e-06, + "loss": 3.3944, + "step": 359000 + }, + { + "epoch": 2.95, + "learning_rate": 8.850877048776078e-06, + "loss": 3.3953, + "step": 359500 + }, + { + "epoch": 2.95, + "learning_rate": 8.849233096056157e-06, + "loss": 3.3909, + "step": 360000 + }, + { + "epoch": 2.95, + "eval_accuracy": 0.43477661532857337, + "eval_loss": 3.254718542098999, + "eval_runtime": 406.1401, + "eval_samples_per_second": 759.236, + "eval_steps_per_second": 15.82, + "step": 360000 + }, + { + "epoch": 2.95, + "learning_rate": 8.847589143336238e-06, + "loss": 3.403, + "step": 360500 + }, + { + "epoch": 2.96, + "learning_rate": 8.845945190616319e-06, + "loss": 3.3966, + "step": 361000 + }, + { + "epoch": 2.96, + "learning_rate": 8.844301237896398e-06, + "loss": 3.3835, + "step": 361500 + }, + { + "epoch": 2.97, + "learning_rate": 8.84265728517648e-06, + "loss": 3.3896, + "step": 362000 + }, + { + "epoch": 2.97, + "learning_rate": 8.84101333245656e-06, + "loss": 3.3855, + "step": 362500 + }, + { + "epoch": 2.97, + "learning_rate": 8.83936937973664e-06, + "loss": 3.3851, + "step": 363000 + }, + { + "epoch": 2.98, + "learning_rate": 8.83772542701672e-06, + "loss": 3.3927, + "step": 363500 + }, + { + "epoch": 2.98, + "learning_rate": 8.8360814742968e-06, + "loss": 3.389, + "step": 364000 + }, + { + "epoch": 2.99, + "learning_rate": 8.83443752157688e-06, + "loss": 3.3861, + "step": 364500 + }, + { + "epoch": 2.99, + "learning_rate": 8.83279356885696e-06, + "loss": 3.3955, + "step": 365000 + }, + { + "epoch": 2.99, + "learning_rate": 8.831149616137041e-06, + "loss": 3.395, + "step": 365500 + }, + { + "epoch": 3.0, + "learning_rate": 8.82950566341712e-06, + "loss": 3.3881, + "step": 366000 + }, + { + "epoch": 3.0, + "learning_rate": 8.827861710697201e-06, + "loss": 3.3844, + "step": 366500 + }, + { + "epoch": 3.01, + "learning_rate": 8.82621775797728e-06, + "loss": 3.3886, + "step": 367000 + }, + { + "epoch": 3.01, + "learning_rate": 8.824573805257362e-06, + "loss": 3.3897, + "step": 367500 + }, + { + "epoch": 3.01, + "learning_rate": 8.822929852537443e-06, + "loss": 3.3882, + "step": 368000 + }, + { + "epoch": 3.02, + "learning_rate": 8.821285899817522e-06, + "loss": 3.3919, + "step": 368500 + }, + { + "epoch": 3.02, + "learning_rate": 8.819641947097603e-06, + "loss": 3.3815, + "step": 369000 + }, + { + "epoch": 3.03, + "learning_rate": 8.817997994377682e-06, + "loss": 3.3802, + "step": 369500 + }, + { + "epoch": 3.03, + "learning_rate": 8.816354041657763e-06, + "loss": 3.3817, + "step": 370000 + }, + { + "epoch": 3.04, + "learning_rate": 8.814710088937844e-06, + "loss": 3.3874, + "step": 370500 + }, + { + "epoch": 3.04, + "learning_rate": 8.813066136217923e-06, + "loss": 3.3875, + "step": 371000 + }, + { + "epoch": 3.04, + "learning_rate": 8.811422183498003e-06, + "loss": 3.3885, + "step": 371500 + }, + { + "epoch": 3.05, + "learning_rate": 8.809778230778084e-06, + "loss": 3.3884, + "step": 372000 + }, + { + "epoch": 3.05, + "learning_rate": 8.808134278058163e-06, + "loss": 3.3905, + "step": 372500 + }, + { + "epoch": 3.06, + "learning_rate": 8.806490325338244e-06, + "loss": 3.3829, + "step": 373000 + }, + { + "epoch": 3.06, + "learning_rate": 8.804846372618325e-06, + "loss": 3.3858, + "step": 373500 + }, + { + "epoch": 3.06, + "learning_rate": 8.803202419898404e-06, + "loss": 3.3852, + "step": 374000 + }, + { + "epoch": 3.07, + "learning_rate": 8.801558467178485e-06, + "loss": 3.3814, + "step": 374500 + }, + { + "epoch": 3.07, + "learning_rate": 8.799914514458564e-06, + "loss": 3.3849, + "step": 375000 + }, + { + "epoch": 3.08, + "learning_rate": 8.798270561738645e-06, + "loss": 3.3812, + "step": 375500 + }, + { + "epoch": 3.08, + "learning_rate": 8.796626609018726e-06, + "loss": 3.3815, + "step": 376000 + }, + { + "epoch": 3.08, + "learning_rate": 8.794982656298806e-06, + "loss": 3.3738, + "step": 376500 + }, + { + "epoch": 3.09, + "learning_rate": 8.793338703578885e-06, + "loss": 3.381, + "step": 377000 + }, + { + "epoch": 3.09, + "learning_rate": 8.791694750858966e-06, + "loss": 3.3762, + "step": 377500 + }, + { + "epoch": 3.1, + "learning_rate": 8.790050798139047e-06, + "loss": 3.3709, + "step": 378000 + }, + { + "epoch": 3.1, + "learning_rate": 8.788406845419126e-06, + "loss": 3.3779, + "step": 378500 + }, + { + "epoch": 3.11, + "learning_rate": 8.786762892699207e-06, + "loss": 3.3774, + "step": 379000 + }, + { + "epoch": 3.11, + "learning_rate": 8.785118939979286e-06, + "loss": 3.3692, + "step": 379500 + }, + { + "epoch": 3.11, + "learning_rate": 8.783474987259367e-06, + "loss": 3.3737, + "step": 380000 + }, + { + "epoch": 3.12, + "learning_rate": 8.781831034539448e-06, + "loss": 3.3717, + "step": 380500 + }, + { + "epoch": 3.12, + "learning_rate": 8.780187081819528e-06, + "loss": 3.3763, + "step": 381000 + }, + { + "epoch": 3.13, + "learning_rate": 8.778543129099608e-06, + "loss": 3.3748, + "step": 381500 + }, + { + "epoch": 3.13, + "learning_rate": 8.776899176379688e-06, + "loss": 3.3824, + "step": 382000 + }, + { + "epoch": 3.13, + "learning_rate": 8.775255223659769e-06, + "loss": 3.3739, + "step": 382500 + }, + { + "epoch": 3.14, + "learning_rate": 8.773611270939848e-06, + "loss": 3.3745, + "step": 383000 + }, + { + "epoch": 3.14, + "learning_rate": 8.771967318219929e-06, + "loss": 3.3775, + "step": 383500 + }, + { + "epoch": 3.15, + "learning_rate": 8.770323365500008e-06, + "loss": 3.3675, + "step": 384000 + }, + { + "epoch": 3.15, + "learning_rate": 8.76867941278009e-06, + "loss": 3.3676, + "step": 384500 + }, + { + "epoch": 3.15, + "learning_rate": 8.767035460060169e-06, + "loss": 3.3667, + "step": 385000 + }, + { + "epoch": 3.16, + "learning_rate": 8.76539150734025e-06, + "loss": 3.3638, + "step": 385500 + }, + { + "epoch": 3.16, + "learning_rate": 8.76374755462033e-06, + "loss": 3.3682, + "step": 386000 + }, + { + "epoch": 3.17, + "learning_rate": 8.76210360190041e-06, + "loss": 3.3698, + "step": 386500 + }, + { + "epoch": 3.17, + "learning_rate": 8.76045964918049e-06, + "loss": 3.371, + "step": 387000 + }, + { + "epoch": 3.17, + "learning_rate": 8.75881569646057e-06, + "loss": 3.3797, + "step": 387500 + }, + { + "epoch": 3.18, + "learning_rate": 8.757171743740651e-06, + "loss": 3.3701, + "step": 388000 + }, + { + "epoch": 3.18, + "learning_rate": 8.755527791020732e-06, + "loss": 3.3725, + "step": 388500 + }, + { + "epoch": 3.19, + "learning_rate": 8.753883838300811e-06, + "loss": 3.3784, + "step": 389000 + }, + { + "epoch": 3.19, + "learning_rate": 8.75223988558089e-06, + "loss": 3.3677, + "step": 389500 + }, + { + "epoch": 3.2, + "learning_rate": 8.750595932860971e-06, + "loss": 3.3635, + "step": 390000 + }, + { + "epoch": 3.2, + "eval_accuracy": 0.43791422082834985, + "eval_loss": 3.228388547897339, + "eval_runtime": 413.1728, + "eval_samples_per_second": 746.312, + "eval_steps_per_second": 15.55, + "step": 390000 + }, + { + "epoch": 3.2, + "learning_rate": 8.748951980141052e-06, + "loss": 3.364, + "step": 390500 + }, + { + "epoch": 3.2, + "learning_rate": 8.747308027421132e-06, + "loss": 3.3708, + "step": 391000 + }, + { + "epoch": 3.21, + "learning_rate": 8.745664074701213e-06, + "loss": 3.3714, + "step": 391500 + }, + { + "epoch": 3.21, + "learning_rate": 8.744020121981292e-06, + "loss": 3.3658, + "step": 392000 + }, + { + "epoch": 3.22, + "learning_rate": 8.742376169261373e-06, + "loss": 3.3653, + "step": 392500 + }, + { + "epoch": 3.22, + "learning_rate": 8.740732216541454e-06, + "loss": 3.3617, + "step": 393000 + }, + { + "epoch": 3.22, + "learning_rate": 8.739088263821533e-06, + "loss": 3.367, + "step": 393500 + }, + { + "epoch": 3.23, + "learning_rate": 8.737444311101614e-06, + "loss": 3.3608, + "step": 394000 + }, + { + "epoch": 3.23, + "learning_rate": 8.735800358381693e-06, + "loss": 3.3643, + "step": 394500 + }, + { + "epoch": 3.24, + "learning_rate": 8.734156405661773e-06, + "loss": 3.3607, + "step": 395000 + }, + { + "epoch": 3.24, + "learning_rate": 8.732512452941854e-06, + "loss": 3.3638, + "step": 395500 + }, + { + "epoch": 3.24, + "learning_rate": 8.730868500221935e-06, + "loss": 3.3687, + "step": 396000 + }, + { + "epoch": 3.25, + "learning_rate": 8.729224547502014e-06, + "loss": 3.3616, + "step": 396500 + }, + { + "epoch": 3.25, + "learning_rate": 8.727580594782095e-06, + "loss": 3.3678, + "step": 397000 + }, + { + "epoch": 3.26, + "learning_rate": 8.725936642062174e-06, + "loss": 3.3616, + "step": 397500 + }, + { + "epoch": 3.26, + "learning_rate": 8.724292689342255e-06, + "loss": 3.3573, + "step": 398000 + }, + { + "epoch": 3.26, + "learning_rate": 8.722648736622336e-06, + "loss": 3.3622, + "step": 398500 + }, + { + "epoch": 3.27, + "learning_rate": 8.721004783902415e-06, + "loss": 3.3588, + "step": 399000 + }, + { + "epoch": 3.27, + "learning_rate": 8.719360831182496e-06, + "loss": 3.3604, + "step": 399500 + }, + { + "epoch": 3.28, + "learning_rate": 8.717716878462576e-06, + "loss": 3.361, + "step": 400000 + }, + { + "epoch": 3.28, + "learning_rate": 8.716072925742657e-06, + "loss": 3.3546, + "step": 400500 + }, + { + "epoch": 3.29, + "learning_rate": 8.714428973022736e-06, + "loss": 3.3613, + "step": 401000 + }, + { + "epoch": 3.29, + "learning_rate": 8.712785020302817e-06, + "loss": 3.3619, + "step": 401500 + }, + { + "epoch": 3.29, + "learning_rate": 8.711141067582896e-06, + "loss": 3.3623, + "step": 402000 + }, + { + "epoch": 3.3, + "learning_rate": 8.709497114862977e-06, + "loss": 3.3552, + "step": 402500 + }, + { + "epoch": 3.3, + "learning_rate": 8.707853162143056e-06, + "loss": 3.3554, + "step": 403000 + }, + { + "epoch": 3.31, + "learning_rate": 8.706209209423137e-06, + "loss": 3.3587, + "step": 403500 + }, + { + "epoch": 3.31, + "learning_rate": 8.704565256703218e-06, + "loss": 3.3558, + "step": 404000 + }, + { + "epoch": 3.31, + "learning_rate": 8.702921303983298e-06, + "loss": 3.3582, + "step": 404500 + }, + { + "epoch": 3.32, + "learning_rate": 8.701277351263379e-06, + "loss": 3.3627, + "step": 405000 + }, + { + "epoch": 3.32, + "learning_rate": 8.69963339854346e-06, + "loss": 3.3572, + "step": 405500 + }, + { + "epoch": 3.33, + "learning_rate": 8.697989445823539e-06, + "loss": 3.3658, + "step": 406000 + }, + { + "epoch": 3.33, + "learning_rate": 8.69634549310362e-06, + "loss": 3.358, + "step": 406500 + }, + { + "epoch": 3.33, + "learning_rate": 8.6947015403837e-06, + "loss": 3.349, + "step": 407000 + }, + { + "epoch": 3.34, + "learning_rate": 8.693057587663778e-06, + "loss": 3.3534, + "step": 407500 + }, + { + "epoch": 3.34, + "learning_rate": 8.69141363494386e-06, + "loss": 3.3595, + "step": 408000 + }, + { + "epoch": 3.35, + "learning_rate": 8.68976968222394e-06, + "loss": 3.3551, + "step": 408500 + }, + { + "epoch": 3.35, + "learning_rate": 8.68812572950402e-06, + "loss": 3.3574, + "step": 409000 + }, + { + "epoch": 3.35, + "learning_rate": 8.6864817767841e-06, + "loss": 3.3534, + "step": 409500 + }, + { + "epoch": 3.36, + "learning_rate": 8.68483782406418e-06, + "loss": 3.3541, + "step": 410000 + }, + { + "epoch": 3.36, + "learning_rate": 8.683193871344261e-06, + "loss": 3.3545, + "step": 410500 + }, + { + "epoch": 3.37, + "learning_rate": 8.681549918624342e-06, + "loss": 3.3506, + "step": 411000 + }, + { + "epoch": 3.37, + "learning_rate": 8.679905965904421e-06, + "loss": 3.3454, + "step": 411500 + }, + { + "epoch": 3.38, + "learning_rate": 8.678262013184502e-06, + "loss": 3.3557, + "step": 412000 + }, + { + "epoch": 3.38, + "learning_rate": 8.676618060464581e-06, + "loss": 3.3449, + "step": 412500 + }, + { + "epoch": 3.38, + "learning_rate": 8.67497410774466e-06, + "loss": 3.3565, + "step": 413000 + }, + { + "epoch": 3.39, + "learning_rate": 8.673330155024742e-06, + "loss": 3.3423, + "step": 413500 + }, + { + "epoch": 3.39, + "learning_rate": 8.671686202304823e-06, + "loss": 3.3515, + "step": 414000 + }, + { + "epoch": 3.4, + "learning_rate": 8.670042249584902e-06, + "loss": 3.3462, + "step": 414500 + }, + { + "epoch": 3.4, + "learning_rate": 8.668398296864983e-06, + "loss": 3.3515, + "step": 415000 + }, + { + "epoch": 3.4, + "learning_rate": 8.666754344145062e-06, + "loss": 3.3514, + "step": 415500 + }, + { + "epoch": 3.41, + "learning_rate": 8.665110391425143e-06, + "loss": 3.3498, + "step": 416000 + }, + { + "epoch": 3.41, + "learning_rate": 8.663466438705224e-06, + "loss": 3.3514, + "step": 416500 + }, + { + "epoch": 3.42, + "learning_rate": 8.661822485985303e-06, + "loss": 3.3517, + "step": 417000 + }, + { + "epoch": 3.42, + "learning_rate": 8.660178533265384e-06, + "loss": 3.3476, + "step": 417500 + }, + { + "epoch": 3.42, + "learning_rate": 8.658534580545465e-06, + "loss": 3.3381, + "step": 418000 + }, + { + "epoch": 3.43, + "learning_rate": 8.656890627825545e-06, + "loss": 3.3461, + "step": 418500 + }, + { + "epoch": 3.43, + "learning_rate": 8.655246675105626e-06, + "loss": 3.3492, + "step": 419000 + }, + { + "epoch": 3.44, + "learning_rate": 8.653602722385705e-06, + "loss": 3.3478, + "step": 419500 + }, + { + "epoch": 3.44, + "learning_rate": 8.651958769665784e-06, + "loss": 3.3488, + "step": 420000 + }, + { + "epoch": 3.44, + "eval_accuracy": 0.4409229499038142, + "eval_loss": 3.20596981048584, + "eval_runtime": 409.4361, + "eval_samples_per_second": 753.124, + "eval_steps_per_second": 15.692, + "step": 420000 + }, + { + "epoch": 3.45, + "learning_rate": 8.650314816945865e-06, + "loss": 3.3408, + "step": 420500 + }, + { + "epoch": 3.45, + "learning_rate": 8.648670864225946e-06, + "loss": 3.3418, + "step": 421000 + }, + { + "epoch": 3.45, + "learning_rate": 8.647026911506025e-06, + "loss": 3.3481, + "step": 421500 + }, + { + "epoch": 3.46, + "learning_rate": 8.645382958786106e-06, + "loss": 3.3451, + "step": 422000 + }, + { + "epoch": 3.46, + "learning_rate": 8.643739006066186e-06, + "loss": 3.3391, + "step": 422500 + }, + { + "epoch": 3.47, + "learning_rate": 8.642095053346267e-06, + "loss": 3.3423, + "step": 423000 + }, + { + "epoch": 3.47, + "learning_rate": 8.640451100626348e-06, + "loss": 3.3375, + "step": 423500 + }, + { + "epoch": 3.47, + "learning_rate": 8.638807147906427e-06, + "loss": 3.3379, + "step": 424000 + }, + { + "epoch": 3.48, + "learning_rate": 8.637163195186508e-06, + "loss": 3.3446, + "step": 424500 + }, + { + "epoch": 3.48, + "learning_rate": 8.635519242466587e-06, + "loss": 3.3365, + "step": 425000 + }, + { + "epoch": 3.49, + "learning_rate": 8.633875289746666e-06, + "loss": 3.3405, + "step": 425500 + }, + { + "epoch": 3.49, + "learning_rate": 8.632231337026747e-06, + "loss": 3.3409, + "step": 426000 + }, + { + "epoch": 3.49, + "learning_rate": 8.630587384306828e-06, + "loss": 3.3369, + "step": 426500 + }, + { + "epoch": 3.5, + "learning_rate": 8.628943431586908e-06, + "loss": 3.3391, + "step": 427000 + }, + { + "epoch": 3.5, + "learning_rate": 8.627299478866989e-06, + "loss": 3.3426, + "step": 427500 + }, + { + "epoch": 3.51, + "learning_rate": 8.625655526147068e-06, + "loss": 3.3418, + "step": 428000 + }, + { + "epoch": 3.51, + "learning_rate": 8.624011573427149e-06, + "loss": 3.3402, + "step": 428500 + }, + { + "epoch": 3.51, + "learning_rate": 8.62236762070723e-06, + "loss": 3.3424, + "step": 429000 + }, + { + "epoch": 3.52, + "learning_rate": 8.620723667987309e-06, + "loss": 3.338, + "step": 429500 + }, + { + "epoch": 3.52, + "learning_rate": 8.61907971526739e-06, + "loss": 3.3335, + "step": 430000 + }, + { + "epoch": 3.53, + "learning_rate": 8.617435762547471e-06, + "loss": 3.3276, + "step": 430500 + }, + { + "epoch": 3.53, + "learning_rate": 8.61579180982755e-06, + "loss": 3.3285, + "step": 431000 + }, + { + "epoch": 3.54, + "learning_rate": 8.61414785710763e-06, + "loss": 3.3357, + "step": 431500 + }, + { + "epoch": 3.54, + "learning_rate": 8.61250390438771e-06, + "loss": 3.337, + "step": 432000 + }, + { + "epoch": 3.54, + "learning_rate": 8.61085995166779e-06, + "loss": 3.3378, + "step": 432500 + }, + { + "epoch": 3.55, + "learning_rate": 8.60921599894787e-06, + "loss": 3.3323, + "step": 433000 + }, + { + "epoch": 3.55, + "learning_rate": 8.607572046227952e-06, + "loss": 3.3337, + "step": 433500 + }, + { + "epoch": 3.56, + "learning_rate": 8.605928093508031e-06, + "loss": 3.3325, + "step": 434000 + }, + { + "epoch": 3.56, + "learning_rate": 8.604284140788112e-06, + "loss": 3.3287, + "step": 434500 + }, + { + "epoch": 3.56, + "learning_rate": 8.602640188068191e-06, + "loss": 3.3334, + "step": 435000 + }, + { + "epoch": 3.57, + "learning_rate": 8.600996235348272e-06, + "loss": 3.3308, + "step": 435500 + }, + { + "epoch": 3.57, + "learning_rate": 8.599352282628353e-06, + "loss": 3.3421, + "step": 436000 + }, + { + "epoch": 3.58, + "learning_rate": 8.597708329908433e-06, + "loss": 3.3361, + "step": 436500 + }, + { + "epoch": 3.58, + "learning_rate": 8.596064377188514e-06, + "loss": 3.3349, + "step": 437000 + }, + { + "epoch": 3.58, + "learning_rate": 8.594420424468593e-06, + "loss": 3.3278, + "step": 437500 + }, + { + "epoch": 3.59, + "learning_rate": 8.592776471748672e-06, + "loss": 3.3309, + "step": 438000 + }, + { + "epoch": 3.59, + "learning_rate": 8.591132519028753e-06, + "loss": 3.3285, + "step": 438500 + }, + { + "epoch": 3.6, + "learning_rate": 8.589488566308834e-06, + "loss": 3.322, + "step": 439000 + }, + { + "epoch": 3.6, + "learning_rate": 8.587844613588913e-06, + "loss": 3.3353, + "step": 439500 + }, + { + "epoch": 3.6, + "learning_rate": 8.586200660868994e-06, + "loss": 3.3251, + "step": 440000 + }, + { + "epoch": 3.61, + "learning_rate": 8.584556708149074e-06, + "loss": 3.3303, + "step": 440500 + }, + { + "epoch": 3.61, + "learning_rate": 8.582912755429155e-06, + "loss": 3.3266, + "step": 441000 + }, + { + "epoch": 3.62, + "learning_rate": 8.581268802709236e-06, + "loss": 3.3359, + "step": 441500 + }, + { + "epoch": 3.62, + "learning_rate": 8.579624849989315e-06, + "loss": 3.3264, + "step": 442000 + }, + { + "epoch": 3.63, + "learning_rate": 8.577980897269396e-06, + "loss": 3.3244, + "step": 442500 + }, + { + "epoch": 3.63, + "learning_rate": 8.576336944549477e-06, + "loss": 3.3266, + "step": 443000 + }, + { + "epoch": 3.63, + "learning_rate": 8.574692991829554e-06, + "loss": 3.3246, + "step": 443500 + }, + { + "epoch": 3.64, + "learning_rate": 8.573049039109635e-06, + "loss": 3.3264, + "step": 444000 + }, + { + "epoch": 3.64, + "learning_rate": 8.571405086389716e-06, + "loss": 3.3309, + "step": 444500 + }, + { + "epoch": 3.65, + "learning_rate": 8.569761133669796e-06, + "loss": 3.329, + "step": 445000 + }, + { + "epoch": 3.65, + "learning_rate": 8.568117180949877e-06, + "loss": 3.3208, + "step": 445500 + }, + { + "epoch": 3.65, + "learning_rate": 8.566473228229958e-06, + "loss": 3.3259, + "step": 446000 + }, + { + "epoch": 3.66, + "learning_rate": 8.564829275510037e-06, + "loss": 3.3272, + "step": 446500 + }, + { + "epoch": 3.66, + "learning_rate": 8.563185322790118e-06, + "loss": 3.3228, + "step": 447000 + }, + { + "epoch": 3.67, + "learning_rate": 8.561541370070197e-06, + "loss": 3.3259, + "step": 447500 + }, + { + "epoch": 3.67, + "learning_rate": 8.559897417350278e-06, + "loss": 3.3244, + "step": 448000 + }, + { + "epoch": 3.67, + "learning_rate": 8.558253464630359e-06, + "loss": 3.3245, + "step": 448500 + }, + { + "epoch": 3.68, + "learning_rate": 8.556609511910438e-06, + "loss": 3.3324, + "step": 449000 + }, + { + "epoch": 3.68, + "learning_rate": 8.554965559190518e-06, + "loss": 3.3236, + "step": 449500 + }, + { + "epoch": 3.69, + "learning_rate": 8.553321606470599e-06, + "loss": 3.3239, + "step": 450000 + }, + { + "epoch": 3.69, + "eval_accuracy": 0.443556698280814, + "eval_loss": 3.187194585800171, + "eval_runtime": 414.6673, + "eval_samples_per_second": 743.623, + "eval_steps_per_second": 15.494, + "step": 450000 + }, + { + "epoch": 3.69, + "learning_rate": 8.551677653750678e-06, + "loss": 3.3201, + "step": 450500 + }, + { + "epoch": 3.69, + "learning_rate": 8.550033701030759e-06, + "loss": 3.3214, + "step": 451000 + }, + { + "epoch": 3.7, + "learning_rate": 8.54838974831084e-06, + "loss": 3.3252, + "step": 451500 + }, + { + "epoch": 3.7, + "learning_rate": 8.546745795590919e-06, + "loss": 3.3185, + "step": 452000 + }, + { + "epoch": 3.71, + "learning_rate": 8.545101842871e-06, + "loss": 3.324, + "step": 452500 + }, + { + "epoch": 3.71, + "learning_rate": 8.54345789015108e-06, + "loss": 3.3222, + "step": 453000 + }, + { + "epoch": 3.72, + "learning_rate": 8.54181393743116e-06, + "loss": 3.3129, + "step": 453500 + }, + { + "epoch": 3.72, + "learning_rate": 8.540169984711241e-06, + "loss": 3.3245, + "step": 454000 + }, + { + "epoch": 3.72, + "learning_rate": 8.53852603199132e-06, + "loss": 3.3226, + "step": 454500 + }, + { + "epoch": 3.73, + "learning_rate": 8.536882079271401e-06, + "loss": 3.3162, + "step": 455000 + }, + { + "epoch": 3.73, + "learning_rate": 8.53523812655148e-06, + "loss": 3.3124, + "step": 455500 + }, + { + "epoch": 3.74, + "learning_rate": 8.53359417383156e-06, + "loss": 3.3155, + "step": 456000 + }, + { + "epoch": 3.74, + "learning_rate": 8.531950221111641e-06, + "loss": 3.3179, + "step": 456500 + }, + { + "epoch": 3.74, + "learning_rate": 8.530306268391722e-06, + "loss": 3.3139, + "step": 457000 + }, + { + "epoch": 3.75, + "learning_rate": 8.528662315671801e-06, + "loss": 3.3116, + "step": 457500 + }, + { + "epoch": 3.75, + "learning_rate": 8.527018362951882e-06, + "loss": 3.3204, + "step": 458000 + }, + { + "epoch": 3.76, + "learning_rate": 8.525374410231963e-06, + "loss": 3.3166, + "step": 458500 + }, + { + "epoch": 3.76, + "learning_rate": 8.523730457512042e-06, + "loss": 3.3175, + "step": 459000 + }, + { + "epoch": 3.76, + "learning_rate": 8.522086504792123e-06, + "loss": 3.3107, + "step": 459500 + }, + { + "epoch": 3.77, + "learning_rate": 8.520442552072203e-06, + "loss": 3.3189, + "step": 460000 + }, + { + "epoch": 3.77, + "learning_rate": 8.518798599352284e-06, + "loss": 3.3178, + "step": 460500 + }, + { + "epoch": 3.78, + "learning_rate": 8.517154646632365e-06, + "loss": 3.3001, + "step": 461000 + }, + { + "epoch": 3.78, + "learning_rate": 8.515510693912444e-06, + "loss": 3.3178, + "step": 461500 + }, + { + "epoch": 3.79, + "learning_rate": 8.513866741192523e-06, + "loss": 3.3168, + "step": 462000 + }, + { + "epoch": 3.79, + "learning_rate": 8.512222788472604e-06, + "loss": 3.3178, + "step": 462500 + }, + { + "epoch": 3.79, + "learning_rate": 8.510578835752684e-06, + "loss": 3.3187, + "step": 463000 + }, + { + "epoch": 3.8, + "learning_rate": 8.508934883032764e-06, + "loss": 3.3136, + "step": 463500 + }, + { + "epoch": 3.8, + "learning_rate": 8.507290930312845e-06, + "loss": 3.3138, + "step": 464000 + }, + { + "epoch": 3.81, + "learning_rate": 8.505646977592925e-06, + "loss": 3.3117, + "step": 464500 + }, + { + "epoch": 3.81, + "learning_rate": 8.504003024873006e-06, + "loss": 3.3167, + "step": 465000 + }, + { + "epoch": 3.81, + "learning_rate": 8.502359072153085e-06, + "loss": 3.3172, + "step": 465500 + }, + { + "epoch": 3.82, + "learning_rate": 8.500715119433166e-06, + "loss": 3.3083, + "step": 466000 + }, + { + "epoch": 3.82, + "learning_rate": 8.499071166713247e-06, + "loss": 3.31, + "step": 466500 + }, + { + "epoch": 3.83, + "learning_rate": 8.497427213993326e-06, + "loss": 3.3198, + "step": 467000 + }, + { + "epoch": 3.83, + "learning_rate": 8.495783261273407e-06, + "loss": 3.3153, + "step": 467500 + }, + { + "epoch": 3.83, + "learning_rate": 8.494139308553486e-06, + "loss": 3.3139, + "step": 468000 + }, + { + "epoch": 3.84, + "learning_rate": 8.492495355833566e-06, + "loss": 3.3044, + "step": 468500 + }, + { + "epoch": 3.84, + "learning_rate": 8.490851403113647e-06, + "loss": 3.306, + "step": 469000 + }, + { + "epoch": 3.85, + "learning_rate": 8.489207450393728e-06, + "loss": 3.3104, + "step": 469500 + }, + { + "epoch": 3.85, + "learning_rate": 8.487563497673807e-06, + "loss": 3.3154, + "step": 470000 + }, + { + "epoch": 3.85, + "learning_rate": 8.485919544953888e-06, + "loss": 3.3111, + "step": 470500 + }, + { + "epoch": 3.86, + "learning_rate": 8.484275592233969e-06, + "loss": 3.3065, + "step": 471000 + }, + { + "epoch": 3.86, + "learning_rate": 8.482631639514048e-06, + "loss": 3.3094, + "step": 471500 + }, + { + "epoch": 3.87, + "learning_rate": 8.48098768679413e-06, + "loss": 3.307, + "step": 472000 + }, + { + "epoch": 3.87, + "learning_rate": 8.479343734074208e-06, + "loss": 3.3099, + "step": 472500 + }, + { + "epoch": 3.88, + "learning_rate": 8.47769978135429e-06, + "loss": 3.3043, + "step": 473000 + }, + { + "epoch": 3.88, + "learning_rate": 8.47605582863437e-06, + "loss": 3.3068, + "step": 473500 + }, + { + "epoch": 3.88, + "learning_rate": 8.47441187591445e-06, + "loss": 3.3114, + "step": 474000 + }, + { + "epoch": 3.89, + "learning_rate": 8.472767923194529e-06, + "loss": 3.3066, + "step": 474500 + }, + { + "epoch": 3.89, + "learning_rate": 8.47112397047461e-06, + "loss": 3.3085, + "step": 475000 + }, + { + "epoch": 3.9, + "learning_rate": 8.46948001775469e-06, + "loss": 3.3108, + "step": 475500 + }, + { + "epoch": 3.9, + "learning_rate": 8.46783606503477e-06, + "loss": 3.3058, + "step": 476000 + }, + { + "epoch": 3.9, + "learning_rate": 8.466192112314851e-06, + "loss": 3.3152, + "step": 476500 + }, + { + "epoch": 3.91, + "learning_rate": 8.46454815959493e-06, + "loss": 3.2999, + "step": 477000 + }, + { + "epoch": 3.91, + "learning_rate": 8.462904206875011e-06, + "loss": 3.3049, + "step": 477500 + }, + { + "epoch": 3.92, + "learning_rate": 8.46126025415509e-06, + "loss": 3.3029, + "step": 478000 + }, + { + "epoch": 3.92, + "learning_rate": 8.459616301435172e-06, + "loss": 3.3006, + "step": 478500 + }, + { + "epoch": 3.92, + "learning_rate": 8.457972348715253e-06, + "loss": 3.3031, + "step": 479000 + }, + { + "epoch": 3.93, + "learning_rate": 8.456328395995332e-06, + "loss": 3.3033, + "step": 479500 + }, + { + "epoch": 3.93, + "learning_rate": 8.454684443275411e-06, + "loss": 3.3062, + "step": 480000 + }, + { + "epoch": 3.93, + "eval_accuracy": 0.4462195104271718, + "eval_loss": 3.166045904159546, + "eval_runtime": 409.7348, + "eval_samples_per_second": 752.575, + "eval_steps_per_second": 15.681, + "step": 480000 + }, + { + "epoch": 3.94, + "learning_rate": 8.453040490555492e-06, + "loss": 3.3111, + "step": 480500 + }, + { + "epoch": 3.94, + "learning_rate": 8.451396537835571e-06, + "loss": 3.2931, + "step": 481000 + }, + { + "epoch": 3.94, + "learning_rate": 8.449752585115652e-06, + "loss": 3.2991, + "step": 481500 + }, + { + "epoch": 3.95, + "learning_rate": 8.448108632395733e-06, + "loss": 3.3012, + "step": 482000 + }, + { + "epoch": 3.95, + "learning_rate": 8.446464679675813e-06, + "loss": 3.3012, + "step": 482500 + }, + { + "epoch": 3.96, + "learning_rate": 8.444820726955894e-06, + "loss": 3.3084, + "step": 483000 + }, + { + "epoch": 3.96, + "learning_rate": 8.443176774235975e-06, + "loss": 3.3038, + "step": 483500 + }, + { + "epoch": 3.97, + "learning_rate": 8.441532821516054e-06, + "loss": 3.2989, + "step": 484000 + }, + { + "epoch": 3.97, + "learning_rate": 8.439888868796135e-06, + "loss": 3.3014, + "step": 484500 + }, + { + "epoch": 3.97, + "learning_rate": 8.438244916076214e-06, + "loss": 3.3043, + "step": 485000 + }, + { + "epoch": 3.98, + "learning_rate": 8.436600963356295e-06, + "loss": 3.305, + "step": 485500 + }, + { + "epoch": 3.98, + "learning_rate": 8.434957010636374e-06, + "loss": 3.3027, + "step": 486000 + }, + { + "epoch": 3.99, + "learning_rate": 8.433313057916455e-06, + "loss": 3.3018, + "step": 486500 + }, + { + "epoch": 3.99, + "learning_rate": 8.431669105196535e-06, + "loss": 3.3057, + "step": 487000 + }, + { + "epoch": 3.99, + "learning_rate": 8.430025152476616e-06, + "loss": 3.2995, + "step": 487500 + }, + { + "epoch": 4.0, + "learning_rate": 8.428381199756695e-06, + "loss": 3.3044, + "step": 488000 + }, + { + "epoch": 4.0, + "learning_rate": 8.426737247036776e-06, + "loss": 3.2979, + "step": 488500 + }, + { + "epoch": 4.01, + "learning_rate": 8.425093294316857e-06, + "loss": 3.2995, + "step": 489000 + }, + { + "epoch": 4.01, + "learning_rate": 8.423449341596936e-06, + "loss": 3.2958, + "step": 489500 + }, + { + "epoch": 4.01, + "learning_rate": 8.421805388877017e-06, + "loss": 3.3006, + "step": 490000 + }, + { + "epoch": 4.02, + "learning_rate": 8.420161436157096e-06, + "loss": 3.3015, + "step": 490500 + }, + { + "epoch": 4.02, + "learning_rate": 8.418517483437177e-06, + "loss": 3.2939, + "step": 491000 + }, + { + "epoch": 4.03, + "learning_rate": 8.416873530717258e-06, + "loss": 3.2984, + "step": 491500 + }, + { + "epoch": 4.03, + "learning_rate": 8.415229577997338e-06, + "loss": 3.294, + "step": 492000 + }, + { + "epoch": 4.03, + "learning_rate": 8.413585625277417e-06, + "loss": 3.2979, + "step": 492500 + }, + { + "epoch": 4.04, + "learning_rate": 8.411941672557498e-06, + "loss": 3.2938, + "step": 493000 + }, + { + "epoch": 4.04, + "learning_rate": 8.410297719837577e-06, + "loss": 3.2961, + "step": 493500 + }, + { + "epoch": 4.05, + "learning_rate": 8.408653767117658e-06, + "loss": 3.286, + "step": 494000 + }, + { + "epoch": 4.05, + "learning_rate": 8.407009814397739e-06, + "loss": 3.2893, + "step": 494500 + }, + { + "epoch": 4.06, + "learning_rate": 8.405365861677818e-06, + "loss": 3.289, + "step": 495000 + }, + { + "epoch": 4.06, + "learning_rate": 8.4037219089579e-06, + "loss": 3.2949, + "step": 495500 + }, + { + "epoch": 4.06, + "learning_rate": 8.40207795623798e-06, + "loss": 3.2887, + "step": 496000 + }, + { + "epoch": 4.07, + "learning_rate": 8.40043400351806e-06, + "loss": 3.2963, + "step": 496500 + }, + { + "epoch": 4.07, + "learning_rate": 8.39879005079814e-06, + "loss": 3.2874, + "step": 497000 + }, + { + "epoch": 4.08, + "learning_rate": 8.39714609807822e-06, + "loss": 3.2941, + "step": 497500 + }, + { + "epoch": 4.08, + "learning_rate": 8.395502145358299e-06, + "loss": 3.2907, + "step": 498000 + }, + { + "epoch": 4.08, + "learning_rate": 8.39385819263838e-06, + "loss": 3.2968, + "step": 498500 + }, + { + "epoch": 4.09, + "learning_rate": 8.392214239918461e-06, + "loss": 3.2853, + "step": 499000 + }, + { + "epoch": 4.09, + "learning_rate": 8.39057028719854e-06, + "loss": 3.2883, + "step": 499500 + }, + { + "epoch": 4.1, + "learning_rate": 8.388926334478621e-06, + "loss": 3.2939, + "step": 500000 + }, + { + "epoch": 4.1, + "learning_rate": 8.3872823817587e-06, + "loss": 3.2897, + "step": 500500 + }, + { + "epoch": 4.1, + "learning_rate": 8.385638429038782e-06, + "loss": 3.299, + "step": 501000 + }, + { + "epoch": 4.11, + "learning_rate": 8.383994476318863e-06, + "loss": 3.2911, + "step": 501500 + }, + { + "epoch": 4.11, + "learning_rate": 8.382350523598942e-06, + "loss": 3.2926, + "step": 502000 + }, + { + "epoch": 4.12, + "learning_rate": 8.380706570879023e-06, + "loss": 3.2913, + "step": 502500 + }, + { + "epoch": 4.12, + "learning_rate": 8.379062618159102e-06, + "loss": 3.3006, + "step": 503000 + }, + { + "epoch": 4.13, + "learning_rate": 8.377418665439183e-06, + "loss": 3.2884, + "step": 503500 + }, + { + "epoch": 4.13, + "learning_rate": 8.375774712719262e-06, + "loss": 3.2926, + "step": 504000 + }, + { + "epoch": 4.13, + "learning_rate": 8.374130759999343e-06, + "loss": 3.2898, + "step": 504500 + }, + { + "epoch": 4.14, + "learning_rate": 8.372486807279423e-06, + "loss": 3.2941, + "step": 505000 + }, + { + "epoch": 4.14, + "learning_rate": 8.370842854559504e-06, + "loss": 3.2886, + "step": 505500 + }, + { + "epoch": 4.15, + "learning_rate": 8.369198901839583e-06, + "loss": 3.2888, + "step": 506000 + }, + { + "epoch": 4.15, + "learning_rate": 8.367554949119664e-06, + "loss": 3.2876, + "step": 506500 + }, + { + "epoch": 4.15, + "learning_rate": 8.365910996399745e-06, + "loss": 3.2868, + "step": 507000 + }, + { + "epoch": 4.16, + "learning_rate": 8.364267043679824e-06, + "loss": 3.2854, + "step": 507500 + }, + { + "epoch": 4.16, + "learning_rate": 8.362623090959905e-06, + "loss": 3.2951, + "step": 508000 + }, + { + "epoch": 4.17, + "learning_rate": 8.360979138239986e-06, + "loss": 3.2928, + "step": 508500 + }, + { + "epoch": 4.17, + "learning_rate": 8.359335185520065e-06, + "loss": 3.286, + "step": 509000 + }, + { + "epoch": 4.17, + "learning_rate": 8.357691232800146e-06, + "loss": 3.2894, + "step": 509500 + }, + { + "epoch": 4.18, + "learning_rate": 8.356047280080226e-06, + "loss": 3.2841, + "step": 510000 + }, + { + "epoch": 4.18, + "eval_accuracy": 0.4485220748826658, + "eval_loss": 3.1493306159973145, + "eval_runtime": 411.9758, + "eval_samples_per_second": 748.481, + "eval_steps_per_second": 15.596, + "step": 510000 + }, + { + "epoch": 4.18, + "learning_rate": 8.354403327360305e-06, + "loss": 3.2809, + "step": 510500 + }, + { + "epoch": 4.19, + "learning_rate": 8.352759374640386e-06, + "loss": 3.2861, + "step": 511000 + }, + { + "epoch": 4.19, + "learning_rate": 8.351115421920467e-06, + "loss": 3.2792, + "step": 511500 + }, + { + "epoch": 4.19, + "learning_rate": 8.349471469200546e-06, + "loss": 3.2864, + "step": 512000 + }, + { + "epoch": 4.2, + "learning_rate": 8.347827516480627e-06, + "loss": 3.2761, + "step": 512500 + }, + { + "epoch": 4.2, + "learning_rate": 8.346183563760706e-06, + "loss": 3.2821, + "step": 513000 + }, + { + "epoch": 4.21, + "learning_rate": 8.344539611040787e-06, + "loss": 3.2797, + "step": 513500 + }, + { + "epoch": 4.21, + "learning_rate": 8.342895658320868e-06, + "loss": 3.2854, + "step": 514000 + }, + { + "epoch": 4.22, + "learning_rate": 8.341251705600948e-06, + "loss": 3.2826, + "step": 514500 + }, + { + "epoch": 4.22, + "learning_rate": 8.339607752881029e-06, + "loss": 3.2923, + "step": 515000 + }, + { + "epoch": 4.22, + "learning_rate": 8.337963800161108e-06, + "loss": 3.2826, + "step": 515500 + }, + { + "epoch": 4.23, + "learning_rate": 8.336319847441187e-06, + "loss": 3.2886, + "step": 516000 + }, + { + "epoch": 4.23, + "learning_rate": 8.334675894721268e-06, + "loss": 3.2822, + "step": 516500 + }, + { + "epoch": 4.24, + "learning_rate": 8.333031942001349e-06, + "loss": 3.2828, + "step": 517000 + }, + { + "epoch": 4.24, + "learning_rate": 8.331387989281428e-06, + "loss": 3.2848, + "step": 517500 + }, + { + "epoch": 4.24, + "learning_rate": 8.32974403656151e-06, + "loss": 3.2878, + "step": 518000 + }, + { + "epoch": 4.25, + "learning_rate": 8.328100083841589e-06, + "loss": 3.2879, + "step": 518500 + }, + { + "epoch": 4.25, + "learning_rate": 8.32645613112167e-06, + "loss": 3.2825, + "step": 519000 + }, + { + "epoch": 4.26, + "learning_rate": 8.32481217840175e-06, + "loss": 3.2823, + "step": 519500 + }, + { + "epoch": 4.26, + "learning_rate": 8.32316822568183e-06, + "loss": 3.2841, + "step": 520000 + }, + { + "epoch": 4.26, + "learning_rate": 8.32152427296191e-06, + "loss": 3.2854, + "step": 520500 + }, + { + "epoch": 4.27, + "learning_rate": 8.31988032024199e-06, + "loss": 3.2817, + "step": 521000 + }, + { + "epoch": 4.27, + "learning_rate": 8.318236367522071e-06, + "loss": 3.277, + "step": 521500 + }, + { + "epoch": 4.28, + "learning_rate": 8.316592414802152e-06, + "loss": 3.2717, + "step": 522000 + }, + { + "epoch": 4.28, + "learning_rate": 8.314948462082231e-06, + "loss": 3.2838, + "step": 522500 + }, + { + "epoch": 4.28, + "learning_rate": 8.31330450936231e-06, + "loss": 3.2764, + "step": 523000 + }, + { + "epoch": 4.29, + "learning_rate": 8.311660556642392e-06, + "loss": 3.2824, + "step": 523500 + }, + { + "epoch": 4.29, + "learning_rate": 8.310016603922472e-06, + "loss": 3.2748, + "step": 524000 + }, + { + "epoch": 4.3, + "learning_rate": 8.308372651202552e-06, + "loss": 3.275, + "step": 524500 + }, + { + "epoch": 4.3, + "learning_rate": 8.306728698482633e-06, + "loss": 3.2802, + "step": 525000 + }, + { + "epoch": 4.31, + "learning_rate": 8.305084745762712e-06, + "loss": 3.2764, + "step": 525500 + }, + { + "epoch": 4.31, + "learning_rate": 8.303440793042793e-06, + "loss": 3.2746, + "step": 526000 + }, + { + "epoch": 4.31, + "learning_rate": 8.301796840322874e-06, + "loss": 3.2793, + "step": 526500 + }, + { + "epoch": 4.32, + "learning_rate": 8.300152887602953e-06, + "loss": 3.2819, + "step": 527000 + }, + { + "epoch": 4.32, + "learning_rate": 8.298508934883034e-06, + "loss": 3.2755, + "step": 527500 + }, + { + "epoch": 4.33, + "learning_rate": 8.296864982163114e-06, + "loss": 3.2831, + "step": 528000 + }, + { + "epoch": 4.33, + "learning_rate": 8.295221029443193e-06, + "loss": 3.2733, + "step": 528500 + }, + { + "epoch": 4.33, + "learning_rate": 8.293577076723274e-06, + "loss": 3.277, + "step": 529000 + }, + { + "epoch": 4.34, + "learning_rate": 8.291933124003355e-06, + "loss": 3.2783, + "step": 529500 + }, + { + "epoch": 4.34, + "learning_rate": 8.290289171283434e-06, + "loss": 3.2877, + "step": 530000 + }, + { + "epoch": 4.35, + "learning_rate": 8.288645218563515e-06, + "loss": 3.2762, + "step": 530500 + }, + { + "epoch": 4.35, + "learning_rate": 8.287001265843594e-06, + "loss": 3.2744, + "step": 531000 + }, + { + "epoch": 4.35, + "learning_rate": 8.285357313123675e-06, + "loss": 3.275, + "step": 531500 + }, + { + "epoch": 4.36, + "learning_rate": 8.283713360403756e-06, + "loss": 3.2724, + "step": 532000 + }, + { + "epoch": 4.36, + "learning_rate": 8.282069407683835e-06, + "loss": 3.2723, + "step": 532500 + }, + { + "epoch": 4.37, + "learning_rate": 8.280425454963916e-06, + "loss": 3.2662, + "step": 533000 + }, + { + "epoch": 4.37, + "learning_rate": 8.278781502243996e-06, + "loss": 3.2702, + "step": 533500 + }, + { + "epoch": 4.37, + "learning_rate": 8.277137549524077e-06, + "loss": 3.2736, + "step": 534000 + }, + { + "epoch": 4.38, + "learning_rate": 8.275493596804156e-06, + "loss": 3.2694, + "step": 534500 + }, + { + "epoch": 4.38, + "learning_rate": 8.273849644084237e-06, + "loss": 3.2722, + "step": 535000 + }, + { + "epoch": 4.39, + "learning_rate": 8.272205691364316e-06, + "loss": 3.2708, + "step": 535500 + }, + { + "epoch": 4.39, + "learning_rate": 8.270561738644397e-06, + "loss": 3.274, + "step": 536000 + }, + { + "epoch": 4.4, + "learning_rate": 8.268917785924478e-06, + "loss": 3.2798, + "step": 536500 + }, + { + "epoch": 4.4, + "learning_rate": 8.267273833204557e-06, + "loss": 3.2723, + "step": 537000 + }, + { + "epoch": 4.4, + "learning_rate": 8.265629880484638e-06, + "loss": 3.2728, + "step": 537500 + }, + { + "epoch": 4.41, + "learning_rate": 8.263985927764718e-06, + "loss": 3.2721, + "step": 538000 + }, + { + "epoch": 4.41, + "learning_rate": 8.262341975044799e-06, + "loss": 3.2774, + "step": 538500 + }, + { + "epoch": 4.42, + "learning_rate": 8.26069802232488e-06, + "loss": 3.2741, + "step": 539000 + }, + { + "epoch": 4.42, + "learning_rate": 8.259054069604959e-06, + "loss": 3.2655, + "step": 539500 + }, + { + "epoch": 4.42, + "learning_rate": 8.25741011688504e-06, + "loss": 3.2663, + "step": 540000 + }, + { + "epoch": 4.42, + "eval_accuracy": 0.4502872659892971, + "eval_loss": 3.1354901790618896, + "eval_runtime": 409.1172, + "eval_samples_per_second": 753.711, + "eval_steps_per_second": 15.705, + "step": 540000 + }, + { + "epoch": 4.43, + "learning_rate": 8.25576616416512e-06, + "loss": 3.2669, + "step": 540500 + }, + { + "epoch": 4.43, + "learning_rate": 8.254122211445198e-06, + "loss": 3.2673, + "step": 541000 + }, + { + "epoch": 4.44, + "learning_rate": 8.25247825872528e-06, + "loss": 3.2689, + "step": 541500 + }, + { + "epoch": 4.44, + "learning_rate": 8.25083430600536e-06, + "loss": 3.2659, + "step": 542000 + }, + { + "epoch": 4.44, + "learning_rate": 8.24919035328544e-06, + "loss": 3.2657, + "step": 542500 + }, + { + "epoch": 4.45, + "learning_rate": 8.24754640056552e-06, + "loss": 3.2651, + "step": 543000 + }, + { + "epoch": 4.45, + "learning_rate": 8.2459024478456e-06, + "loss": 3.2673, + "step": 543500 + }, + { + "epoch": 4.46, + "learning_rate": 8.244258495125681e-06, + "loss": 3.2719, + "step": 544000 + }, + { + "epoch": 4.46, + "learning_rate": 8.242614542405762e-06, + "loss": 3.2714, + "step": 544500 + }, + { + "epoch": 4.47, + "learning_rate": 8.240970589685841e-06, + "loss": 3.2702, + "step": 545000 + }, + { + "epoch": 4.47, + "learning_rate": 8.239326636965922e-06, + "loss": 3.2649, + "step": 545500 + }, + { + "epoch": 4.47, + "learning_rate": 8.237682684246001e-06, + "loss": 3.2791, + "step": 546000 + }, + { + "epoch": 4.48, + "learning_rate": 8.23603873152608e-06, + "loss": 3.2627, + "step": 546500 + }, + { + "epoch": 4.48, + "learning_rate": 8.234394778806162e-06, + "loss": 3.2707, + "step": 547000 + }, + { + "epoch": 4.49, + "learning_rate": 8.232750826086243e-06, + "loss": 3.2704, + "step": 547500 + }, + { + "epoch": 4.49, + "learning_rate": 8.231106873366322e-06, + "loss": 3.2624, + "step": 548000 + }, + { + "epoch": 4.49, + "learning_rate": 8.229462920646403e-06, + "loss": 3.2661, + "step": 548500 + }, + { + "epoch": 4.5, + "learning_rate": 8.227818967926482e-06, + "loss": 3.2724, + "step": 549000 + }, + { + "epoch": 4.5, + "learning_rate": 8.226175015206563e-06, + "loss": 3.2653, + "step": 549500 + }, + { + "epoch": 4.51, + "learning_rate": 8.224531062486644e-06, + "loss": 3.2676, + "step": 550000 + }, + { + "epoch": 4.51, + "learning_rate": 8.222887109766723e-06, + "loss": 3.2621, + "step": 550500 + }, + { + "epoch": 4.51, + "learning_rate": 8.221243157046804e-06, + "loss": 3.2705, + "step": 551000 + }, + { + "epoch": 4.52, + "learning_rate": 8.219599204326885e-06, + "loss": 3.2677, + "step": 551500 + }, + { + "epoch": 4.52, + "learning_rate": 8.217955251606965e-06, + "loss": 3.2643, + "step": 552000 + }, + { + "epoch": 4.53, + "learning_rate": 8.216311298887044e-06, + "loss": 3.2621, + "step": 552500 + }, + { + "epoch": 4.53, + "learning_rate": 8.214667346167125e-06, + "loss": 3.262, + "step": 553000 + }, + { + "epoch": 4.53, + "learning_rate": 8.213023393447204e-06, + "loss": 3.2641, + "step": 553500 + }, + { + "epoch": 4.54, + "learning_rate": 8.211379440727285e-06, + "loss": 3.2677, + "step": 554000 + }, + { + "epoch": 4.54, + "learning_rate": 8.209735488007366e-06, + "loss": 3.2587, + "step": 554500 + }, + { + "epoch": 4.55, + "learning_rate": 8.208091535287445e-06, + "loss": 3.2668, + "step": 555000 + }, + { + "epoch": 4.55, + "learning_rate": 8.206447582567526e-06, + "loss": 3.2543, + "step": 555500 + }, + { + "epoch": 4.56, + "learning_rate": 8.204803629847606e-06, + "loss": 3.2612, + "step": 556000 + }, + { + "epoch": 4.56, + "learning_rate": 8.203159677127687e-06, + "loss": 3.2631, + "step": 556500 + }, + { + "epoch": 4.56, + "learning_rate": 8.201515724407768e-06, + "loss": 3.2678, + "step": 557000 + }, + { + "epoch": 4.57, + "learning_rate": 8.199871771687847e-06, + "loss": 3.2658, + "step": 557500 + }, + { + "epoch": 4.57, + "learning_rate": 8.198227818967928e-06, + "loss": 3.2576, + "step": 558000 + }, + { + "epoch": 4.58, + "learning_rate": 8.196583866248007e-06, + "loss": 3.2639, + "step": 558500 + }, + { + "epoch": 4.58, + "learning_rate": 8.194939913528086e-06, + "loss": 3.2549, + "step": 559000 + }, + { + "epoch": 4.58, + "learning_rate": 8.193295960808167e-06, + "loss": 3.2605, + "step": 559500 + }, + { + "epoch": 4.59, + "learning_rate": 8.191652008088248e-06, + "loss": 3.2623, + "step": 560000 + }, + { + "epoch": 4.59, + "learning_rate": 8.190008055368328e-06, + "loss": 3.2635, + "step": 560500 + }, + { + "epoch": 4.6, + "learning_rate": 8.188364102648409e-06, + "loss": 3.2587, + "step": 561000 + }, + { + "epoch": 4.6, + "learning_rate": 8.186720149928488e-06, + "loss": 3.266, + "step": 561500 + }, + { + "epoch": 4.6, + "learning_rate": 8.185076197208569e-06, + "loss": 3.2584, + "step": 562000 + }, + { + "epoch": 4.61, + "learning_rate": 8.18343224448865e-06, + "loss": 3.2625, + "step": 562500 + }, + { + "epoch": 4.61, + "learning_rate": 8.181788291768729e-06, + "loss": 3.2598, + "step": 563000 + }, + { + "epoch": 4.62, + "learning_rate": 8.18014433904881e-06, + "loss": 3.2545, + "step": 563500 + }, + { + "epoch": 4.62, + "learning_rate": 8.178500386328891e-06, + "loss": 3.2591, + "step": 564000 + }, + { + "epoch": 4.62, + "learning_rate": 8.17685643360897e-06, + "loss": 3.2561, + "step": 564500 + }, + { + "epoch": 4.63, + "learning_rate": 8.17521248088905e-06, + "loss": 3.2604, + "step": 565000 + }, + { + "epoch": 4.63, + "learning_rate": 8.17356852816913e-06, + "loss": 3.2529, + "step": 565500 + }, + { + "epoch": 4.64, + "learning_rate": 8.17192457544921e-06, + "loss": 3.2568, + "step": 566000 + }, + { + "epoch": 4.64, + "learning_rate": 8.170280622729291e-06, + "loss": 3.2554, + "step": 566500 + }, + { + "epoch": 4.65, + "learning_rate": 8.168636670009372e-06, + "loss": 3.2528, + "step": 567000 + }, + { + "epoch": 4.65, + "learning_rate": 8.166992717289451e-06, + "loss": 3.2633, + "step": 567500 + }, + { + "epoch": 4.65, + "learning_rate": 8.165348764569532e-06, + "loss": 3.2545, + "step": 568000 + }, + { + "epoch": 4.66, + "learning_rate": 8.163704811849611e-06, + "loss": 3.257, + "step": 568500 + }, + { + "epoch": 4.66, + "learning_rate": 8.162060859129692e-06, + "loss": 3.2531, + "step": 569000 + }, + { + "epoch": 4.67, + "learning_rate": 8.160416906409773e-06, + "loss": 3.2573, + "step": 569500 + }, + { + "epoch": 4.67, + "learning_rate": 8.158772953689853e-06, + "loss": 3.259, + "step": 570000 + }, + { + "epoch": 4.67, + "eval_accuracy": 0.45185906617575416, + "eval_loss": 3.122938632965088, + "eval_runtime": 406.5956, + "eval_samples_per_second": 758.385, + "eval_steps_per_second": 15.802, + "step": 570000 + }, + { + "epoch": 4.67, + "learning_rate": 8.157129000969934e-06, + "loss": 3.2607, + "step": 570500 + }, + { + "epoch": 4.68, + "learning_rate": 8.155485048250013e-06, + "loss": 3.2596, + "step": 571000 + }, + { + "epoch": 4.68, + "learning_rate": 8.153841095530092e-06, + "loss": 3.2551, + "step": 571500 + }, + { + "epoch": 4.69, + "learning_rate": 8.152197142810173e-06, + "loss": 3.26, + "step": 572000 + }, + { + "epoch": 4.69, + "learning_rate": 8.150553190090254e-06, + "loss": 3.2566, + "step": 572500 + }, + { + "epoch": 4.69, + "learning_rate": 8.148909237370333e-06, + "loss": 3.2509, + "step": 573000 + }, + { + "epoch": 4.7, + "learning_rate": 8.147265284650414e-06, + "loss": 3.2506, + "step": 573500 + }, + { + "epoch": 4.7, + "learning_rate": 8.145621331930494e-06, + "loss": 3.2583, + "step": 574000 + }, + { + "epoch": 4.71, + "learning_rate": 8.143977379210575e-06, + "loss": 3.2572, + "step": 574500 + }, + { + "epoch": 4.71, + "learning_rate": 8.142333426490656e-06, + "loss": 3.2544, + "step": 575000 + }, + { + "epoch": 4.71, + "learning_rate": 8.140689473770735e-06, + "loss": 3.2544, + "step": 575500 + }, + { + "epoch": 4.72, + "learning_rate": 8.139045521050816e-06, + "loss": 3.2497, + "step": 576000 + }, + { + "epoch": 4.72, + "learning_rate": 8.137401568330897e-06, + "loss": 3.2496, + "step": 576500 + }, + { + "epoch": 4.73, + "learning_rate": 8.135757615610974e-06, + "loss": 3.2468, + "step": 577000 + }, + { + "epoch": 4.73, + "learning_rate": 8.134113662891055e-06, + "loss": 3.2497, + "step": 577500 + }, + { + "epoch": 4.74, + "learning_rate": 8.132469710171136e-06, + "loss": 3.249, + "step": 578000 + }, + { + "epoch": 4.74, + "learning_rate": 8.130825757451216e-06, + "loss": 3.2553, + "step": 578500 + }, + { + "epoch": 4.74, + "learning_rate": 8.129181804731297e-06, + "loss": 3.2604, + "step": 579000 + }, + { + "epoch": 4.75, + "learning_rate": 8.127537852011378e-06, + "loss": 3.2497, + "step": 579500 + }, + { + "epoch": 4.75, + "learning_rate": 8.125893899291457e-06, + "loss": 3.2503, + "step": 580000 + }, + { + "epoch": 4.76, + "learning_rate": 8.124249946571538e-06, + "loss": 3.2484, + "step": 580500 + }, + { + "epoch": 4.76, + "learning_rate": 8.122605993851617e-06, + "loss": 3.2596, + "step": 581000 + }, + { + "epoch": 4.76, + "learning_rate": 8.120962041131698e-06, + "loss": 3.2464, + "step": 581500 + }, + { + "epoch": 4.77, + "learning_rate": 8.119318088411779e-06, + "loss": 3.2508, + "step": 582000 + }, + { + "epoch": 4.77, + "learning_rate": 8.117674135691858e-06, + "loss": 3.249, + "step": 582500 + }, + { + "epoch": 4.78, + "learning_rate": 8.116030182971938e-06, + "loss": 3.2492, + "step": 583000 + }, + { + "epoch": 4.78, + "learning_rate": 8.114386230252019e-06, + "loss": 3.2457, + "step": 583500 + }, + { + "epoch": 4.78, + "learning_rate": 8.112742277532098e-06, + "loss": 3.2488, + "step": 584000 + }, + { + "epoch": 4.79, + "learning_rate": 8.111098324812179e-06, + "loss": 3.2475, + "step": 584500 + }, + { + "epoch": 4.79, + "learning_rate": 8.10945437209226e-06, + "loss": 3.2466, + "step": 585000 + }, + { + "epoch": 4.8, + "learning_rate": 8.107810419372339e-06, + "loss": 3.2452, + "step": 585500 + }, + { + "epoch": 4.8, + "learning_rate": 8.10616646665242e-06, + "loss": 3.2506, + "step": 586000 + }, + { + "epoch": 4.81, + "learning_rate": 8.1045225139325e-06, + "loss": 3.243, + "step": 586500 + }, + { + "epoch": 4.81, + "learning_rate": 8.10287856121258e-06, + "loss": 3.2476, + "step": 587000 + }, + { + "epoch": 4.81, + "learning_rate": 8.101234608492661e-06, + "loss": 3.2427, + "step": 587500 + }, + { + "epoch": 4.82, + "learning_rate": 8.09959065577274e-06, + "loss": 3.2548, + "step": 588000 + }, + { + "epoch": 4.82, + "learning_rate": 8.097946703052822e-06, + "loss": 3.2503, + "step": 588500 + }, + { + "epoch": 4.83, + "learning_rate": 8.0963027503329e-06, + "loss": 3.2469, + "step": 589000 + }, + { + "epoch": 4.83, + "learning_rate": 8.09465879761298e-06, + "loss": 3.239, + "step": 589500 + }, + { + "epoch": 4.83, + "learning_rate": 8.093014844893061e-06, + "loss": 3.2494, + "step": 590000 + }, + { + "epoch": 4.84, + "learning_rate": 8.091370892173142e-06, + "loss": 3.2423, + "step": 590500 + }, + { + "epoch": 4.84, + "learning_rate": 8.089726939453221e-06, + "loss": 3.2455, + "step": 591000 + }, + { + "epoch": 4.85, + "learning_rate": 8.088082986733302e-06, + "loss": 3.2395, + "step": 591500 + }, + { + "epoch": 4.85, + "learning_rate": 8.086439034013383e-06, + "loss": 3.244, + "step": 592000 + }, + { + "epoch": 4.85, + "learning_rate": 8.084795081293463e-06, + "loss": 3.2469, + "step": 592500 + }, + { + "epoch": 4.86, + "learning_rate": 8.083151128573544e-06, + "loss": 3.244, + "step": 593000 + }, + { + "epoch": 4.86, + "learning_rate": 8.081507175853623e-06, + "loss": 3.2457, + "step": 593500 + }, + { + "epoch": 4.87, + "learning_rate": 8.079863223133704e-06, + "loss": 3.2449, + "step": 594000 + }, + { + "epoch": 4.87, + "learning_rate": 8.078219270413785e-06, + "loss": 3.2407, + "step": 594500 + }, + { + "epoch": 4.87, + "learning_rate": 8.076575317693864e-06, + "loss": 3.2462, + "step": 595000 + }, + { + "epoch": 4.88, + "learning_rate": 8.074931364973943e-06, + "loss": 3.2402, + "step": 595500 + }, + { + "epoch": 4.88, + "learning_rate": 8.073287412254024e-06, + "loss": 3.2431, + "step": 596000 + }, + { + "epoch": 4.89, + "learning_rate": 8.071643459534104e-06, + "loss": 3.2429, + "step": 596500 + }, + { + "epoch": 4.89, + "learning_rate": 8.069999506814185e-06, + "loss": 3.2418, + "step": 597000 + }, + { + "epoch": 4.9, + "learning_rate": 8.068355554094265e-06, + "loss": 3.2403, + "step": 597500 + }, + { + "epoch": 4.9, + "learning_rate": 8.066711601374345e-06, + "loss": 3.2438, + "step": 598000 + }, + { + "epoch": 4.9, + "learning_rate": 8.065067648654426e-06, + "loss": 3.2456, + "step": 598500 + }, + { + "epoch": 4.91, + "learning_rate": 8.063423695934505e-06, + "loss": 3.2345, + "step": 599000 + }, + { + "epoch": 4.91, + "learning_rate": 8.061779743214586e-06, + "loss": 3.2393, + "step": 599500 + }, + { + "epoch": 4.92, + "learning_rate": 8.060135790494667e-06, + "loss": 3.2429, + "step": 600000 + }, + { + "epoch": 4.92, + "eval_accuracy": 0.45349973762719414, + "eval_loss": 3.1096389293670654, + "eval_runtime": 406.7173, + "eval_samples_per_second": 758.158, + "eval_steps_per_second": 15.797, + "step": 600000 + }, + { + "epoch": 4.92, + "learning_rate": 8.058491837774746e-06, + "loss": 3.2356, + "step": 600500 + }, + { + "epoch": 4.92, + "learning_rate": 8.056847885054826e-06, + "loss": 3.2468, + "step": 601000 + }, + { + "epoch": 4.93, + "learning_rate": 8.055203932334907e-06, + "loss": 3.2398, + "step": 601500 + }, + { + "epoch": 4.93, + "learning_rate": 8.053559979614986e-06, + "loss": 3.2352, + "step": 602000 + }, + { + "epoch": 4.94, + "learning_rate": 8.051916026895067e-06, + "loss": 3.2442, + "step": 602500 + }, + { + "epoch": 4.94, + "learning_rate": 8.050272074175148e-06, + "loss": 3.2401, + "step": 603000 + }, + { + "epoch": 4.94, + "learning_rate": 8.048628121455227e-06, + "loss": 3.2406, + "step": 603500 + }, + { + "epoch": 4.95, + "learning_rate": 8.046984168735308e-06, + "loss": 3.2348, + "step": 604000 + }, + { + "epoch": 4.95, + "learning_rate": 8.045340216015389e-06, + "loss": 3.2435, + "step": 604500 + }, + { + "epoch": 4.96, + "learning_rate": 8.043696263295468e-06, + "loss": 3.2372, + "step": 605000 + }, + { + "epoch": 4.96, + "learning_rate": 8.04205231057555e-06, + "loss": 3.2415, + "step": 605500 + }, + { + "epoch": 4.96, + "learning_rate": 8.040408357855628e-06, + "loss": 3.234, + "step": 606000 + }, + { + "epoch": 4.97, + "learning_rate": 8.03876440513571e-06, + "loss": 3.2412, + "step": 606500 + }, + { + "epoch": 4.97, + "learning_rate": 8.037120452415789e-06, + "loss": 3.2354, + "step": 607000 + }, + { + "epoch": 4.98, + "learning_rate": 8.03547649969587e-06, + "loss": 3.2394, + "step": 607500 + }, + { + "epoch": 4.98, + "learning_rate": 8.033832546975949e-06, + "loss": 3.2317, + "step": 608000 + }, + { + "epoch": 4.99, + "learning_rate": 8.03218859425603e-06, + "loss": 3.2447, + "step": 608500 + }, + { + "epoch": 4.99, + "learning_rate": 8.03054464153611e-06, + "loss": 3.2438, + "step": 609000 + }, + { + "epoch": 4.99, + "learning_rate": 8.02890068881619e-06, + "loss": 3.2373, + "step": 609500 + }, + { + "epoch": 5.0, + "learning_rate": 8.027256736096271e-06, + "loss": 3.2411, + "step": 610000 + }, + { + "epoch": 5.0, + "learning_rate": 8.02561278337635e-06, + "loss": 3.2358, + "step": 610500 + }, + { + "epoch": 5.01, + "learning_rate": 8.023968830656431e-06, + "loss": 3.2351, + "step": 611000 + }, + { + "epoch": 5.01, + "learning_rate": 8.02232487793651e-06, + "loss": 3.2341, + "step": 611500 + }, + { + "epoch": 5.01, + "learning_rate": 8.020680925216592e-06, + "loss": 3.2321, + "step": 612000 + }, + { + "epoch": 5.02, + "learning_rate": 8.019036972496673e-06, + "loss": 3.2408, + "step": 612500 + }, + { + "epoch": 5.02, + "learning_rate": 8.017393019776752e-06, + "loss": 3.2406, + "step": 613000 + }, + { + "epoch": 5.03, + "learning_rate": 8.015749067056831e-06, + "loss": 3.2375, + "step": 613500 + }, + { + "epoch": 5.03, + "learning_rate": 8.014105114336912e-06, + "loss": 3.2416, + "step": 614000 + }, + { + "epoch": 5.03, + "learning_rate": 8.012461161616991e-06, + "loss": 3.2331, + "step": 614500 + }, + { + "epoch": 5.04, + "learning_rate": 8.010817208897072e-06, + "loss": 3.2473, + "step": 615000 + }, + { + "epoch": 5.04, + "learning_rate": 8.009173256177153e-06, + "loss": 3.2345, + "step": 615500 + }, + { + "epoch": 5.05, + "learning_rate": 8.007529303457233e-06, + "loss": 3.2292, + "step": 616000 + }, + { + "epoch": 5.05, + "learning_rate": 8.005885350737314e-06, + "loss": 3.2327, + "step": 616500 + }, + { + "epoch": 5.05, + "learning_rate": 8.004241398017395e-06, + "loss": 3.2251, + "step": 617000 + }, + { + "epoch": 5.06, + "learning_rate": 8.002597445297474e-06, + "loss": 3.2326, + "step": 617500 + }, + { + "epoch": 5.06, + "learning_rate": 8.000953492577555e-06, + "loss": 3.2348, + "step": 618000 + }, + { + "epoch": 5.07, + "learning_rate": 7.999309539857634e-06, + "loss": 3.231, + "step": 618500 + }, + { + "epoch": 5.07, + "learning_rate": 7.997665587137715e-06, + "loss": 3.236, + "step": 619000 + }, + { + "epoch": 5.08, + "learning_rate": 7.996021634417794e-06, + "loss": 3.2324, + "step": 619500 + }, + { + "epoch": 5.08, + "learning_rate": 7.994377681697875e-06, + "loss": 3.2321, + "step": 620000 + }, + { + "epoch": 5.08, + "learning_rate": 7.992733728977955e-06, + "loss": 3.2326, + "step": 620500 + }, + { + "epoch": 5.09, + "learning_rate": 7.991089776258036e-06, + "loss": 3.2318, + "step": 621000 + }, + { + "epoch": 5.09, + "learning_rate": 7.989445823538115e-06, + "loss": 3.2291, + "step": 621500 + }, + { + "epoch": 5.1, + "learning_rate": 7.987801870818196e-06, + "loss": 3.2291, + "step": 622000 + }, + { + "epoch": 5.1, + "learning_rate": 7.986157918098277e-06, + "loss": 3.2264, + "step": 622500 + }, + { + "epoch": 5.1, + "learning_rate": 7.984513965378356e-06, + "loss": 3.2319, + "step": 623000 + }, + { + "epoch": 5.11, + "learning_rate": 7.982870012658437e-06, + "loss": 3.231, + "step": 623500 + }, + { + "epoch": 5.11, + "learning_rate": 7.981226059938516e-06, + "loss": 3.2294, + "step": 624000 + }, + { + "epoch": 5.12, + "learning_rate": 7.979582107218597e-06, + "loss": 3.2212, + "step": 624500 + }, + { + "epoch": 5.12, + "learning_rate": 7.977938154498678e-06, + "loss": 3.2315, + "step": 625000 + }, + { + "epoch": 5.12, + "learning_rate": 7.976294201778758e-06, + "loss": 3.2388, + "step": 625500 + }, + { + "epoch": 5.13, + "learning_rate": 7.974650249058837e-06, + "loss": 3.2373, + "step": 626000 + }, + { + "epoch": 5.13, + "learning_rate": 7.973006296338918e-06, + "loss": 3.2268, + "step": 626500 + }, + { + "epoch": 5.14, + "learning_rate": 7.971362343618997e-06, + "loss": 3.2271, + "step": 627000 + }, + { + "epoch": 5.14, + "learning_rate": 7.969718390899078e-06, + "loss": 3.23, + "step": 627500 + }, + { + "epoch": 5.15, + "learning_rate": 7.968074438179159e-06, + "loss": 3.2272, + "step": 628000 + }, + { + "epoch": 5.15, + "learning_rate": 7.966430485459238e-06, + "loss": 3.2258, + "step": 628500 + }, + { + "epoch": 5.15, + "learning_rate": 7.96478653273932e-06, + "loss": 3.2267, + "step": 629000 + }, + { + "epoch": 5.16, + "learning_rate": 7.9631425800194e-06, + "loss": 3.2273, + "step": 629500 + }, + { + "epoch": 5.16, + "learning_rate": 7.96149862729948e-06, + "loss": 3.2234, + "step": 630000 + }, + { + "epoch": 5.16, + "eval_accuracy": 0.4554354208134474, + "eval_loss": 3.0947325229644775, + "eval_runtime": 407.3576, + "eval_samples_per_second": 756.966, + "eval_steps_per_second": 15.772, + "step": 630000 + }, + { + "epoch": 5.17, + "learning_rate": 7.95985467457956e-06, + "loss": 3.2299, + "step": 630500 + }, + { + "epoch": 5.17, + "learning_rate": 7.95821072185964e-06, + "loss": 3.2245, + "step": 631000 + }, + { + "epoch": 5.17, + "learning_rate": 7.95656676913972e-06, + "loss": 3.2264, + "step": 631500 + }, + { + "epoch": 5.18, + "learning_rate": 7.9549228164198e-06, + "loss": 3.2198, + "step": 632000 + }, + { + "epoch": 5.18, + "learning_rate": 7.953278863699881e-06, + "loss": 3.2207, + "step": 632500 + }, + { + "epoch": 5.19, + "learning_rate": 7.95163491097996e-06, + "loss": 3.225, + "step": 633000 + }, + { + "epoch": 5.19, + "learning_rate": 7.949990958260041e-06, + "loss": 3.2346, + "step": 633500 + }, + { + "epoch": 5.19, + "learning_rate": 7.94834700554012e-06, + "loss": 3.228, + "step": 634000 + }, + { + "epoch": 5.2, + "learning_rate": 7.946703052820202e-06, + "loss": 3.2245, + "step": 634500 + }, + { + "epoch": 5.2, + "learning_rate": 7.945059100100283e-06, + "loss": 3.2281, + "step": 635000 + }, + { + "epoch": 5.21, + "learning_rate": 7.943415147380362e-06, + "loss": 3.2263, + "step": 635500 + }, + { + "epoch": 5.21, + "learning_rate": 7.941771194660443e-06, + "loss": 3.2242, + "step": 636000 + }, + { + "epoch": 5.21, + "learning_rate": 7.940127241940522e-06, + "loss": 3.2237, + "step": 636500 + }, + { + "epoch": 5.22, + "learning_rate": 7.938483289220603e-06, + "loss": 3.2275, + "step": 637000 + }, + { + "epoch": 5.22, + "learning_rate": 7.936839336500682e-06, + "loss": 3.223, + "step": 637500 + }, + { + "epoch": 5.23, + "learning_rate": 7.935195383780763e-06, + "loss": 3.2262, + "step": 638000 + }, + { + "epoch": 5.23, + "learning_rate": 7.933551431060843e-06, + "loss": 3.2343, + "step": 638500 + }, + { + "epoch": 5.24, + "learning_rate": 7.931907478340924e-06, + "loss": 3.225, + "step": 639000 + }, + { + "epoch": 5.24, + "learning_rate": 7.930263525621003e-06, + "loss": 3.2203, + "step": 639500 + }, + { + "epoch": 5.24, + "learning_rate": 7.928619572901084e-06, + "loss": 3.2276, + "step": 640000 + }, + { + "epoch": 5.25, + "learning_rate": 7.926975620181165e-06, + "loss": 3.2216, + "step": 640500 + }, + { + "epoch": 5.25, + "learning_rate": 7.925331667461244e-06, + "loss": 3.2199, + "step": 641000 + }, + { + "epoch": 5.26, + "learning_rate": 7.923687714741325e-06, + "loss": 3.226, + "step": 641500 + }, + { + "epoch": 5.26, + "learning_rate": 7.922043762021406e-06, + "loss": 3.2282, + "step": 642000 + }, + { + "epoch": 5.26, + "learning_rate": 7.920399809301485e-06, + "loss": 3.2232, + "step": 642500 + }, + { + "epoch": 5.27, + "learning_rate": 7.918755856581566e-06, + "loss": 3.2266, + "step": 643000 + }, + { + "epoch": 5.27, + "learning_rate": 7.917111903861646e-06, + "loss": 3.2136, + "step": 643500 + }, + { + "epoch": 5.28, + "learning_rate": 7.915467951141725e-06, + "loss": 3.2274, + "step": 644000 + }, + { + "epoch": 5.28, + "learning_rate": 7.913823998421806e-06, + "loss": 3.2235, + "step": 644500 + }, + { + "epoch": 5.28, + "learning_rate": 7.912180045701887e-06, + "loss": 3.2288, + "step": 645000 + }, + { + "epoch": 5.29, + "learning_rate": 7.910536092981966e-06, + "loss": 3.2166, + "step": 645500 + }, + { + "epoch": 5.29, + "learning_rate": 7.908892140262047e-06, + "loss": 3.2206, + "step": 646000 + }, + { + "epoch": 5.3, + "learning_rate": 7.907248187542126e-06, + "loss": 3.2212, + "step": 646500 + }, + { + "epoch": 5.3, + "learning_rate": 7.905604234822207e-06, + "loss": 3.2169, + "step": 647000 + }, + { + "epoch": 5.3, + "learning_rate": 7.903960282102288e-06, + "loss": 3.2286, + "step": 647500 + }, + { + "epoch": 5.31, + "learning_rate": 7.902316329382368e-06, + "loss": 3.2228, + "step": 648000 + }, + { + "epoch": 5.31, + "learning_rate": 7.900672376662449e-06, + "loss": 3.2177, + "step": 648500 + }, + { + "epoch": 5.32, + "learning_rate": 7.899028423942528e-06, + "loss": 3.2192, + "step": 649000 + }, + { + "epoch": 5.32, + "learning_rate": 7.897384471222607e-06, + "loss": 3.2179, + "step": 649500 + }, + { + "epoch": 5.33, + "learning_rate": 7.895740518502688e-06, + "loss": 3.227, + "step": 650000 + }, + { + "epoch": 5.33, + "learning_rate": 7.894096565782769e-06, + "loss": 3.2206, + "step": 650500 + }, + { + "epoch": 5.33, + "learning_rate": 7.892452613062848e-06, + "loss": 3.2183, + "step": 651000 + }, + { + "epoch": 5.34, + "learning_rate": 7.89080866034293e-06, + "loss": 3.2277, + "step": 651500 + }, + { + "epoch": 5.34, + "learning_rate": 7.889164707623009e-06, + "loss": 3.2174, + "step": 652000 + }, + { + "epoch": 5.35, + "learning_rate": 7.88752075490309e-06, + "loss": 3.2233, + "step": 652500 + }, + { + "epoch": 5.35, + "learning_rate": 7.88587680218317e-06, + "loss": 3.2165, + "step": 653000 + }, + { + "epoch": 5.35, + "learning_rate": 7.88423284946325e-06, + "loss": 3.2164, + "step": 653500 + }, + { + "epoch": 5.36, + "learning_rate": 7.88258889674333e-06, + "loss": 3.2157, + "step": 654000 + }, + { + "epoch": 5.36, + "learning_rate": 7.880944944023412e-06, + "loss": 3.2215, + "step": 654500 + }, + { + "epoch": 5.37, + "learning_rate": 7.879300991303491e-06, + "loss": 3.2217, + "step": 655000 + }, + { + "epoch": 5.37, + "learning_rate": 7.87765703858357e-06, + "loss": 3.2213, + "step": 655500 + }, + { + "epoch": 5.37, + "learning_rate": 7.876013085863651e-06, + "loss": 3.2122, + "step": 656000 + }, + { + "epoch": 5.38, + "learning_rate": 7.87436913314373e-06, + "loss": 3.2176, + "step": 656500 + }, + { + "epoch": 5.38, + "learning_rate": 7.872725180423812e-06, + "loss": 3.2147, + "step": 657000 + }, + { + "epoch": 5.39, + "learning_rate": 7.871081227703893e-06, + "loss": 3.217, + "step": 657500 + }, + { + "epoch": 5.39, + "learning_rate": 7.869437274983972e-06, + "loss": 3.219, + "step": 658000 + }, + { + "epoch": 5.39, + "learning_rate": 7.867793322264053e-06, + "loss": 3.2219, + "step": 658500 + }, + { + "epoch": 5.4, + "learning_rate": 7.866149369544132e-06, + "loss": 3.215, + "step": 659000 + }, + { + "epoch": 5.4, + "learning_rate": 7.864505416824213e-06, + "loss": 3.2098, + "step": 659500 + }, + { + "epoch": 5.41, + "learning_rate": 7.862861464104294e-06, + "loss": 3.2115, + "step": 660000 + }, + { + "epoch": 5.41, + "eval_accuracy": 0.4573150918223206, + "eval_loss": 3.0818052291870117, + "eval_runtime": 409.6703, + "eval_samples_per_second": 752.693, + "eval_steps_per_second": 15.683, + "step": 660000 + }, + { + "epoch": 5.41, + "learning_rate": 7.861217511384373e-06, + "loss": 3.214, + "step": 660500 + }, + { + "epoch": 5.42, + "learning_rate": 7.859573558664454e-06, + "loss": 3.2186, + "step": 661000 + }, + { + "epoch": 5.42, + "learning_rate": 7.857929605944534e-06, + "loss": 3.2234, + "step": 661500 + }, + { + "epoch": 5.42, + "learning_rate": 7.856285653224613e-06, + "loss": 3.22, + "step": 662000 + }, + { + "epoch": 5.43, + "learning_rate": 7.854641700504694e-06, + "loss": 3.2187, + "step": 662500 + }, + { + "epoch": 5.43, + "learning_rate": 7.852997747784775e-06, + "loss": 3.2166, + "step": 663000 + }, + { + "epoch": 5.44, + "learning_rate": 7.851353795064854e-06, + "loss": 3.218, + "step": 663500 + }, + { + "epoch": 5.44, + "learning_rate": 7.849709842344935e-06, + "loss": 3.2183, + "step": 664000 + }, + { + "epoch": 5.44, + "learning_rate": 7.848065889625014e-06, + "loss": 3.2065, + "step": 664500 + }, + { + "epoch": 5.45, + "learning_rate": 7.846421936905095e-06, + "loss": 3.2123, + "step": 665000 + }, + { + "epoch": 5.45, + "learning_rate": 7.844777984185176e-06, + "loss": 3.2178, + "step": 665500 + }, + { + "epoch": 5.46, + "learning_rate": 7.843134031465256e-06, + "loss": 3.215, + "step": 666000 + }, + { + "epoch": 5.46, + "learning_rate": 7.841490078745336e-06, + "loss": 3.212, + "step": 666500 + }, + { + "epoch": 5.46, + "learning_rate": 7.839846126025416e-06, + "loss": 3.2089, + "step": 667000 + }, + { + "epoch": 5.47, + "learning_rate": 7.838202173305495e-06, + "loss": 3.2153, + "step": 667500 + }, + { + "epoch": 5.47, + "learning_rate": 7.836558220585576e-06, + "loss": 3.2125, + "step": 668000 + }, + { + "epoch": 5.48, + "learning_rate": 7.834914267865657e-06, + "loss": 3.2162, + "step": 668500 + }, + { + "epoch": 5.48, + "learning_rate": 7.833270315145736e-06, + "loss": 3.2106, + "step": 669000 + }, + { + "epoch": 5.49, + "learning_rate": 7.831626362425817e-06, + "loss": 3.2122, + "step": 669500 + }, + { + "epoch": 5.49, + "learning_rate": 7.829982409705898e-06, + "loss": 3.2046, + "step": 670000 + }, + { + "epoch": 5.49, + "learning_rate": 7.828338456985978e-06, + "loss": 3.202, + "step": 670500 + }, + { + "epoch": 5.5, + "learning_rate": 7.826694504266058e-06, + "loss": 3.2076, + "step": 671000 + }, + { + "epoch": 5.5, + "learning_rate": 7.825050551546138e-06, + "loss": 3.2012, + "step": 671500 + }, + { + "epoch": 5.51, + "learning_rate": 7.823406598826219e-06, + "loss": 3.2134, + "step": 672000 + }, + { + "epoch": 5.51, + "learning_rate": 7.8217626461063e-06, + "loss": 3.2088, + "step": 672500 + }, + { + "epoch": 5.51, + "learning_rate": 7.820118693386379e-06, + "loss": 3.2135, + "step": 673000 + }, + { + "epoch": 5.52, + "learning_rate": 7.81847474066646e-06, + "loss": 3.206, + "step": 673500 + }, + { + "epoch": 5.52, + "learning_rate": 7.81683078794654e-06, + "loss": 3.2101, + "step": 674000 + }, + { + "epoch": 5.53, + "learning_rate": 7.815186835226619e-06, + "loss": 3.2208, + "step": 674500 + }, + { + "epoch": 5.53, + "learning_rate": 7.8135428825067e-06, + "loss": 3.2144, + "step": 675000 + }, + { + "epoch": 5.53, + "learning_rate": 7.81189892978678e-06, + "loss": 3.2144, + "step": 675500 + }, + { + "epoch": 5.54, + "learning_rate": 7.81025497706686e-06, + "loss": 3.2076, + "step": 676000 + }, + { + "epoch": 5.54, + "learning_rate": 7.80861102434694e-06, + "loss": 3.2067, + "step": 676500 + }, + { + "epoch": 5.55, + "learning_rate": 7.80696707162702e-06, + "loss": 3.2107, + "step": 677000 + }, + { + "epoch": 5.55, + "learning_rate": 7.805323118907101e-06, + "loss": 3.2109, + "step": 677500 + }, + { + "epoch": 5.55, + "learning_rate": 7.803679166187182e-06, + "loss": 3.2116, + "step": 678000 + }, + { + "epoch": 5.56, + "learning_rate": 7.802035213467261e-06, + "loss": 3.205, + "step": 678500 + }, + { + "epoch": 5.56, + "learning_rate": 7.800391260747342e-06, + "loss": 3.2019, + "step": 679000 + }, + { + "epoch": 5.57, + "learning_rate": 7.798747308027421e-06, + "loss": 3.2089, + "step": 679500 + }, + { + "epoch": 5.57, + "learning_rate": 7.7971033553075e-06, + "loss": 3.2096, + "step": 680000 + }, + { + "epoch": 5.58, + "learning_rate": 7.795459402587582e-06, + "loss": 3.2131, + "step": 680500 + }, + { + "epoch": 5.58, + "learning_rate": 7.793815449867663e-06, + "loss": 3.2079, + "step": 681000 + }, + { + "epoch": 5.58, + "learning_rate": 7.792171497147742e-06, + "loss": 3.2019, + "step": 681500 + }, + { + "epoch": 5.59, + "learning_rate": 7.790527544427823e-06, + "loss": 3.2092, + "step": 682000 + }, + { + "epoch": 5.59, + "learning_rate": 7.788883591707904e-06, + "loss": 3.2066, + "step": 682500 + }, + { + "epoch": 5.6, + "learning_rate": 7.787239638987983e-06, + "loss": 3.1992, + "step": 683000 + }, + { + "epoch": 5.6, + "learning_rate": 7.785595686268064e-06, + "loss": 3.2042, + "step": 683500 + }, + { + "epoch": 5.6, + "learning_rate": 7.783951733548143e-06, + "loss": 3.1997, + "step": 684000 + }, + { + "epoch": 5.61, + "learning_rate": 7.782307780828224e-06, + "loss": 3.2064, + "step": 684500 + }, + { + "epoch": 5.61, + "learning_rate": 7.780663828108305e-06, + "loss": 3.2102, + "step": 685000 + }, + { + "epoch": 5.62, + "learning_rate": 7.779019875388385e-06, + "loss": 3.202, + "step": 685500 + }, + { + "epoch": 5.62, + "learning_rate": 7.777375922668464e-06, + "loss": 3.2049, + "step": 686000 + }, + { + "epoch": 5.62, + "learning_rate": 7.775731969948545e-06, + "loss": 3.212, + "step": 686500 + }, + { + "epoch": 5.63, + "learning_rate": 7.774088017228624e-06, + "loss": 3.2008, + "step": 687000 + }, + { + "epoch": 5.63, + "learning_rate": 7.772444064508705e-06, + "loss": 3.2015, + "step": 687500 + }, + { + "epoch": 5.64, + "learning_rate": 7.770800111788786e-06, + "loss": 3.1986, + "step": 688000 + }, + { + "epoch": 5.64, + "learning_rate": 7.769156159068865e-06, + "loss": 3.2028, + "step": 688500 + }, + { + "epoch": 5.64, + "learning_rate": 7.767512206348946e-06, + "loss": 3.21, + "step": 689000 + }, + { + "epoch": 5.65, + "learning_rate": 7.765868253629026e-06, + "loss": 3.2104, + "step": 689500 + }, + { + "epoch": 5.65, + "learning_rate": 7.764224300909107e-06, + "loss": 3.2011, + "step": 690000 + }, + { + "epoch": 5.65, + "eval_accuracy": 0.4590029752907897, + "eval_loss": 3.068504810333252, + "eval_runtime": 404.8644, + "eval_samples_per_second": 761.628, + "eval_steps_per_second": 15.87, + "step": 690000 + }, + { + "epoch": 5.66, + "learning_rate": 7.762580348189188e-06, + "loss": 3.2015, + "step": 690500 + }, + { + "epoch": 5.66, + "learning_rate": 7.760936395469267e-06, + "loss": 3.2047, + "step": 691000 + }, + { + "epoch": 5.67, + "learning_rate": 7.759292442749348e-06, + "loss": 3.2034, + "step": 691500 + }, + { + "epoch": 5.67, + "learning_rate": 7.757648490029427e-06, + "loss": 3.1976, + "step": 692000 + }, + { + "epoch": 5.67, + "learning_rate": 7.756004537309506e-06, + "loss": 3.2029, + "step": 692500 + }, + { + "epoch": 5.68, + "learning_rate": 7.754360584589587e-06, + "loss": 3.1993, + "step": 693000 + }, + { + "epoch": 5.68, + "learning_rate": 7.752716631869668e-06, + "loss": 3.2092, + "step": 693500 + }, + { + "epoch": 5.69, + "learning_rate": 7.751072679149748e-06, + "loss": 3.2087, + "step": 694000 + }, + { + "epoch": 5.69, + "learning_rate": 7.749428726429829e-06, + "loss": 3.1983, + "step": 694500 + }, + { + "epoch": 5.69, + "learning_rate": 7.747784773709908e-06, + "loss": 3.209, + "step": 695000 + }, + { + "epoch": 5.7, + "learning_rate": 7.746140820989989e-06, + "loss": 3.1986, + "step": 695500 + }, + { + "epoch": 5.7, + "learning_rate": 7.74449686827007e-06, + "loss": 3.2066, + "step": 696000 + }, + { + "epoch": 5.71, + "learning_rate": 7.74285291555015e-06, + "loss": 3.2, + "step": 696500 + }, + { + "epoch": 5.71, + "learning_rate": 7.74120896283023e-06, + "loss": 3.1934, + "step": 697000 + }, + { + "epoch": 5.71, + "learning_rate": 7.739565010110311e-06, + "loss": 3.1966, + "step": 697500 + }, + { + "epoch": 5.72, + "learning_rate": 7.73792105739039e-06, + "loss": 3.1951, + "step": 698000 + }, + { + "epoch": 5.72, + "learning_rate": 7.73627710467047e-06, + "loss": 3.2011, + "step": 698500 + }, + { + "epoch": 5.73, + "learning_rate": 7.73463315195055e-06, + "loss": 3.2043, + "step": 699000 + }, + { + "epoch": 5.73, + "learning_rate": 7.73298919923063e-06, + "loss": 3.1936, + "step": 699500 + }, + { + "epoch": 5.73, + "learning_rate": 7.731345246510711e-06, + "loss": 3.1944, + "step": 700000 + }, + { + "epoch": 5.74, + "learning_rate": 7.729701293790792e-06, + "loss": 3.1976, + "step": 700500 + }, + { + "epoch": 5.74, + "learning_rate": 7.728057341070871e-06, + "loss": 3.1919, + "step": 701000 + }, + { + "epoch": 5.75, + "learning_rate": 7.726413388350952e-06, + "loss": 3.2013, + "step": 701500 + }, + { + "epoch": 5.75, + "learning_rate": 7.724769435631031e-06, + "loss": 3.1973, + "step": 702000 + }, + { + "epoch": 5.76, + "learning_rate": 7.723125482911112e-06, + "loss": 3.1948, + "step": 702500 + }, + { + "epoch": 5.76, + "learning_rate": 7.721481530191193e-06, + "loss": 3.1967, + "step": 703000 + }, + { + "epoch": 5.76, + "learning_rate": 7.719837577471273e-06, + "loss": 3.1862, + "step": 703500 + }, + { + "epoch": 5.77, + "learning_rate": 7.718193624751352e-06, + "loss": 3.1962, + "step": 704000 + }, + { + "epoch": 5.77, + "learning_rate": 7.716549672031433e-06, + "loss": 3.1963, + "step": 704500 + }, + { + "epoch": 5.78, + "learning_rate": 7.714905719311512e-06, + "loss": 3.1839, + "step": 705000 + }, + { + "epoch": 5.78, + "learning_rate": 7.713261766591593e-06, + "loss": 3.1969, + "step": 705500 + }, + { + "epoch": 5.78, + "learning_rate": 7.711617813871674e-06, + "loss": 3.1906, + "step": 706000 + }, + { + "epoch": 5.79, + "learning_rate": 7.709973861151753e-06, + "loss": 3.1996, + "step": 706500 + }, + { + "epoch": 5.79, + "learning_rate": 7.708329908431834e-06, + "loss": 3.1966, + "step": 707000 + }, + { + "epoch": 5.8, + "learning_rate": 7.706685955711914e-06, + "loss": 3.1923, + "step": 707500 + }, + { + "epoch": 5.8, + "learning_rate": 7.705042002991995e-06, + "loss": 3.1893, + "step": 708000 + }, + { + "epoch": 5.8, + "learning_rate": 7.703398050272076e-06, + "loss": 3.1887, + "step": 708500 + }, + { + "epoch": 5.81, + "learning_rate": 7.701754097552155e-06, + "loss": 3.1983, + "step": 709000 + }, + { + "epoch": 5.81, + "learning_rate": 7.700110144832236e-06, + "loss": 3.1923, + "step": 709500 + }, + { + "epoch": 5.82, + "learning_rate": 7.698466192112315e-06, + "loss": 3.1964, + "step": 710000 + }, + { + "epoch": 5.82, + "learning_rate": 7.696822239392396e-06, + "loss": 3.1965, + "step": 710500 + }, + { + "epoch": 5.83, + "learning_rate": 7.695178286672475e-06, + "loss": 3.1941, + "step": 711000 + }, + { + "epoch": 5.83, + "learning_rate": 7.693534333952556e-06, + "loss": 3.1893, + "step": 711500 + }, + { + "epoch": 5.83, + "learning_rate": 7.691890381232636e-06, + "loss": 3.1945, + "step": 712000 + }, + { + "epoch": 5.84, + "learning_rate": 7.690246428512717e-06, + "loss": 3.1931, + "step": 712500 + }, + { + "epoch": 5.84, + "learning_rate": 7.688602475792798e-06, + "loss": 3.1853, + "step": 713000 + }, + { + "epoch": 5.85, + "learning_rate": 7.686958523072877e-06, + "loss": 3.1902, + "step": 713500 + }, + { + "epoch": 5.85, + "learning_rate": 7.685314570352958e-06, + "loss": 3.2006, + "step": 714000 + }, + { + "epoch": 5.85, + "learning_rate": 7.683670617633037e-06, + "loss": 3.195, + "step": 714500 + }, + { + "epoch": 5.86, + "learning_rate": 7.682026664913118e-06, + "loss": 3.1975, + "step": 715000 + }, + { + "epoch": 5.86, + "learning_rate": 7.680382712193199e-06, + "loss": 3.1905, + "step": 715500 + }, + { + "epoch": 5.87, + "learning_rate": 7.678738759473278e-06, + "loss": 3.1872, + "step": 716000 + }, + { + "epoch": 5.87, + "learning_rate": 7.677094806753358e-06, + "loss": 3.1891, + "step": 716500 + }, + { + "epoch": 5.87, + "learning_rate": 7.675450854033439e-06, + "loss": 3.1857, + "step": 717000 + }, + { + "epoch": 5.88, + "learning_rate": 7.673806901313518e-06, + "loss": 3.1894, + "step": 717500 + }, + { + "epoch": 5.88, + "learning_rate": 7.672162948593599e-06, + "loss": 3.1856, + "step": 718000 + }, + { + "epoch": 5.89, + "learning_rate": 7.67051899587368e-06, + "loss": 3.1828, + "step": 718500 + }, + { + "epoch": 5.89, + "learning_rate": 7.668875043153759e-06, + "loss": 3.1812, + "step": 719000 + }, + { + "epoch": 5.89, + "learning_rate": 7.66723109043384e-06, + "loss": 3.1908, + "step": 719500 + }, + { + "epoch": 5.9, + "learning_rate": 7.66558713771392e-06, + "loss": 3.1898, + "step": 720000 + }, + { + "epoch": 5.9, + "eval_accuracy": 0.46191997629259224, + "eval_loss": 3.0464377403259277, + "eval_runtime": 413.3901, + "eval_samples_per_second": 745.92, + "eval_steps_per_second": 15.542, + "step": 720000 + }, + { + "epoch": 5.9, + "learning_rate": 7.663943184994e-06, + "loss": 3.1823, + "step": 720500 + }, + { + "epoch": 5.91, + "learning_rate": 7.662299232274081e-06, + "loss": 3.1837, + "step": 721000 + }, + { + "epoch": 5.91, + "learning_rate": 7.66065527955416e-06, + "loss": 3.1796, + "step": 721500 + }, + { + "epoch": 5.92, + "learning_rate": 7.659011326834242e-06, + "loss": 3.188, + "step": 722000 + }, + { + "epoch": 5.92, + "learning_rate": 7.65736737411432e-06, + "loss": 3.1883, + "step": 722500 + }, + { + "epoch": 5.92, + "learning_rate": 7.6557234213944e-06, + "loss": 3.1769, + "step": 723000 + }, + { + "epoch": 5.93, + "learning_rate": 7.654079468674481e-06, + "loss": 3.1887, + "step": 723500 + }, + { + "epoch": 5.93, + "learning_rate": 7.652435515954562e-06, + "loss": 3.1806, + "step": 724000 + }, + { + "epoch": 5.94, + "learning_rate": 7.650791563234641e-06, + "loss": 3.1816, + "step": 724500 + }, + { + "epoch": 5.94, + "learning_rate": 7.649147610514722e-06, + "loss": 3.1748, + "step": 725000 + }, + { + "epoch": 5.94, + "learning_rate": 7.647503657794803e-06, + "loss": 3.1888, + "step": 725500 + }, + { + "epoch": 5.95, + "learning_rate": 7.645859705074883e-06, + "loss": 3.1823, + "step": 726000 + }, + { + "epoch": 5.95, + "learning_rate": 7.644215752354964e-06, + "loss": 3.1763, + "step": 726500 + }, + { + "epoch": 5.96, + "learning_rate": 7.642571799635043e-06, + "loss": 3.1843, + "step": 727000 + }, + { + "epoch": 5.96, + "learning_rate": 7.640927846915124e-06, + "loss": 3.1856, + "step": 727500 + }, + { + "epoch": 5.96, + "learning_rate": 7.639283894195205e-06, + "loss": 3.1812, + "step": 728000 + }, + { + "epoch": 5.97, + "learning_rate": 7.637639941475284e-06, + "loss": 3.176, + "step": 728500 + }, + { + "epoch": 5.97, + "learning_rate": 7.635995988755363e-06, + "loss": 3.1842, + "step": 729000 + }, + { + "epoch": 5.98, + "learning_rate": 7.634352036035444e-06, + "loss": 3.1775, + "step": 729500 + }, + { + "epoch": 5.98, + "learning_rate": 7.632708083315524e-06, + "loss": 3.1803, + "step": 730000 + }, + { + "epoch": 5.98, + "learning_rate": 7.631064130595605e-06, + "loss": 3.1818, + "step": 730500 + }, + { + "epoch": 5.99, + "learning_rate": 7.629420177875686e-06, + "loss": 3.1754, + "step": 731000 + }, + { + "epoch": 5.99, + "learning_rate": 7.627776225155765e-06, + "loss": 3.1827, + "step": 731500 + }, + { + "epoch": 6.0, + "learning_rate": 7.626132272435846e-06, + "loss": 3.1745, + "step": 732000 + }, + { + "epoch": 6.0, + "learning_rate": 7.624488319715926e-06, + "loss": 3.1769, + "step": 732500 + }, + { + "epoch": 6.01, + "learning_rate": 7.622844366996006e-06, + "loss": 3.1717, + "step": 733000 + }, + { + "epoch": 6.01, + "learning_rate": 7.621200414276086e-06, + "loss": 3.1804, + "step": 733500 + }, + { + "epoch": 6.01, + "learning_rate": 7.619556461556167e-06, + "loss": 3.1726, + "step": 734000 + }, + { + "epoch": 6.02, + "learning_rate": 7.617912508836246e-06, + "loss": 3.1754, + "step": 734500 + }, + { + "epoch": 6.02, + "learning_rate": 7.6162685561163265e-06, + "loss": 3.177, + "step": 735000 + }, + { + "epoch": 6.03, + "learning_rate": 7.614624603396407e-06, + "loss": 3.1697, + "step": 735500 + }, + { + "epoch": 6.03, + "learning_rate": 7.612980650676487e-06, + "loss": 3.1712, + "step": 736000 + }, + { + "epoch": 6.03, + "learning_rate": 7.611336697956567e-06, + "loss": 3.1734, + "step": 736500 + }, + { + "epoch": 6.04, + "learning_rate": 7.609692745236648e-06, + "loss": 3.1702, + "step": 737000 + }, + { + "epoch": 6.04, + "learning_rate": 7.608048792516728e-06, + "loss": 3.1723, + "step": 737500 + }, + { + "epoch": 6.05, + "learning_rate": 7.606404839796808e-06, + "loss": 3.1815, + "step": 738000 + }, + { + "epoch": 6.05, + "learning_rate": 7.604760887076888e-06, + "loss": 3.1775, + "step": 738500 + }, + { + "epoch": 6.05, + "learning_rate": 7.603116934356969e-06, + "loss": 3.1654, + "step": 739000 + }, + { + "epoch": 6.06, + "learning_rate": 7.601472981637049e-06, + "loss": 3.1639, + "step": 739500 + }, + { + "epoch": 6.06, + "learning_rate": 7.5998290289171295e-06, + "loss": 3.1757, + "step": 740000 + }, + { + "epoch": 6.07, + "learning_rate": 7.598185076197209e-06, + "loss": 3.176, + "step": 740500 + }, + { + "epoch": 6.07, + "learning_rate": 7.596541123477289e-06, + "loss": 3.1781, + "step": 741000 + }, + { + "epoch": 6.07, + "learning_rate": 7.594897170757369e-06, + "loss": 3.1671, + "step": 741500 + }, + { + "epoch": 6.08, + "learning_rate": 7.59325321803745e-06, + "loss": 3.1691, + "step": 742000 + }, + { + "epoch": 6.08, + "learning_rate": 7.59160926531753e-06, + "loss": 3.1701, + "step": 742500 + }, + { + "epoch": 6.09, + "learning_rate": 7.58996531259761e-06, + "loss": 3.1767, + "step": 743000 + }, + { + "epoch": 6.09, + "learning_rate": 7.58832135987769e-06, + "loss": 3.1719, + "step": 743500 + }, + { + "epoch": 6.1, + "learning_rate": 7.5866774071577705e-06, + "loss": 3.1576, + "step": 744000 + }, + { + "epoch": 6.1, + "learning_rate": 7.5850334544378515e-06, + "loss": 3.167, + "step": 744500 + }, + { + "epoch": 6.1, + "learning_rate": 7.583389501717932e-06, + "loss": 3.172, + "step": 745000 + }, + { + "epoch": 6.11, + "learning_rate": 7.581745548998012e-06, + "loss": 3.17, + "step": 745500 + }, + { + "epoch": 6.11, + "learning_rate": 7.580101596278092e-06, + "loss": 3.1622, + "step": 746000 + }, + { + "epoch": 6.12, + "learning_rate": 7.578457643558171e-06, + "loss": 3.1708, + "step": 746500 + }, + { + "epoch": 6.12, + "learning_rate": 7.576813690838252e-06, + "loss": 3.1658, + "step": 747000 + }, + { + "epoch": 6.12, + "learning_rate": 7.575169738118332e-06, + "loss": 3.1665, + "step": 747500 + }, + { + "epoch": 6.13, + "learning_rate": 7.573525785398412e-06, + "loss": 3.157, + "step": 748000 + }, + { + "epoch": 6.13, + "learning_rate": 7.5718818326784925e-06, + "loss": 3.164, + "step": 748500 + }, + { + "epoch": 6.14, + "learning_rate": 7.570237879958573e-06, + "loss": 3.1626, + "step": 749000 + }, + { + "epoch": 6.14, + "learning_rate": 7.568593927238654e-06, + "loss": 3.162, + "step": 749500 + }, + { + "epoch": 6.14, + "learning_rate": 7.566949974518734e-06, + "loss": 3.1651, + "step": 750000 + }, + { + "epoch": 6.14, + "eval_accuracy": 0.4657812440113609, + "eval_loss": 3.022564172744751, + "eval_runtime": 408.1772, + "eval_samples_per_second": 755.446, + "eval_steps_per_second": 15.741, + "step": 750000 + }, + { + "epoch": 6.15, + "learning_rate": 7.565306021798814e-06, + "loss": 3.1641, + "step": 750500 + }, + { + "epoch": 6.15, + "learning_rate": 7.563662069078894e-06, + "loss": 3.153, + "step": 751000 + }, + { + "epoch": 6.16, + "learning_rate": 7.562018116358975e-06, + "loss": 3.1609, + "step": 751500 + }, + { + "epoch": 6.16, + "learning_rate": 7.560374163639055e-06, + "loss": 3.1574, + "step": 752000 + }, + { + "epoch": 6.17, + "learning_rate": 7.558730210919134e-06, + "loss": 3.1566, + "step": 752500 + }, + { + "epoch": 6.17, + "learning_rate": 7.5570862581992145e-06, + "loss": 3.162, + "step": 753000 + }, + { + "epoch": 6.17, + "learning_rate": 7.555442305479295e-06, + "loss": 3.1615, + "step": 753500 + }, + { + "epoch": 6.18, + "learning_rate": 7.553798352759375e-06, + "loss": 3.1636, + "step": 754000 + }, + { + "epoch": 6.18, + "learning_rate": 7.552154400039456e-06, + "loss": 3.1591, + "step": 754500 + }, + { + "epoch": 6.19, + "learning_rate": 7.550510447319536e-06, + "loss": 3.1607, + "step": 755000 + }, + { + "epoch": 6.19, + "learning_rate": 7.548866494599616e-06, + "loss": 3.1572, + "step": 755500 + }, + { + "epoch": 6.19, + "learning_rate": 7.547222541879696e-06, + "loss": 3.1628, + "step": 756000 + }, + { + "epoch": 6.2, + "learning_rate": 7.545578589159776e-06, + "loss": 3.1555, + "step": 756500 + }, + { + "epoch": 6.2, + "learning_rate": 7.543934636439857e-06, + "loss": 3.1581, + "step": 757000 + }, + { + "epoch": 6.21, + "learning_rate": 7.542290683719937e-06, + "loss": 3.1646, + "step": 757500 + }, + { + "epoch": 6.21, + "learning_rate": 7.5406467310000174e-06, + "loss": 3.1626, + "step": 758000 + }, + { + "epoch": 6.21, + "learning_rate": 7.539002778280097e-06, + "loss": 3.1525, + "step": 758500 + }, + { + "epoch": 6.22, + "learning_rate": 7.537358825560177e-06, + "loss": 3.1567, + "step": 759000 + }, + { + "epoch": 6.22, + "learning_rate": 7.535714872840257e-06, + "loss": 3.1599, + "step": 759500 + }, + { + "epoch": 6.23, + "learning_rate": 7.534070920120338e-06, + "loss": 3.1513, + "step": 760000 + }, + { + "epoch": 6.23, + "learning_rate": 7.532426967400418e-06, + "loss": 3.1621, + "step": 760500 + }, + { + "epoch": 6.23, + "learning_rate": 7.530783014680498e-06, + "loss": 3.1586, + "step": 761000 + }, + { + "epoch": 6.24, + "learning_rate": 7.529139061960578e-06, + "loss": 3.1493, + "step": 761500 + }, + { + "epoch": 6.24, + "learning_rate": 7.527495109240659e-06, + "loss": 3.1613, + "step": 762000 + }, + { + "epoch": 6.25, + "learning_rate": 7.525851156520739e-06, + "loss": 3.1542, + "step": 762500 + }, + { + "epoch": 6.25, + "learning_rate": 7.5242072038008195e-06, + "loss": 3.1486, + "step": 763000 + }, + { + "epoch": 6.26, + "learning_rate": 7.5225632510809e-06, + "loss": 3.1542, + "step": 763500 + }, + { + "epoch": 6.26, + "learning_rate": 7.520919298360981e-06, + "loss": 3.1495, + "step": 764000 + }, + { + "epoch": 6.26, + "learning_rate": 7.519275345641059e-06, + "loss": 3.1548, + "step": 764500 + }, + { + "epoch": 6.27, + "learning_rate": 7.51763139292114e-06, + "loss": 3.1525, + "step": 765000 + }, + { + "epoch": 6.27, + "learning_rate": 7.51598744020122e-06, + "loss": 3.1562, + "step": 765500 + }, + { + "epoch": 6.28, + "learning_rate": 7.5143434874813e-06, + "loss": 3.163, + "step": 766000 + }, + { + "epoch": 6.28, + "learning_rate": 7.5126995347613804e-06, + "loss": 3.1509, + "step": 766500 + }, + { + "epoch": 6.28, + "learning_rate": 7.511055582041461e-06, + "loss": 3.1502, + "step": 767000 + }, + { + "epoch": 6.29, + "learning_rate": 7.5094116293215415e-06, + "loss": 3.1499, + "step": 767500 + }, + { + "epoch": 6.29, + "learning_rate": 7.507767676601622e-06, + "loss": 3.1461, + "step": 768000 + }, + { + "epoch": 6.3, + "learning_rate": 7.506123723881702e-06, + "loss": 3.1437, + "step": 768500 + }, + { + "epoch": 6.3, + "learning_rate": 7.504479771161782e-06, + "loss": 3.1509, + "step": 769000 + }, + { + "epoch": 6.3, + "learning_rate": 7.502835818441863e-06, + "loss": 3.1525, + "step": 769500 + }, + { + "epoch": 6.31, + "learning_rate": 7.501191865721943e-06, + "loss": 3.1488, + "step": 770000 + }, + { + "epoch": 6.31, + "learning_rate": 7.499547913002023e-06, + "loss": 3.1541, + "step": 770500 + }, + { + "epoch": 6.32, + "learning_rate": 7.497903960282102e-06, + "loss": 3.147, + "step": 771000 + }, + { + "epoch": 6.32, + "learning_rate": 7.4962600075621825e-06, + "loss": 3.1472, + "step": 771500 + }, + { + "epoch": 6.32, + "learning_rate": 7.494616054842263e-06, + "loss": 3.15, + "step": 772000 + }, + { + "epoch": 6.33, + "learning_rate": 7.492972102122344e-06, + "loss": 3.1486, + "step": 772500 + }, + { + "epoch": 6.33, + "learning_rate": 7.491328149402424e-06, + "loss": 3.1611, + "step": 773000 + }, + { + "epoch": 6.34, + "learning_rate": 7.489684196682504e-06, + "loss": 3.1517, + "step": 773500 + }, + { + "epoch": 6.34, + "learning_rate": 7.488040243962584e-06, + "loss": 3.1473, + "step": 774000 + }, + { + "epoch": 6.35, + "learning_rate": 7.486396291242665e-06, + "loss": 3.1438, + "step": 774500 + }, + { + "epoch": 6.35, + "learning_rate": 7.484752338522745e-06, + "loss": 3.137, + "step": 775000 + }, + { + "epoch": 6.35, + "learning_rate": 7.483108385802825e-06, + "loss": 3.15, + "step": 775500 + }, + { + "epoch": 6.36, + "learning_rate": 7.481464433082905e-06, + "loss": 3.1464, + "step": 776000 + }, + { + "epoch": 6.36, + "learning_rate": 7.4798204803629855e-06, + "loss": 3.1438, + "step": 776500 + }, + { + "epoch": 6.37, + "learning_rate": 7.478176527643065e-06, + "loss": 3.1437, + "step": 777000 + }, + { + "epoch": 6.37, + "learning_rate": 7.476532574923146e-06, + "loss": 3.1401, + "step": 777500 + }, + { + "epoch": 6.37, + "learning_rate": 7.474888622203226e-06, + "loss": 3.144, + "step": 778000 + }, + { + "epoch": 6.38, + "learning_rate": 7.473244669483306e-06, + "loss": 3.1513, + "step": 778500 + }, + { + "epoch": 6.38, + "learning_rate": 7.471600716763386e-06, + "loss": 3.1559, + "step": 779000 + }, + { + "epoch": 6.39, + "learning_rate": 7.469956764043467e-06, + "loss": 3.1461, + "step": 779500 + }, + { + "epoch": 6.39, + "learning_rate": 7.468312811323547e-06, + "loss": 3.1477, + "step": 780000 + }, + { + "epoch": 6.39, + "eval_accuracy": 0.4689297446428191, + "eval_loss": 3.002525568008423, + "eval_runtime": 411.5642, + "eval_samples_per_second": 749.229, + "eval_steps_per_second": 15.611, + "step": 780000 + }, + { + "epoch": 6.39, + "learning_rate": 7.466668858603627e-06, + "loss": 3.1411, + "step": 780500 + }, + { + "epoch": 6.4, + "learning_rate": 7.4650249058837075e-06, + "loss": 3.1422, + "step": 781000 + }, + { + "epoch": 6.4, + "learning_rate": 7.463380953163788e-06, + "loss": 3.1464, + "step": 781500 + }, + { + "epoch": 6.41, + "learning_rate": 7.461737000443869e-06, + "loss": 3.1429, + "step": 782000 + }, + { + "epoch": 6.41, + "learning_rate": 7.460093047723949e-06, + "loss": 3.1451, + "step": 782500 + }, + { + "epoch": 6.41, + "learning_rate": 7.458449095004028e-06, + "loss": 3.1436, + "step": 783000 + }, + { + "epoch": 6.42, + "learning_rate": 7.456805142284108e-06, + "loss": 3.1398, + "step": 783500 + }, + { + "epoch": 6.42, + "learning_rate": 7.455161189564188e-06, + "loss": 3.1447, + "step": 784000 + }, + { + "epoch": 6.43, + "learning_rate": 7.453517236844268e-06, + "loss": 3.1414, + "step": 784500 + }, + { + "epoch": 6.43, + "learning_rate": 7.451873284124349e-06, + "loss": 3.1408, + "step": 785000 + }, + { + "epoch": 6.44, + "learning_rate": 7.4502293314044295e-06, + "loss": 3.1384, + "step": 785500 + }, + { + "epoch": 6.44, + "learning_rate": 7.44858537868451e-06, + "loss": 3.1389, + "step": 786000 + }, + { + "epoch": 6.44, + "learning_rate": 7.44694142596459e-06, + "loss": 3.149, + "step": 786500 + }, + { + "epoch": 6.45, + "learning_rate": 7.445297473244671e-06, + "loss": 3.1404, + "step": 787000 + }, + { + "epoch": 6.45, + "learning_rate": 7.443653520524751e-06, + "loss": 3.1353, + "step": 787500 + }, + { + "epoch": 6.46, + "learning_rate": 7.442009567804831e-06, + "loss": 3.134, + "step": 788000 + }, + { + "epoch": 6.46, + "learning_rate": 7.440365615084911e-06, + "loss": 3.1423, + "step": 788500 + }, + { + "epoch": 6.46, + "learning_rate": 7.43872166236499e-06, + "loss": 3.14, + "step": 789000 + }, + { + "epoch": 6.47, + "learning_rate": 7.4370777096450705e-06, + "loss": 3.1309, + "step": 789500 + }, + { + "epoch": 6.47, + "learning_rate": 7.4354337569251515e-06, + "loss": 3.1384, + "step": 790000 + }, + { + "epoch": 6.48, + "learning_rate": 7.433789804205232e-06, + "loss": 3.1371, + "step": 790500 + }, + { + "epoch": 6.48, + "learning_rate": 7.432145851485312e-06, + "loss": 3.136, + "step": 791000 + }, + { + "epoch": 6.48, + "learning_rate": 7.430501898765392e-06, + "loss": 3.1433, + "step": 791500 + }, + { + "epoch": 6.49, + "learning_rate": 7.428857946045473e-06, + "loss": 3.1398, + "step": 792000 + }, + { + "epoch": 6.49, + "learning_rate": 7.427213993325553e-06, + "loss": 3.1428, + "step": 792500 + }, + { + "epoch": 6.5, + "learning_rate": 7.425570040605633e-06, + "loss": 3.1316, + "step": 793000 + }, + { + "epoch": 6.5, + "learning_rate": 7.423926087885713e-06, + "loss": 3.1392, + "step": 793500 + }, + { + "epoch": 6.51, + "learning_rate": 7.422282135165793e-06, + "loss": 3.1288, + "step": 794000 + }, + { + "epoch": 6.51, + "learning_rate": 7.420638182445874e-06, + "loss": 3.1354, + "step": 794500 + }, + { + "epoch": 6.51, + "learning_rate": 7.4189942297259536e-06, + "loss": 3.1339, + "step": 795000 + }, + { + "epoch": 6.52, + "learning_rate": 7.417350277006034e-06, + "loss": 3.1419, + "step": 795500 + }, + { + "epoch": 6.52, + "learning_rate": 7.415706324286114e-06, + "loss": 3.1363, + "step": 796000 + }, + { + "epoch": 6.53, + "learning_rate": 7.414062371566194e-06, + "loss": 3.134, + "step": 796500 + }, + { + "epoch": 6.53, + "learning_rate": 7.412418418846274e-06, + "loss": 3.13, + "step": 797000 + }, + { + "epoch": 6.53, + "learning_rate": 7.410774466126355e-06, + "loss": 3.1382, + "step": 797500 + }, + { + "epoch": 6.54, + "learning_rate": 7.409130513406435e-06, + "loss": 3.1341, + "step": 798000 + }, + { + "epoch": 6.54, + "learning_rate": 7.407486560686515e-06, + "loss": 3.1306, + "step": 798500 + }, + { + "epoch": 6.55, + "learning_rate": 7.4058426079665954e-06, + "loss": 3.134, + "step": 799000 + }, + { + "epoch": 6.55, + "learning_rate": 7.404198655246676e-06, + "loss": 3.1329, + "step": 799500 + }, + { + "epoch": 6.55, + "learning_rate": 7.4025547025267565e-06, + "loss": 3.1382, + "step": 800000 + }, + { + "epoch": 6.56, + "learning_rate": 7.400910749806837e-06, + "loss": 3.1293, + "step": 800500 + }, + { + "epoch": 6.56, + "learning_rate": 7.399266797086916e-06, + "loss": 3.135, + "step": 801000 + }, + { + "epoch": 6.57, + "learning_rate": 7.397622844366996e-06, + "loss": 3.1407, + "step": 801500 + }, + { + "epoch": 6.57, + "learning_rate": 7.395978891647076e-06, + "loss": 3.1354, + "step": 802000 + }, + { + "epoch": 6.57, + "learning_rate": 7.394334938927157e-06, + "loss": 3.1371, + "step": 802500 + }, + { + "epoch": 6.58, + "learning_rate": 7.392690986207237e-06, + "loss": 3.1362, + "step": 803000 + }, + { + "epoch": 6.58, + "learning_rate": 7.391047033487317e-06, + "loss": 3.129, + "step": 803500 + }, + { + "epoch": 6.59, + "learning_rate": 7.3894030807673975e-06, + "loss": 3.1378, + "step": 804000 + }, + { + "epoch": 6.59, + "learning_rate": 7.387759128047478e-06, + "loss": 3.125, + "step": 804500 + }, + { + "epoch": 6.6, + "learning_rate": 7.386115175327559e-06, + "loss": 3.1285, + "step": 805000 + }, + { + "epoch": 6.6, + "learning_rate": 7.384471222607639e-06, + "loss": 3.1344, + "step": 805500 + }, + { + "epoch": 6.6, + "learning_rate": 7.382827269887719e-06, + "loss": 3.1241, + "step": 806000 + }, + { + "epoch": 6.61, + "learning_rate": 7.381183317167799e-06, + "loss": 3.1264, + "step": 806500 + }, + { + "epoch": 6.61, + "learning_rate": 7.379539364447878e-06, + "loss": 3.1278, + "step": 807000 + }, + { + "epoch": 6.62, + "learning_rate": 7.377895411727959e-06, + "loss": 3.1363, + "step": 807500 + }, + { + "epoch": 6.62, + "learning_rate": 7.376251459008039e-06, + "loss": 3.124, + "step": 808000 + }, + { + "epoch": 6.62, + "learning_rate": 7.3746075062881195e-06, + "loss": 3.1246, + "step": 808500 + }, + { + "epoch": 6.63, + "learning_rate": 7.3729635535682e-06, + "loss": 3.1281, + "step": 809000 + }, + { + "epoch": 6.63, + "learning_rate": 7.37131960084828e-06, + "loss": 3.1198, + "step": 809500 + }, + { + "epoch": 6.64, + "learning_rate": 7.369675648128361e-06, + "loss": 3.1276, + "step": 810000 + }, + { + "epoch": 6.64, + "eval_accuracy": 0.47175398712230254, + "eval_loss": 2.9837634563446045, + "eval_runtime": 427.1705, + "eval_samples_per_second": 721.857, + "eval_steps_per_second": 15.041, + "step": 810000 + }, + { + "epoch": 6.64, + "learning_rate": 7.368031695408441e-06, + "loss": 3.1275, + "step": 810500 + }, + { + "epoch": 6.64, + "learning_rate": 7.366387742688521e-06, + "loss": 3.1262, + "step": 811000 + }, + { + "epoch": 6.65, + "learning_rate": 7.364743789968601e-06, + "loss": 3.1216, + "step": 811500 + }, + { + "epoch": 6.65, + "learning_rate": 7.363099837248682e-06, + "loss": 3.1212, + "step": 812000 + }, + { + "epoch": 6.66, + "learning_rate": 7.361455884528762e-06, + "loss": 3.1169, + "step": 812500 + }, + { + "epoch": 6.66, + "learning_rate": 7.3598119318088415e-06, + "loss": 3.1221, + "step": 813000 + }, + { + "epoch": 6.66, + "learning_rate": 7.358167979088922e-06, + "loss": 3.128, + "step": 813500 + }, + { + "epoch": 6.67, + "learning_rate": 7.356524026369002e-06, + "loss": 3.1245, + "step": 814000 + }, + { + "epoch": 6.67, + "learning_rate": 7.354880073649082e-06, + "loss": 3.1248, + "step": 814500 + }, + { + "epoch": 6.68, + "learning_rate": 7.353236120929163e-06, + "loss": 3.1219, + "step": 815000 + }, + { + "epoch": 6.68, + "learning_rate": 7.351592168209243e-06, + "loss": 3.1285, + "step": 815500 + }, + { + "epoch": 6.69, + "learning_rate": 7.349948215489323e-06, + "loss": 3.1275, + "step": 816000 + }, + { + "epoch": 6.69, + "learning_rate": 7.348304262769403e-06, + "loss": 3.1213, + "step": 816500 + }, + { + "epoch": 6.69, + "learning_rate": 7.346660310049483e-06, + "loss": 3.1211, + "step": 817000 + }, + { + "epoch": 6.7, + "learning_rate": 7.345016357329564e-06, + "loss": 3.1231, + "step": 817500 + }, + { + "epoch": 6.7, + "learning_rate": 7.3433724046096445e-06, + "loss": 3.1306, + "step": 818000 + }, + { + "epoch": 6.71, + "learning_rate": 7.341728451889725e-06, + "loss": 3.1169, + "step": 818500 + }, + { + "epoch": 6.71, + "learning_rate": 7.340084499169804e-06, + "loss": 3.1254, + "step": 819000 + }, + { + "epoch": 6.71, + "learning_rate": 7.338440546449884e-06, + "loss": 3.1182, + "step": 819500 + }, + { + "epoch": 6.72, + "learning_rate": 7.336796593729965e-06, + "loss": 3.1185, + "step": 820000 + }, + { + "epoch": 6.72, + "learning_rate": 7.335152641010045e-06, + "loss": 3.1197, + "step": 820500 + }, + { + "epoch": 6.73, + "learning_rate": 7.333508688290125e-06, + "loss": 3.1243, + "step": 821000 + }, + { + "epoch": 6.73, + "learning_rate": 7.331864735570205e-06, + "loss": 3.1223, + "step": 821500 + }, + { + "epoch": 6.73, + "learning_rate": 7.3302207828502855e-06, + "loss": 3.1259, + "step": 822000 + }, + { + "epoch": 6.74, + "learning_rate": 7.3285768301303665e-06, + "loss": 3.1265, + "step": 822500 + }, + { + "epoch": 6.74, + "learning_rate": 7.326932877410447e-06, + "loss": 3.1141, + "step": 823000 + }, + { + "epoch": 6.75, + "learning_rate": 7.325288924690527e-06, + "loss": 3.1206, + "step": 823500 + }, + { + "epoch": 6.75, + "learning_rate": 7.323644971970607e-06, + "loss": 3.1178, + "step": 824000 + }, + { + "epoch": 6.75, + "learning_rate": 7.322001019250688e-06, + "loss": 3.1122, + "step": 824500 + }, + { + "epoch": 6.76, + "learning_rate": 7.320357066530768e-06, + "loss": 3.1187, + "step": 825000 + }, + { + "epoch": 6.76, + "learning_rate": 7.318713113810847e-06, + "loss": 3.118, + "step": 825500 + }, + { + "epoch": 6.77, + "learning_rate": 7.317069161090927e-06, + "loss": 3.1143, + "step": 826000 + }, + { + "epoch": 6.77, + "learning_rate": 7.3154252083710075e-06, + "loss": 3.1176, + "step": 826500 + }, + { + "epoch": 6.78, + "learning_rate": 7.313781255651088e-06, + "loss": 3.1117, + "step": 827000 + }, + { + "epoch": 6.78, + "learning_rate": 7.3121373029311686e-06, + "loss": 3.1185, + "step": 827500 + }, + { + "epoch": 6.78, + "learning_rate": 7.310493350211249e-06, + "loss": 3.1208, + "step": 828000 + }, + { + "epoch": 6.79, + "learning_rate": 7.308849397491329e-06, + "loss": 3.1171, + "step": 828500 + }, + { + "epoch": 6.79, + "learning_rate": 7.307205444771409e-06, + "loss": 3.1127, + "step": 829000 + }, + { + "epoch": 6.8, + "learning_rate": 7.305561492051489e-06, + "loss": 3.1146, + "step": 829500 + }, + { + "epoch": 6.8, + "learning_rate": 7.30391753933157e-06, + "loss": 3.1141, + "step": 830000 + }, + { + "epoch": 6.8, + "learning_rate": 7.30227358661165e-06, + "loss": 3.1191, + "step": 830500 + }, + { + "epoch": 6.81, + "learning_rate": 7.30062963389173e-06, + "loss": 3.123, + "step": 831000 + }, + { + "epoch": 6.81, + "learning_rate": 7.29898568117181e-06, + "loss": 3.1178, + "step": 831500 + }, + { + "epoch": 6.82, + "learning_rate": 7.29734172845189e-06, + "loss": 3.1133, + "step": 832000 + }, + { + "epoch": 6.82, + "learning_rate": 7.29569777573197e-06, + "loss": 3.1174, + "step": 832500 + }, + { + "epoch": 6.82, + "learning_rate": 7.294053823012051e-06, + "loss": 3.1201, + "step": 833000 + }, + { + "epoch": 6.83, + "learning_rate": 7.292409870292131e-06, + "loss": 3.1199, + "step": 833500 + }, + { + "epoch": 6.83, + "learning_rate": 7.290765917572211e-06, + "loss": 3.1157, + "step": 834000 + }, + { + "epoch": 6.84, + "learning_rate": 7.289121964852291e-06, + "loss": 3.1183, + "step": 834500 + }, + { + "epoch": 6.84, + "learning_rate": 7.287478012132372e-06, + "loss": 3.1141, + "step": 835000 + }, + { + "epoch": 6.85, + "learning_rate": 7.285834059412452e-06, + "loss": 3.1165, + "step": 835500 + }, + { + "epoch": 6.85, + "learning_rate": 7.284190106692532e-06, + "loss": 3.1112, + "step": 836000 + }, + { + "epoch": 6.85, + "learning_rate": 7.2825461539726125e-06, + "loss": 3.1217, + "step": 836500 + }, + { + "epoch": 6.86, + "learning_rate": 7.2809022012526935e-06, + "loss": 3.1089, + "step": 837000 + }, + { + "epoch": 6.86, + "learning_rate": 7.279258248532772e-06, + "loss": 3.118, + "step": 837500 + }, + { + "epoch": 6.87, + "learning_rate": 7.277614295812853e-06, + "loss": 3.1127, + "step": 838000 + }, + { + "epoch": 6.87, + "learning_rate": 7.275970343092933e-06, + "loss": 3.1117, + "step": 838500 + }, + { + "epoch": 6.87, + "learning_rate": 7.274326390373013e-06, + "loss": 3.1112, + "step": 839000 + }, + { + "epoch": 6.88, + "learning_rate": 7.272682437653093e-06, + "loss": 3.1084, + "step": 839500 + }, + { + "epoch": 6.88, + "learning_rate": 7.271038484933174e-06, + "loss": 3.1102, + "step": 840000 + }, + { + "epoch": 6.88, + "eval_accuracy": 0.47402583913951796, + "eval_loss": 2.969046115875244, + "eval_runtime": 428.0247, + "eval_samples_per_second": 720.416, + "eval_steps_per_second": 15.011, + "step": 840000 + }, + { + "epoch": 6.89, + "learning_rate": 7.269394532213254e-06, + "loss": 3.1116, + "step": 840500 + }, + { + "epoch": 6.89, + "learning_rate": 7.2677505794933345e-06, + "loss": 3.1169, + "step": 841000 + }, + { + "epoch": 6.89, + "learning_rate": 7.266106626773415e-06, + "loss": 3.1087, + "step": 841500 + }, + { + "epoch": 6.9, + "learning_rate": 7.264462674053495e-06, + "loss": 3.1085, + "step": 842000 + }, + { + "epoch": 6.9, + "learning_rate": 7.262818721333576e-06, + "loss": 3.1109, + "step": 842500 + }, + { + "epoch": 6.91, + "learning_rate": 7.261174768613656e-06, + "loss": 3.112, + "step": 843000 + }, + { + "epoch": 6.91, + "learning_rate": 7.259530815893735e-06, + "loss": 3.1072, + "step": 843500 + }, + { + "epoch": 6.91, + "learning_rate": 7.257886863173815e-06, + "loss": 3.1142, + "step": 844000 + }, + { + "epoch": 6.92, + "learning_rate": 7.256242910453895e-06, + "loss": 3.1146, + "step": 844500 + }, + { + "epoch": 6.92, + "learning_rate": 7.2545989577339755e-06, + "loss": 3.1098, + "step": 845000 + }, + { + "epoch": 6.93, + "learning_rate": 7.2529550050140565e-06, + "loss": 3.1138, + "step": 845500 + }, + { + "epoch": 6.93, + "learning_rate": 7.251311052294137e-06, + "loss": 3.1111, + "step": 846000 + }, + { + "epoch": 6.94, + "learning_rate": 7.249667099574217e-06, + "loss": 3.1045, + "step": 846500 + }, + { + "epoch": 6.94, + "learning_rate": 7.248023146854297e-06, + "loss": 3.1108, + "step": 847000 + }, + { + "epoch": 6.94, + "learning_rate": 7.246379194134378e-06, + "loss": 3.109, + "step": 847500 + }, + { + "epoch": 6.95, + "learning_rate": 7.244735241414458e-06, + "loss": 3.118, + "step": 848000 + }, + { + "epoch": 6.95, + "learning_rate": 7.243091288694538e-06, + "loss": 3.1119, + "step": 848500 + }, + { + "epoch": 6.96, + "learning_rate": 7.241447335974618e-06, + "loss": 3.1125, + "step": 849000 + }, + { + "epoch": 6.96, + "learning_rate": 7.2398033832546975e-06, + "loss": 3.1131, + "step": 849500 + }, + { + "epoch": 6.96, + "learning_rate": 7.238159430534778e-06, + "loss": 3.1072, + "step": 850000 + }, + { + "epoch": 6.97, + "learning_rate": 7.236515477814859e-06, + "loss": 3.1102, + "step": 850500 + }, + { + "epoch": 6.97, + "learning_rate": 7.234871525094939e-06, + "loss": 3.1095, + "step": 851000 + }, + { + "epoch": 6.98, + "learning_rate": 7.233227572375019e-06, + "loss": 3.1029, + "step": 851500 + }, + { + "epoch": 6.98, + "learning_rate": 7.231583619655099e-06, + "loss": 3.1064, + "step": 852000 + }, + { + "epoch": 6.98, + "learning_rate": 7.22993966693518e-06, + "loss": 3.1105, + "step": 852500 + }, + { + "epoch": 6.99, + "learning_rate": 7.22829571421526e-06, + "loss": 3.1084, + "step": 853000 + }, + { + "epoch": 6.99, + "learning_rate": 7.22665176149534e-06, + "loss": 3.107, + "step": 853500 + }, + { + "epoch": 7.0, + "learning_rate": 7.22500780877542e-06, + "loss": 3.1091, + "step": 854000 + }, + { + "epoch": 7.0, + "learning_rate": 7.2233638560555005e-06, + "loss": 3.1021, + "step": 854500 + }, + { + "epoch": 7.0, + "learning_rate": 7.2217199033355815e-06, + "loss": 3.0996, + "step": 855000 + }, + { + "epoch": 7.01, + "learning_rate": 7.220075950615661e-06, + "loss": 3.0913, + "step": 855500 + }, + { + "epoch": 7.01, + "learning_rate": 7.218431997895741e-06, + "loss": 3.1022, + "step": 856000 + }, + { + "epoch": 7.02, + "learning_rate": 7.216788045175821e-06, + "loss": 3.1077, + "step": 856500 + }, + { + "epoch": 7.02, + "learning_rate": 7.215144092455901e-06, + "loss": 3.1003, + "step": 857000 + }, + { + "epoch": 7.03, + "learning_rate": 7.213500139735981e-06, + "loss": 3.1036, + "step": 857500 + }, + { + "epoch": 7.03, + "learning_rate": 7.211856187016062e-06, + "loss": 3.1037, + "step": 858000 + }, + { + "epoch": 7.03, + "learning_rate": 7.210212234296142e-06, + "loss": 3.1082, + "step": 858500 + }, + { + "epoch": 7.04, + "learning_rate": 7.2085682815762225e-06, + "loss": 3.1052, + "step": 859000 + }, + { + "epoch": 7.04, + "learning_rate": 7.206924328856303e-06, + "loss": 3.1078, + "step": 859500 + }, + { + "epoch": 7.05, + "learning_rate": 7.2052803761363836e-06, + "loss": 3.1049, + "step": 860000 + }, + { + "epoch": 7.05, + "learning_rate": 7.203636423416464e-06, + "loss": 3.0981, + "step": 860500 + }, + { + "epoch": 7.05, + "learning_rate": 7.201992470696544e-06, + "loss": 3.1124, + "step": 861000 + }, + { + "epoch": 7.06, + "learning_rate": 7.200348517976623e-06, + "loss": 3.1003, + "step": 861500 + }, + { + "epoch": 7.06, + "learning_rate": 7.198704565256703e-06, + "loss": 3.1041, + "step": 862000 + }, + { + "epoch": 7.07, + "learning_rate": 7.197060612536783e-06, + "loss": 3.0975, + "step": 862500 + }, + { + "epoch": 7.07, + "learning_rate": 7.195416659816864e-06, + "loss": 3.1068, + "step": 863000 + }, + { + "epoch": 7.07, + "learning_rate": 7.1937727070969445e-06, + "loss": 3.1015, + "step": 863500 + }, + { + "epoch": 7.08, + "learning_rate": 7.192128754377025e-06, + "loss": 3.1072, + "step": 864000 + }, + { + "epoch": 7.08, + "learning_rate": 7.190484801657105e-06, + "loss": 3.0992, + "step": 864500 + }, + { + "epoch": 7.09, + "learning_rate": 7.188840848937186e-06, + "loss": 3.1031, + "step": 865000 + }, + { + "epoch": 7.09, + "learning_rate": 7.187196896217266e-06, + "loss": 3.0964, + "step": 865500 + }, + { + "epoch": 7.09, + "learning_rate": 7.185552943497346e-06, + "loss": 3.1047, + "step": 866000 + }, + { + "epoch": 7.1, + "learning_rate": 7.183908990777426e-06, + "loss": 3.103, + "step": 866500 + }, + { + "epoch": 7.1, + "learning_rate": 7.182265038057506e-06, + "loss": 3.0944, + "step": 867000 + }, + { + "epoch": 7.11, + "learning_rate": 7.1806210853375855e-06, + "loss": 3.0977, + "step": 867500 + }, + { + "epoch": 7.11, + "learning_rate": 7.1789771326176664e-06, + "loss": 3.1003, + "step": 868000 + }, + { + "epoch": 7.12, + "learning_rate": 7.1773331798977466e-06, + "loss": 3.1025, + "step": 868500 + }, + { + "epoch": 7.12, + "learning_rate": 7.175689227177827e-06, + "loss": 3.097, + "step": 869000 + }, + { + "epoch": 7.12, + "learning_rate": 7.174045274457907e-06, + "loss": 3.0947, + "step": 869500 + }, + { + "epoch": 7.13, + "learning_rate": 7.172401321737987e-06, + "loss": 3.1046, + "step": 870000 + }, + { + "epoch": 7.13, + "eval_accuracy": 0.4757021230011519, + "eval_loss": 2.956329584121704, + "eval_runtime": 428.2951, + "eval_samples_per_second": 719.962, + "eval_steps_per_second": 15.001, + "step": 870000 + }, + { + "epoch": 7.13, + "learning_rate": 7.170757369018068e-06, + "loss": 3.1, + "step": 870500 + }, + { + "epoch": 7.14, + "learning_rate": 7.169113416298148e-06, + "loss": 3.0954, + "step": 871000 + }, + { + "epoch": 7.14, + "learning_rate": 7.167469463578228e-06, + "loss": 3.0978, + "step": 871500 + }, + { + "epoch": 7.14, + "learning_rate": 7.165825510858308e-06, + "loss": 3.097, + "step": 872000 + }, + { + "epoch": 7.15, + "learning_rate": 7.164181558138389e-06, + "loss": 3.1062, + "step": 872500 + }, + { + "epoch": 7.15, + "learning_rate": 7.162537605418469e-06, + "loss": 3.0963, + "step": 873000 + }, + { + "epoch": 7.16, + "learning_rate": 7.1608936526985495e-06, + "loss": 3.0909, + "step": 873500 + }, + { + "epoch": 7.16, + "learning_rate": 7.159249699978629e-06, + "loss": 3.0979, + "step": 874000 + }, + { + "epoch": 7.16, + "learning_rate": 7.157605747258709e-06, + "loss": 3.0919, + "step": 874500 + }, + { + "epoch": 7.17, + "learning_rate": 7.155961794538789e-06, + "loss": 3.0945, + "step": 875000 + }, + { + "epoch": 7.17, + "learning_rate": 7.15431784181887e-06, + "loss": 3.0981, + "step": 875500 + }, + { + "epoch": 7.18, + "learning_rate": 7.15267388909895e-06, + "loss": 3.0912, + "step": 876000 + }, + { + "epoch": 7.18, + "learning_rate": 7.15102993637903e-06, + "loss": 3.0948, + "step": 876500 + }, + { + "epoch": 7.19, + "learning_rate": 7.14938598365911e-06, + "loss": 3.1008, + "step": 877000 + }, + { + "epoch": 7.19, + "learning_rate": 7.1477420309391905e-06, + "loss": 3.103, + "step": 877500 + }, + { + "epoch": 7.19, + "learning_rate": 7.1460980782192715e-06, + "loss": 3.0908, + "step": 878000 + }, + { + "epoch": 7.2, + "learning_rate": 7.144454125499352e-06, + "loss": 3.0975, + "step": 878500 + }, + { + "epoch": 7.2, + "learning_rate": 7.142810172779432e-06, + "loss": 3.0972, + "step": 879000 + }, + { + "epoch": 7.21, + "learning_rate": 7.141166220059512e-06, + "loss": 3.0939, + "step": 879500 + }, + { + "epoch": 7.21, + "learning_rate": 7.139522267339591e-06, + "loss": 3.099, + "step": 880000 + }, + { + "epoch": 7.21, + "learning_rate": 7.137878314619672e-06, + "loss": 3.0973, + "step": 880500 + }, + { + "epoch": 7.22, + "learning_rate": 7.136234361899752e-06, + "loss": 3.0922, + "step": 881000 + }, + { + "epoch": 7.22, + "learning_rate": 7.134590409179832e-06, + "loss": 3.0964, + "step": 881500 + }, + { + "epoch": 7.23, + "learning_rate": 7.1329464564599125e-06, + "loss": 3.0954, + "step": 882000 + }, + { + "epoch": 7.23, + "learning_rate": 7.131302503739993e-06, + "loss": 3.1022, + "step": 882500 + }, + { + "epoch": 7.23, + "learning_rate": 7.129658551020074e-06, + "loss": 3.099, + "step": 883000 + }, + { + "epoch": 7.24, + "learning_rate": 7.128014598300154e-06, + "loss": 3.0887, + "step": 883500 + }, + { + "epoch": 7.24, + "learning_rate": 7.126370645580234e-06, + "loss": 3.0924, + "step": 884000 + }, + { + "epoch": 7.25, + "learning_rate": 7.124726692860314e-06, + "loss": 3.0895, + "step": 884500 + }, + { + "epoch": 7.25, + "learning_rate": 7.123082740140395e-06, + "loss": 3.0968, + "step": 885000 + }, + { + "epoch": 7.25, + "learning_rate": 7.121438787420475e-06, + "loss": 3.0948, + "step": 885500 + }, + { + "epoch": 7.26, + "learning_rate": 7.119794834700554e-06, + "loss": 3.0969, + "step": 886000 + }, + { + "epoch": 7.26, + "learning_rate": 7.1181508819806345e-06, + "loss": 3.0952, + "step": 886500 + }, + { + "epoch": 7.27, + "learning_rate": 7.116506929260715e-06, + "loss": 3.0957, + "step": 887000 + }, + { + "epoch": 7.27, + "learning_rate": 7.114862976540795e-06, + "loss": 3.0874, + "step": 887500 + }, + { + "epoch": 7.28, + "learning_rate": 7.113219023820876e-06, + "loss": 3.0911, + "step": 888000 + }, + { + "epoch": 7.28, + "learning_rate": 7.111575071100956e-06, + "loss": 3.0898, + "step": 888500 + }, + { + "epoch": 7.28, + "learning_rate": 7.109931118381036e-06, + "loss": 3.0941, + "step": 889000 + }, + { + "epoch": 7.29, + "learning_rate": 7.108287165661116e-06, + "loss": 3.097, + "step": 889500 + }, + { + "epoch": 7.29, + "learning_rate": 7.106643212941196e-06, + "loss": 3.0977, + "step": 890000 + }, + { + "epoch": 7.3, + "learning_rate": 7.104999260221277e-06, + "loss": 3.0923, + "step": 890500 + }, + { + "epoch": 7.3, + "learning_rate": 7.103355307501357e-06, + "loss": 3.0844, + "step": 891000 + }, + { + "epoch": 7.3, + "learning_rate": 7.1017113547814375e-06, + "loss": 3.0889, + "step": 891500 + }, + { + "epoch": 7.31, + "learning_rate": 7.100067402061517e-06, + "loss": 3.0952, + "step": 892000 + }, + { + "epoch": 7.31, + "learning_rate": 7.098423449341597e-06, + "loss": 3.0884, + "step": 892500 + }, + { + "epoch": 7.32, + "learning_rate": 7.096779496621678e-06, + "loss": 3.0956, + "step": 893000 + }, + { + "epoch": 7.32, + "learning_rate": 7.095135543901758e-06, + "loss": 3.0888, + "step": 893500 + }, + { + "epoch": 7.32, + "learning_rate": 7.093491591181838e-06, + "loss": 3.092, + "step": 894000 + }, + { + "epoch": 7.33, + "learning_rate": 7.091847638461918e-06, + "loss": 3.0842, + "step": 894500 + }, + { + "epoch": 7.33, + "learning_rate": 7.090203685741998e-06, + "loss": 3.092, + "step": 895000 + }, + { + "epoch": 7.34, + "learning_rate": 7.088559733022079e-06, + "loss": 3.0886, + "step": 895500 + }, + { + "epoch": 7.34, + "learning_rate": 7.0869157803021594e-06, + "loss": 3.0845, + "step": 896000 + }, + { + "epoch": 7.34, + "learning_rate": 7.0852718275822396e-06, + "loss": 3.0871, + "step": 896500 + }, + { + "epoch": 7.35, + "learning_rate": 7.08362787486232e-06, + "loss": 3.0804, + "step": 897000 + }, + { + "epoch": 7.35, + "learning_rate": 7.081983922142401e-06, + "loss": 3.0817, + "step": 897500 + }, + { + "epoch": 7.36, + "learning_rate": 7.080339969422479e-06, + "loss": 3.0935, + "step": 898000 + }, + { + "epoch": 7.36, + "learning_rate": 7.07869601670256e-06, + "loss": 3.0893, + "step": 898500 + }, + { + "epoch": 7.37, + "learning_rate": 7.07705206398264e-06, + "loss": 3.0876, + "step": 899000 + }, + { + "epoch": 7.37, + "learning_rate": 7.07540811126272e-06, + "loss": 3.0946, + "step": 899500 + }, + { + "epoch": 7.37, + "learning_rate": 7.0737641585428005e-06, + "loss": 3.0817, + "step": 900000 + }, + { + "epoch": 7.37, + "eval_accuracy": 0.47711464421124683, + "eval_loss": 2.9477081298828125, + "eval_runtime": 415.7627, + "eval_samples_per_second": 741.663, + "eval_steps_per_second": 15.454, + "step": 900000 + }, + { + "epoch": 7.38, + "learning_rate": 7.0721202058228814e-06, + "loss": 3.0775, + "step": 900500 + }, + { + "epoch": 7.38, + "learning_rate": 7.0704762531029616e-06, + "loss": 3.0839, + "step": 901000 + }, + { + "epoch": 7.39, + "learning_rate": 7.068832300383042e-06, + "loss": 3.086, + "step": 901500 + }, + { + "epoch": 7.39, + "learning_rate": 7.067188347663122e-06, + "loss": 3.0927, + "step": 902000 + }, + { + "epoch": 7.39, + "learning_rate": 7.065544394943202e-06, + "loss": 3.0884, + "step": 902500 + }, + { + "epoch": 7.4, + "learning_rate": 7.063900442223283e-06, + "loss": 3.0848, + "step": 903000 + }, + { + "epoch": 7.4, + "learning_rate": 7.062256489503363e-06, + "loss": 3.0805, + "step": 903500 + }, + { + "epoch": 7.41, + "learning_rate": 7.060612536783442e-06, + "loss": 3.0891, + "step": 904000 + }, + { + "epoch": 7.41, + "learning_rate": 7.0589685840635224e-06, + "loss": 3.0887, + "step": 904500 + }, + { + "epoch": 7.41, + "learning_rate": 7.0573246313436026e-06, + "loss": 3.0889, + "step": 905000 + }, + { + "epoch": 7.42, + "learning_rate": 7.055680678623683e-06, + "loss": 3.0918, + "step": 905500 + }, + { + "epoch": 7.42, + "learning_rate": 7.054036725903764e-06, + "loss": 3.0735, + "step": 906000 + }, + { + "epoch": 7.43, + "learning_rate": 7.052392773183844e-06, + "loss": 3.0821, + "step": 906500 + }, + { + "epoch": 7.43, + "learning_rate": 7.050748820463924e-06, + "loss": 3.0843, + "step": 907000 + }, + { + "epoch": 7.43, + "learning_rate": 7.049104867744004e-06, + "loss": 3.088, + "step": 907500 + }, + { + "epoch": 7.44, + "learning_rate": 7.047460915024085e-06, + "loss": 3.0885, + "step": 908000 + }, + { + "epoch": 7.44, + "learning_rate": 7.045816962304165e-06, + "loss": 3.0836, + "step": 908500 + }, + { + "epoch": 7.45, + "learning_rate": 7.044173009584245e-06, + "loss": 3.083, + "step": 909000 + }, + { + "epoch": 7.45, + "learning_rate": 7.042529056864325e-06, + "loss": 3.0838, + "step": 909500 + }, + { + "epoch": 7.46, + "learning_rate": 7.040885104144405e-06, + "loss": 3.0789, + "step": 910000 + }, + { + "epoch": 7.46, + "learning_rate": 7.039241151424485e-06, + "loss": 3.082, + "step": 910500 + }, + { + "epoch": 7.46, + "learning_rate": 7.037597198704566e-06, + "loss": 3.08, + "step": 911000 + }, + { + "epoch": 7.47, + "learning_rate": 7.035953245984646e-06, + "loss": 3.0853, + "step": 911500 + }, + { + "epoch": 7.47, + "learning_rate": 7.034309293264726e-06, + "loss": 3.0808, + "step": 912000 + }, + { + "epoch": 7.48, + "learning_rate": 7.032665340544806e-06, + "loss": 3.0855, + "step": 912500 + }, + { + "epoch": 7.48, + "learning_rate": 7.031021387824887e-06, + "loss": 3.0833, + "step": 913000 + }, + { + "epoch": 7.48, + "learning_rate": 7.029377435104967e-06, + "loss": 3.0754, + "step": 913500 + }, + { + "epoch": 7.49, + "learning_rate": 7.027733482385047e-06, + "loss": 3.0819, + "step": 914000 + }, + { + "epoch": 7.49, + "learning_rate": 7.0260895296651275e-06, + "loss": 3.083, + "step": 914500 + }, + { + "epoch": 7.5, + "learning_rate": 7.024445576945208e-06, + "loss": 3.0783, + "step": 915000 + }, + { + "epoch": 7.5, + "learning_rate": 7.022801624225289e-06, + "loss": 3.0812, + "step": 915500 + }, + { + "epoch": 7.5, + "learning_rate": 7.021157671505368e-06, + "loss": 3.0782, + "step": 916000 + }, + { + "epoch": 7.51, + "learning_rate": 7.019513718785448e-06, + "loss": 3.0797, + "step": 916500 + }, + { + "epoch": 7.51, + "learning_rate": 7.017869766065528e-06, + "loss": 3.0876, + "step": 917000 + }, + { + "epoch": 7.52, + "learning_rate": 7.016225813345608e-06, + "loss": 3.0857, + "step": 917500 + }, + { + "epoch": 7.52, + "learning_rate": 7.014581860625688e-06, + "loss": 3.0776, + "step": 918000 + }, + { + "epoch": 7.53, + "learning_rate": 7.012937907905769e-06, + "loss": 3.085, + "step": 918500 + }, + { + "epoch": 7.53, + "learning_rate": 7.0112939551858495e-06, + "loss": 3.0817, + "step": 919000 + }, + { + "epoch": 7.53, + "learning_rate": 7.00965000246593e-06, + "loss": 3.0807, + "step": 919500 + }, + { + "epoch": 7.54, + "learning_rate": 7.00800604974601e-06, + "loss": 3.0892, + "step": 920000 + }, + { + "epoch": 7.54, + "learning_rate": 7.006362097026091e-06, + "loss": 3.0781, + "step": 920500 + }, + { + "epoch": 7.55, + "learning_rate": 7.004718144306171e-06, + "loss": 3.0726, + "step": 921000 + }, + { + "epoch": 7.55, + "learning_rate": 7.003074191586251e-06, + "loss": 3.0824, + "step": 921500 + }, + { + "epoch": 7.55, + "learning_rate": 7.001430238866331e-06, + "loss": 3.0816, + "step": 922000 + }, + { + "epoch": 7.56, + "learning_rate": 6.99978628614641e-06, + "loss": 3.0747, + "step": 922500 + }, + { + "epoch": 7.56, + "learning_rate": 6.9981423334264905e-06, + "loss": 3.0819, + "step": 923000 + }, + { + "epoch": 7.57, + "learning_rate": 6.9964983807065715e-06, + "loss": 3.0806, + "step": 923500 + }, + { + "epoch": 7.57, + "learning_rate": 6.994854427986652e-06, + "loss": 3.0842, + "step": 924000 + }, + { + "epoch": 7.57, + "learning_rate": 6.993210475266732e-06, + "loss": 3.0732, + "step": 924500 + }, + { + "epoch": 7.58, + "learning_rate": 6.991566522546812e-06, + "loss": 3.0746, + "step": 925000 + }, + { + "epoch": 7.58, + "learning_rate": 6.989922569826893e-06, + "loss": 3.0789, + "step": 925500 + }, + { + "epoch": 7.59, + "learning_rate": 6.988278617106973e-06, + "loss": 3.0817, + "step": 926000 + }, + { + "epoch": 7.59, + "learning_rate": 6.986634664387053e-06, + "loss": 3.0718, + "step": 926500 + }, + { + "epoch": 7.59, + "learning_rate": 6.984990711667133e-06, + "loss": 3.0824, + "step": 927000 + }, + { + "epoch": 7.6, + "learning_rate": 6.983346758947213e-06, + "loss": 3.0776, + "step": 927500 + }, + { + "epoch": 7.6, + "learning_rate": 6.981702806227294e-06, + "loss": 3.075, + "step": 928000 + }, + { + "epoch": 7.61, + "learning_rate": 6.980058853507374e-06, + "loss": 3.0787, + "step": 928500 + }, + { + "epoch": 7.61, + "learning_rate": 6.978414900787454e-06, + "loss": 3.0708, + "step": 929000 + }, + { + "epoch": 7.62, + "learning_rate": 6.976770948067534e-06, + "loss": 3.086, + "step": 929500 + }, + { + "epoch": 7.62, + "learning_rate": 6.975126995347614e-06, + "loss": 3.0813, + "step": 930000 + }, + { + "epoch": 7.62, + "eval_accuracy": 0.478479420881626, + "eval_loss": 2.939741611480713, + "eval_runtime": 412.5363, + "eval_samples_per_second": 747.464, + "eval_steps_per_second": 15.574, + "step": 930000 + }, + { + "epoch": 7.62, + "learning_rate": 6.973483042627694e-06, + "loss": 3.0788, + "step": 930500 + }, + { + "epoch": 7.63, + "learning_rate": 6.971839089907775e-06, + "loss": 3.0747, + "step": 931000 + }, + { + "epoch": 7.63, + "learning_rate": 6.970195137187855e-06, + "loss": 3.0767, + "step": 931500 + }, + { + "epoch": 7.64, + "learning_rate": 6.968551184467935e-06, + "loss": 3.0797, + "step": 932000 + }, + { + "epoch": 7.64, + "learning_rate": 6.9669072317480155e-06, + "loss": 3.0759, + "step": 932500 + }, + { + "epoch": 7.64, + "learning_rate": 6.9652632790280964e-06, + "loss": 3.0809, + "step": 933000 + }, + { + "epoch": 7.65, + "learning_rate": 6.9636193263081766e-06, + "loss": 3.0756, + "step": 933500 + }, + { + "epoch": 7.65, + "learning_rate": 6.961975373588257e-06, + "loss": 3.0814, + "step": 934000 + }, + { + "epoch": 7.66, + "learning_rate": 6.960331420868336e-06, + "loss": 3.0757, + "step": 934500 + }, + { + "epoch": 7.66, + "learning_rate": 6.958687468148416e-06, + "loss": 3.0666, + "step": 935000 + }, + { + "epoch": 7.66, + "learning_rate": 6.957043515428496e-06, + "loss": 3.0683, + "step": 935500 + }, + { + "epoch": 7.67, + "learning_rate": 6.955399562708577e-06, + "loss": 3.0742, + "step": 936000 + }, + { + "epoch": 7.67, + "learning_rate": 6.953755609988657e-06, + "loss": 3.0645, + "step": 936500 + }, + { + "epoch": 7.68, + "learning_rate": 6.9521116572687374e-06, + "loss": 3.0785, + "step": 937000 + }, + { + "epoch": 7.68, + "learning_rate": 6.9504677045488176e-06, + "loss": 3.0819, + "step": 937500 + }, + { + "epoch": 7.68, + "learning_rate": 6.9488237518288985e-06, + "loss": 3.0803, + "step": 938000 + }, + { + "epoch": 7.69, + "learning_rate": 6.947179799108979e-06, + "loss": 3.0805, + "step": 938500 + }, + { + "epoch": 7.69, + "learning_rate": 6.945535846389059e-06, + "loss": 3.0775, + "step": 939000 + }, + { + "epoch": 7.7, + "learning_rate": 6.943891893669139e-06, + "loss": 3.0725, + "step": 939500 + }, + { + "epoch": 7.7, + "learning_rate": 6.942247940949219e-06, + "loss": 3.0744, + "step": 940000 + }, + { + "epoch": 7.71, + "learning_rate": 6.940603988229298e-06, + "loss": 3.0714, + "step": 940500 + }, + { + "epoch": 7.71, + "learning_rate": 6.938960035509379e-06, + "loss": 3.0715, + "step": 941000 + }, + { + "epoch": 7.71, + "learning_rate": 6.9373160827894594e-06, + "loss": 3.0773, + "step": 941500 + }, + { + "epoch": 7.72, + "learning_rate": 6.9356721300695396e-06, + "loss": 3.0694, + "step": 942000 + }, + { + "epoch": 7.72, + "learning_rate": 6.93402817734962e-06, + "loss": 3.0711, + "step": 942500 + }, + { + "epoch": 7.73, + "learning_rate": 6.9323842246297e-06, + "loss": 3.0744, + "step": 943000 + }, + { + "epoch": 7.73, + "learning_rate": 6.930740271909781e-06, + "loss": 3.0734, + "step": 943500 + }, + { + "epoch": 7.73, + "learning_rate": 6.929096319189861e-06, + "loss": 3.0716, + "step": 944000 + }, + { + "epoch": 7.74, + "learning_rate": 6.927452366469941e-06, + "loss": 3.0772, + "step": 944500 + }, + { + "epoch": 7.74, + "learning_rate": 6.925808413750021e-06, + "loss": 3.0681, + "step": 945000 + }, + { + "epoch": 7.75, + "learning_rate": 6.924164461030102e-06, + "loss": 3.0786, + "step": 945500 + }, + { + "epoch": 7.75, + "learning_rate": 6.922520508310182e-06, + "loss": 3.075, + "step": 946000 + }, + { + "epoch": 7.75, + "learning_rate": 6.9208765555902615e-06, + "loss": 3.0774, + "step": 946500 + }, + { + "epoch": 7.76, + "learning_rate": 6.919232602870342e-06, + "loss": 3.0651, + "step": 947000 + }, + { + "epoch": 7.76, + "learning_rate": 6.917588650150422e-06, + "loss": 3.0708, + "step": 947500 + }, + { + "epoch": 7.77, + "learning_rate": 6.915944697430502e-06, + "loss": 3.0813, + "step": 948000 + }, + { + "epoch": 7.77, + "learning_rate": 6.914300744710583e-06, + "loss": 3.0719, + "step": 948500 + }, + { + "epoch": 7.77, + "learning_rate": 6.912656791990663e-06, + "loss": 3.0739, + "step": 949000 + }, + { + "epoch": 7.78, + "learning_rate": 6.911012839270743e-06, + "loss": 3.07, + "step": 949500 + }, + { + "epoch": 7.78, + "learning_rate": 6.909368886550823e-06, + "loss": 3.0715, + "step": 950000 + }, + { + "epoch": 7.79, + "learning_rate": 6.907724933830903e-06, + "loss": 3.0675, + "step": 950500 + }, + { + "epoch": 7.79, + "learning_rate": 6.906080981110984e-06, + "loss": 3.0693, + "step": 951000 + }, + { + "epoch": 7.8, + "learning_rate": 6.9044370283910645e-06, + "loss": 3.0755, + "step": 951500 + }, + { + "epoch": 7.8, + "learning_rate": 6.902793075671145e-06, + "loss": 3.0741, + "step": 952000 + }, + { + "epoch": 7.8, + "learning_rate": 6.901149122951224e-06, + "loss": 3.068, + "step": 952500 + }, + { + "epoch": 7.81, + "learning_rate": 6.899505170231304e-06, + "loss": 3.0707, + "step": 953000 + }, + { + "epoch": 7.81, + "learning_rate": 6.897861217511385e-06, + "loss": 3.0731, + "step": 953500 + }, + { + "epoch": 7.82, + "learning_rate": 6.896217264791465e-06, + "loss": 3.0658, + "step": 954000 + }, + { + "epoch": 7.82, + "learning_rate": 6.894573312071545e-06, + "loss": 3.064, + "step": 954500 + }, + { + "epoch": 7.82, + "learning_rate": 6.892929359351625e-06, + "loss": 3.0747, + "step": 955000 + }, + { + "epoch": 7.83, + "learning_rate": 6.8912854066317055e-06, + "loss": 3.0731, + "step": 955500 + }, + { + "epoch": 7.83, + "learning_rate": 6.8896414539117865e-06, + "loss": 3.0695, + "step": 956000 + }, + { + "epoch": 7.84, + "learning_rate": 6.887997501191867e-06, + "loss": 3.0668, + "step": 956500 + }, + { + "epoch": 7.84, + "learning_rate": 6.886353548471947e-06, + "loss": 3.0691, + "step": 957000 + }, + { + "epoch": 7.84, + "learning_rate": 6.884709595752027e-06, + "loss": 3.068, + "step": 957500 + }, + { + "epoch": 7.85, + "learning_rate": 6.883065643032108e-06, + "loss": 3.0675, + "step": 958000 + }, + { + "epoch": 7.85, + "learning_rate": 6.881421690312186e-06, + "loss": 3.0692, + "step": 958500 + }, + { + "epoch": 7.86, + "learning_rate": 6.879777737592267e-06, + "loss": 3.0684, + "step": 959000 + }, + { + "epoch": 7.86, + "learning_rate": 6.878133784872347e-06, + "loss": 3.0657, + "step": 959500 + }, + { + "epoch": 7.87, + "learning_rate": 6.8764898321524275e-06, + "loss": 3.0709, + "step": 960000 + }, + { + "epoch": 7.87, + "eval_accuracy": 0.4803523362943657, + "eval_loss": 2.925929307937622, + "eval_runtime": 423.4884, + "eval_samples_per_second": 728.133, + "eval_steps_per_second": 15.172, + "step": 960000 + }, + { + "epoch": 7.87, + "learning_rate": 6.874845879432508e-06, + "loss": 3.0705, + "step": 960500 + }, + { + "epoch": 7.87, + "learning_rate": 6.873201926712589e-06, + "loss": 3.0672, + "step": 961000 + }, + { + "epoch": 7.88, + "learning_rate": 6.871557973992669e-06, + "loss": 3.064, + "step": 961500 + }, + { + "epoch": 7.88, + "learning_rate": 6.869914021272749e-06, + "loss": 3.0644, + "step": 962000 + }, + { + "epoch": 7.89, + "learning_rate": 6.868270068552829e-06, + "loss": 3.0636, + "step": 962500 + }, + { + "epoch": 7.89, + "learning_rate": 6.866626115832909e-06, + "loss": 3.0693, + "step": 963000 + }, + { + "epoch": 7.89, + "learning_rate": 6.86498216311299e-06, + "loss": 3.0597, + "step": 963500 + }, + { + "epoch": 7.9, + "learning_rate": 6.86333821039307e-06, + "loss": 3.0646, + "step": 964000 + }, + { + "epoch": 7.9, + "learning_rate": 6.8616942576731495e-06, + "loss": 3.0692, + "step": 964500 + }, + { + "epoch": 7.91, + "learning_rate": 6.86005030495323e-06, + "loss": 3.0657, + "step": 965000 + }, + { + "epoch": 7.91, + "learning_rate": 6.85840635223331e-06, + "loss": 3.061, + "step": 965500 + }, + { + "epoch": 7.91, + "learning_rate": 6.856762399513391e-06, + "loss": 3.0604, + "step": 966000 + }, + { + "epoch": 7.92, + "learning_rate": 6.855118446793471e-06, + "loss": 3.062, + "step": 966500 + }, + { + "epoch": 7.92, + "learning_rate": 6.853474494073551e-06, + "loss": 3.0679, + "step": 967000 + }, + { + "epoch": 7.93, + "learning_rate": 6.851830541353631e-06, + "loss": 3.0556, + "step": 967500 + }, + { + "epoch": 7.93, + "learning_rate": 6.850186588633711e-06, + "loss": 3.0666, + "step": 968000 + }, + { + "epoch": 7.93, + "learning_rate": 6.848542635913792e-06, + "loss": 3.0748, + "step": 968500 + }, + { + "epoch": 7.94, + "learning_rate": 6.846898683193872e-06, + "loss": 3.0643, + "step": 969000 + }, + { + "epoch": 7.94, + "learning_rate": 6.8452547304739524e-06, + "loss": 3.0599, + "step": 969500 + }, + { + "epoch": 7.95, + "learning_rate": 6.8436107777540326e-06, + "loss": 3.0642, + "step": 970000 + }, + { + "epoch": 7.95, + "learning_rate": 6.841966825034112e-06, + "loss": 3.0588, + "step": 970500 + }, + { + "epoch": 7.96, + "learning_rate": 6.840322872314192e-06, + "loss": 3.058, + "step": 971000 + }, + { + "epoch": 7.96, + "learning_rate": 6.838678919594273e-06, + "loss": 3.0621, + "step": 971500 + }, + { + "epoch": 7.96, + "learning_rate": 6.837034966874353e-06, + "loss": 3.0656, + "step": 972000 + }, + { + "epoch": 7.97, + "learning_rate": 6.835391014154433e-06, + "loss": 3.0681, + "step": 972500 + }, + { + "epoch": 7.97, + "learning_rate": 6.833747061434513e-06, + "loss": 3.0672, + "step": 973000 + }, + { + "epoch": 7.98, + "learning_rate": 6.832103108714594e-06, + "loss": 3.068, + "step": 973500 + }, + { + "epoch": 7.98, + "learning_rate": 6.8304591559946744e-06, + "loss": 3.0616, + "step": 974000 + }, + { + "epoch": 7.98, + "learning_rate": 6.8288152032747546e-06, + "loss": 3.0651, + "step": 974500 + }, + { + "epoch": 7.99, + "learning_rate": 6.827171250554835e-06, + "loss": 3.0669, + "step": 975000 + }, + { + "epoch": 7.99, + "learning_rate": 6.825527297834915e-06, + "loss": 3.0668, + "step": 975500 + }, + { + "epoch": 8.0, + "learning_rate": 6.823883345114996e-06, + "loss": 3.0692, + "step": 976000 + }, + { + "epoch": 8.0, + "learning_rate": 6.822239392395076e-06, + "loss": 3.0622, + "step": 976500 + }, + { + "epoch": 8.0, + "learning_rate": 6.820595439675155e-06, + "loss": 3.0618, + "step": 977000 + }, + { + "epoch": 8.01, + "learning_rate": 6.818951486955235e-06, + "loss": 3.0672, + "step": 977500 + }, + { + "epoch": 8.01, + "learning_rate": 6.8173075342353154e-06, + "loss": 3.0686, + "step": 978000 + }, + { + "epoch": 8.02, + "learning_rate": 6.8156635815153956e-06, + "loss": 3.065, + "step": 978500 + }, + { + "epoch": 8.02, + "learning_rate": 6.8140196287954765e-06, + "loss": 3.0568, + "step": 979000 + }, + { + "epoch": 8.02, + "learning_rate": 6.812375676075557e-06, + "loss": 3.0605, + "step": 979500 + }, + { + "epoch": 8.03, + "learning_rate": 6.810731723355637e-06, + "loss": 3.0586, + "step": 980000 + }, + { + "epoch": 8.03, + "learning_rate": 6.809087770635717e-06, + "loss": 3.061, + "step": 980500 + }, + { + "epoch": 8.04, + "learning_rate": 6.807443817915798e-06, + "loss": 3.0603, + "step": 981000 + }, + { + "epoch": 8.04, + "learning_rate": 6.805799865195878e-06, + "loss": 3.0591, + "step": 981500 + }, + { + "epoch": 8.05, + "learning_rate": 6.804155912475958e-06, + "loss": 3.0645, + "step": 982000 + }, + { + "epoch": 8.05, + "learning_rate": 6.802511959756038e-06, + "loss": 3.0609, + "step": 982500 + }, + { + "epoch": 8.05, + "learning_rate": 6.8008680070361176e-06, + "loss": 3.0596, + "step": 983000 + }, + { + "epoch": 8.06, + "learning_rate": 6.799224054316198e-06, + "loss": 3.0609, + "step": 983500 + }, + { + "epoch": 8.06, + "learning_rate": 6.797580101596279e-06, + "loss": 3.0547, + "step": 984000 + }, + { + "epoch": 8.07, + "learning_rate": 6.795936148876359e-06, + "loss": 3.0566, + "step": 984500 + }, + { + "epoch": 8.07, + "learning_rate": 6.794292196156439e-06, + "loss": 3.0511, + "step": 985000 + }, + { + "epoch": 8.07, + "learning_rate": 6.792648243436519e-06, + "loss": 3.0621, + "step": 985500 + }, + { + "epoch": 8.08, + "learning_rate": 6.7910042907166e-06, + "loss": 3.0607, + "step": 986000 + }, + { + "epoch": 8.08, + "learning_rate": 6.78936033799668e-06, + "loss": 3.0596, + "step": 986500 + }, + { + "epoch": 8.09, + "learning_rate": 6.78771638527676e-06, + "loss": 3.0615, + "step": 987000 + }, + { + "epoch": 8.09, + "learning_rate": 6.78607243255684e-06, + "loss": 3.0581, + "step": 987500 + }, + { + "epoch": 8.09, + "learning_rate": 6.7844284798369205e-06, + "loss": 3.0586, + "step": 988000 + }, + { + "epoch": 8.1, + "learning_rate": 6.7827845271170015e-06, + "loss": 3.0569, + "step": 988500 + }, + { + "epoch": 8.1, + "learning_rate": 6.781140574397081e-06, + "loss": 3.0522, + "step": 989000 + }, + { + "epoch": 8.11, + "learning_rate": 6.779496621677161e-06, + "loss": 3.0566, + "step": 989500 + }, + { + "epoch": 8.11, + "learning_rate": 6.777852668957241e-06, + "loss": 3.0528, + "step": 990000 + }, + { + "epoch": 8.11, + "eval_accuracy": 0.4812049123013876, + "eval_loss": 2.920849323272705, + "eval_runtime": 415.0385, + "eval_samples_per_second": 742.958, + "eval_steps_per_second": 15.48, + "step": 990000 + }, + { + "epoch": 8.11, + "learning_rate": 6.776208716237321e-06, + "loss": 3.0549, + "step": 990500 + }, + { + "epoch": 8.12, + "learning_rate": 6.774564763517401e-06, + "loss": 3.0566, + "step": 991000 + }, + { + "epoch": 8.12, + "learning_rate": 6.772920810797482e-06, + "loss": 3.0576, + "step": 991500 + }, + { + "epoch": 8.13, + "learning_rate": 6.771276858077562e-06, + "loss": 3.0539, + "step": 992000 + }, + { + "epoch": 8.13, + "learning_rate": 6.7696329053576425e-06, + "loss": 3.0525, + "step": 992500 + }, + { + "epoch": 8.14, + "learning_rate": 6.767988952637723e-06, + "loss": 3.0545, + "step": 993000 + }, + { + "epoch": 8.14, + "learning_rate": 6.766344999917804e-06, + "loss": 3.0543, + "step": 993500 + }, + { + "epoch": 8.14, + "learning_rate": 6.764701047197884e-06, + "loss": 3.0492, + "step": 994000 + }, + { + "epoch": 8.15, + "learning_rate": 6.763057094477964e-06, + "loss": 3.0608, + "step": 994500 + }, + { + "epoch": 8.15, + "learning_rate": 6.761413141758043e-06, + "loss": 3.0615, + "step": 995000 + }, + { + "epoch": 8.16, + "learning_rate": 6.759769189038123e-06, + "loss": 3.0605, + "step": 995500 + }, + { + "epoch": 8.16, + "learning_rate": 6.758125236318203e-06, + "loss": 3.0584, + "step": 996000 + }, + { + "epoch": 8.16, + "learning_rate": 6.756481283598284e-06, + "loss": 3.0554, + "step": 996500 + }, + { + "epoch": 8.17, + "learning_rate": 6.7548373308783645e-06, + "loss": 3.0558, + "step": 997000 + }, + { + "epoch": 8.17, + "learning_rate": 6.753193378158445e-06, + "loss": 3.056, + "step": 997500 + }, + { + "epoch": 8.18, + "learning_rate": 6.751549425438525e-06, + "loss": 3.0517, + "step": 998000 + }, + { + "epoch": 8.18, + "learning_rate": 6.749905472718606e-06, + "loss": 3.0549, + "step": 998500 + }, + { + "epoch": 8.18, + "learning_rate": 6.748261519998686e-06, + "loss": 3.0613, + "step": 999000 + }, + { + "epoch": 8.19, + "learning_rate": 6.746617567278766e-06, + "loss": 3.0511, + "step": 999500 + }, + { + "epoch": 8.19, + "learning_rate": 6.744973614558846e-06, + "loss": 3.0535, + "step": 1000000 + }, + { + "epoch": 8.2, + "learning_rate": 6.743329661838926e-06, + "loss": 3.0498, + "step": 1000500 + }, + { + "epoch": 8.2, + "learning_rate": 6.7416857091190055e-06, + "loss": 3.0545, + "step": 1001000 + }, + { + "epoch": 8.21, + "learning_rate": 6.7400417563990865e-06, + "loss": 3.0522, + "step": 1001500 + }, + { + "epoch": 8.21, + "learning_rate": 6.738397803679167e-06, + "loss": 3.0562, + "step": 1002000 + }, + { + "epoch": 8.21, + "learning_rate": 6.736753850959247e-06, + "loss": 3.0551, + "step": 1002500 + }, + { + "epoch": 8.22, + "learning_rate": 6.735109898239327e-06, + "loss": 3.0594, + "step": 1003000 + }, + { + "epoch": 8.22, + "learning_rate": 6.733465945519407e-06, + "loss": 3.0588, + "step": 1003500 + }, + { + "epoch": 8.23, + "learning_rate": 6.731821992799488e-06, + "loss": 3.0536, + "step": 1004000 + }, + { + "epoch": 8.23, + "learning_rate": 6.730178040079568e-06, + "loss": 3.0492, + "step": 1004500 + }, + { + "epoch": 8.23, + "learning_rate": 6.728534087359648e-06, + "loss": 3.049, + "step": 1005000 + }, + { + "epoch": 8.24, + "learning_rate": 6.726890134639728e-06, + "loss": 3.0574, + "step": 1005500 + }, + { + "epoch": 8.24, + "learning_rate": 6.725246181919809e-06, + "loss": 3.0467, + "step": 1006000 + }, + { + "epoch": 8.25, + "learning_rate": 6.7236022291998894e-06, + "loss": 3.0529, + "step": 1006500 + }, + { + "epoch": 8.25, + "learning_rate": 6.721958276479969e-06, + "loss": 3.0568, + "step": 1007000 + }, + { + "epoch": 8.25, + "learning_rate": 6.720314323760049e-06, + "loss": 3.0447, + "step": 1007500 + }, + { + "epoch": 8.26, + "learning_rate": 6.718670371040129e-06, + "loss": 3.0549, + "step": 1008000 + }, + { + "epoch": 8.26, + "learning_rate": 6.717026418320209e-06, + "loss": 3.0504, + "step": 1008500 + }, + { + "epoch": 8.27, + "learning_rate": 6.71538246560029e-06, + "loss": 3.0499, + "step": 1009000 + }, + { + "epoch": 8.27, + "learning_rate": 6.71373851288037e-06, + "loss": 3.0537, + "step": 1009500 + }, + { + "epoch": 8.27, + "learning_rate": 6.71209456016045e-06, + "loss": 3.0496, + "step": 1010000 + }, + { + "epoch": 8.28, + "learning_rate": 6.7104506074405304e-06, + "loss": 3.0458, + "step": 1010500 + }, + { + "epoch": 8.28, + "learning_rate": 6.708806654720611e-06, + "loss": 3.047, + "step": 1011000 + }, + { + "epoch": 8.29, + "learning_rate": 6.7071627020006915e-06, + "loss": 3.0525, + "step": 1011500 + }, + { + "epoch": 8.29, + "learning_rate": 6.705518749280772e-06, + "loss": 3.0566, + "step": 1012000 + }, + { + "epoch": 8.3, + "learning_rate": 6.703874796560852e-06, + "loss": 3.0503, + "step": 1012500 + }, + { + "epoch": 8.3, + "learning_rate": 6.702230843840931e-06, + "loss": 3.0531, + "step": 1013000 + }, + { + "epoch": 8.3, + "learning_rate": 6.700586891121011e-06, + "loss": 3.0495, + "step": 1013500 + }, + { + "epoch": 8.31, + "learning_rate": 6.698942938401092e-06, + "loss": 3.052, + "step": 1014000 + }, + { + "epoch": 8.31, + "learning_rate": 6.697298985681172e-06, + "loss": 3.0462, + "step": 1014500 + }, + { + "epoch": 8.32, + "learning_rate": 6.695655032961252e-06, + "loss": 3.0589, + "step": 1015000 + }, + { + "epoch": 8.32, + "learning_rate": 6.6940110802413325e-06, + "loss": 3.0518, + "step": 1015500 + }, + { + "epoch": 8.32, + "learning_rate": 6.692367127521413e-06, + "loss": 3.0572, + "step": 1016000 + }, + { + "epoch": 8.33, + "learning_rate": 6.690723174801494e-06, + "loss": 3.0515, + "step": 1016500 + }, + { + "epoch": 8.33, + "learning_rate": 6.689079222081574e-06, + "loss": 3.0498, + "step": 1017000 + }, + { + "epoch": 8.34, + "learning_rate": 6.687435269361654e-06, + "loss": 3.0501, + "step": 1017500 + }, + { + "epoch": 8.34, + "learning_rate": 6.685791316641734e-06, + "loss": 3.0552, + "step": 1018000 + }, + { + "epoch": 8.34, + "learning_rate": 6.684147363921815e-06, + "loss": 3.049, + "step": 1018500 + }, + { + "epoch": 8.35, + "learning_rate": 6.6825034112018934e-06, + "loss": 3.0508, + "step": 1019000 + }, + { + "epoch": 8.35, + "learning_rate": 6.680859458481974e-06, + "loss": 3.0454, + "step": 1019500 + }, + { + "epoch": 8.36, + "learning_rate": 6.6792155057620545e-06, + "loss": 3.0541, + "step": 1020000 + }, + { + "epoch": 8.36, + "eval_accuracy": 0.4829413695036341, + "eval_loss": 2.90887713432312, + "eval_runtime": 413.3015, + "eval_samples_per_second": 746.08, + "eval_steps_per_second": 15.546, + "step": 1020000 + }, + { + "epoch": 8.36, + "learning_rate": 6.677571553042135e-06, + "loss": 3.0473, + "step": 1020500 + }, + { + "epoch": 8.36, + "learning_rate": 6.675927600322215e-06, + "loss": 3.046, + "step": 1021000 + }, + { + "epoch": 8.37, + "learning_rate": 6.674283647602296e-06, + "loss": 3.0498, + "step": 1021500 + }, + { + "epoch": 8.37, + "learning_rate": 6.672639694882376e-06, + "loss": 3.0504, + "step": 1022000 + }, + { + "epoch": 8.38, + "learning_rate": 6.670995742162456e-06, + "loss": 3.0417, + "step": 1022500 + }, + { + "epoch": 8.38, + "learning_rate": 6.669351789442536e-06, + "loss": 3.0592, + "step": 1023000 + }, + { + "epoch": 8.39, + "learning_rate": 6.667707836722616e-06, + "loss": 3.0561, + "step": 1023500 + }, + { + "epoch": 8.39, + "learning_rate": 6.666063884002697e-06, + "loss": 3.0394, + "step": 1024000 + }, + { + "epoch": 8.39, + "learning_rate": 6.664419931282777e-06, + "loss": 3.0459, + "step": 1024500 + }, + { + "epoch": 8.4, + "learning_rate": 6.6627759785628575e-06, + "loss": 3.0518, + "step": 1025000 + }, + { + "epoch": 8.4, + "learning_rate": 6.661132025842937e-06, + "loss": 3.0447, + "step": 1025500 + }, + { + "epoch": 8.41, + "learning_rate": 6.659488073123017e-06, + "loss": 3.0462, + "step": 1026000 + }, + { + "epoch": 8.41, + "learning_rate": 6.657844120403098e-06, + "loss": 3.052, + "step": 1026500 + }, + { + "epoch": 8.41, + "learning_rate": 6.656200167683178e-06, + "loss": 3.0463, + "step": 1027000 + }, + { + "epoch": 8.42, + "learning_rate": 6.654556214963258e-06, + "loss": 3.0511, + "step": 1027500 + }, + { + "epoch": 8.42, + "learning_rate": 6.652912262243338e-06, + "loss": 3.0508, + "step": 1028000 + }, + { + "epoch": 8.43, + "learning_rate": 6.651268309523418e-06, + "loss": 3.0476, + "step": 1028500 + }, + { + "epoch": 8.43, + "learning_rate": 6.649624356803499e-06, + "loss": 3.0537, + "step": 1029000 + }, + { + "epoch": 8.43, + "learning_rate": 6.6479804040835795e-06, + "loss": 3.046, + "step": 1029500 + }, + { + "epoch": 8.44, + "learning_rate": 6.64633645136366e-06, + "loss": 3.0525, + "step": 1030000 + }, + { + "epoch": 8.44, + "learning_rate": 6.64469249864374e-06, + "loss": 3.0457, + "step": 1030500 + }, + { + "epoch": 8.45, + "learning_rate": 6.643048545923821e-06, + "loss": 3.0466, + "step": 1031000 + }, + { + "epoch": 8.45, + "learning_rate": 6.641404593203899e-06, + "loss": 3.0431, + "step": 1031500 + }, + { + "epoch": 8.45, + "learning_rate": 6.63976064048398e-06, + "loss": 3.0475, + "step": 1032000 + }, + { + "epoch": 8.46, + "learning_rate": 6.63811668776406e-06, + "loss": 3.0473, + "step": 1032500 + }, + { + "epoch": 8.46, + "learning_rate": 6.63647273504414e-06, + "loss": 3.0418, + "step": 1033000 + }, + { + "epoch": 8.47, + "learning_rate": 6.6348287823242205e-06, + "loss": 3.0426, + "step": 1033500 + }, + { + "epoch": 8.47, + "learning_rate": 6.6331848296043015e-06, + "loss": 3.0471, + "step": 1034000 + }, + { + "epoch": 8.48, + "learning_rate": 6.631540876884382e-06, + "loss": 3.0357, + "step": 1034500 + }, + { + "epoch": 8.48, + "learning_rate": 6.629896924164462e-06, + "loss": 3.0484, + "step": 1035000 + }, + { + "epoch": 8.48, + "learning_rate": 6.628252971444542e-06, + "loss": 3.051, + "step": 1035500 + }, + { + "epoch": 8.49, + "learning_rate": 6.626609018724622e-06, + "loss": 3.0439, + "step": 1036000 + }, + { + "epoch": 8.49, + "learning_rate": 6.624965066004703e-06, + "loss": 3.0461, + "step": 1036500 + }, + { + "epoch": 8.5, + "learning_rate": 6.623321113284783e-06, + "loss": 3.0467, + "step": 1037000 + }, + { + "epoch": 8.5, + "learning_rate": 6.621677160564862e-06, + "loss": 3.047, + "step": 1037500 + }, + { + "epoch": 8.5, + "learning_rate": 6.6200332078449425e-06, + "loss": 3.0444, + "step": 1038000 + }, + { + "epoch": 8.51, + "learning_rate": 6.618389255125023e-06, + "loss": 3.0433, + "step": 1038500 + }, + { + "epoch": 8.51, + "learning_rate": 6.616745302405104e-06, + "loss": 3.0416, + "step": 1039000 + }, + { + "epoch": 8.52, + "learning_rate": 6.615101349685184e-06, + "loss": 3.042, + "step": 1039500 + }, + { + "epoch": 8.52, + "learning_rate": 6.613457396965264e-06, + "loss": 3.042, + "step": 1040000 + }, + { + "epoch": 8.52, + "learning_rate": 6.611813444245344e-06, + "loss": 3.0503, + "step": 1040500 + }, + { + "epoch": 8.53, + "learning_rate": 6.610169491525424e-06, + "loss": 3.0503, + "step": 1041000 + }, + { + "epoch": 8.53, + "learning_rate": 6.608525538805505e-06, + "loss": 3.0391, + "step": 1041500 + }, + { + "epoch": 8.54, + "learning_rate": 6.606881586085585e-06, + "loss": 3.0456, + "step": 1042000 + }, + { + "epoch": 8.54, + "learning_rate": 6.605237633365665e-06, + "loss": 3.0408, + "step": 1042500 + }, + { + "epoch": 8.55, + "learning_rate": 6.6035936806457454e-06, + "loss": 3.0393, + "step": 1043000 + }, + { + "epoch": 8.55, + "learning_rate": 6.601949727925825e-06, + "loss": 3.048, + "step": 1043500 + }, + { + "epoch": 8.55, + "learning_rate": 6.600305775205905e-06, + "loss": 3.0361, + "step": 1044000 + }, + { + "epoch": 8.56, + "learning_rate": 6.598661822485986e-06, + "loss": 3.0415, + "step": 1044500 + }, + { + "epoch": 8.56, + "learning_rate": 6.597017869766066e-06, + "loss": 3.0402, + "step": 1045000 + }, + { + "epoch": 8.57, + "learning_rate": 6.595373917046146e-06, + "loss": 3.0475, + "step": 1045500 + }, + { + "epoch": 8.57, + "learning_rate": 6.593729964326226e-06, + "loss": 3.044, + "step": 1046000 + }, + { + "epoch": 8.57, + "learning_rate": 6.592086011606307e-06, + "loss": 3.0376, + "step": 1046500 + }, + { + "epoch": 8.58, + "learning_rate": 6.590442058886387e-06, + "loss": 3.0411, + "step": 1047000 + }, + { + "epoch": 8.58, + "learning_rate": 6.588798106166467e-06, + "loss": 3.0327, + "step": 1047500 + }, + { + "epoch": 8.59, + "learning_rate": 6.5871541534465475e-06, + "loss": 3.038, + "step": 1048000 + }, + { + "epoch": 8.59, + "learning_rate": 6.585510200726628e-06, + "loss": 3.0352, + "step": 1048500 + }, + { + "epoch": 8.59, + "learning_rate": 6.583866248006709e-06, + "loss": 3.0367, + "step": 1049000 + }, + { + "epoch": 8.6, + "learning_rate": 6.582222295286788e-06, + "loss": 3.0446, + "step": 1049500 + }, + { + "epoch": 8.6, + "learning_rate": 6.580578342566868e-06, + "loss": 3.0469, + "step": 1050000 + }, + { + "epoch": 8.6, + "eval_accuracy": 0.4838990642400208, + "eval_loss": 2.9014947414398193, + "eval_runtime": 404.752, + "eval_samples_per_second": 761.839, + "eval_steps_per_second": 15.874, + "step": 1050000 + }, + { + "epoch": 8.61, + "learning_rate": 6.578934389846948e-06, + "loss": 3.0436, + "step": 1050500 + }, + { + "epoch": 8.61, + "learning_rate": 6.577290437127028e-06, + "loss": 3.0516, + "step": 1051000 + }, + { + "epoch": 8.61, + "learning_rate": 6.5756464844071084e-06, + "loss": 3.046, + "step": 1051500 + }, + { + "epoch": 8.62, + "learning_rate": 6.574002531687189e-06, + "loss": 3.0435, + "step": 1052000 + }, + { + "epoch": 8.62, + "learning_rate": 6.5723585789672695e-06, + "loss": 3.0383, + "step": 1052500 + }, + { + "epoch": 8.63, + "learning_rate": 6.57071462624735e-06, + "loss": 3.0484, + "step": 1053000 + }, + { + "epoch": 8.63, + "learning_rate": 6.56907067352743e-06, + "loss": 3.0375, + "step": 1053500 + }, + { + "epoch": 8.64, + "learning_rate": 6.567426720807511e-06, + "loss": 3.0459, + "step": 1054000 + }, + { + "epoch": 8.64, + "learning_rate": 6.565782768087591e-06, + "loss": 3.0424, + "step": 1054500 + }, + { + "epoch": 8.64, + "learning_rate": 6.564138815367671e-06, + "loss": 3.0355, + "step": 1055000 + }, + { + "epoch": 8.65, + "learning_rate": 6.56249486264775e-06, + "loss": 3.0374, + "step": 1055500 + }, + { + "epoch": 8.65, + "learning_rate": 6.56085090992783e-06, + "loss": 3.0422, + "step": 1056000 + }, + { + "epoch": 8.66, + "learning_rate": 6.5592069572079105e-06, + "loss": 3.0372, + "step": 1056500 + }, + { + "epoch": 8.66, + "learning_rate": 6.5575630044879915e-06, + "loss": 3.0446, + "step": 1057000 + }, + { + "epoch": 8.66, + "learning_rate": 6.555919051768072e-06, + "loss": 3.0456, + "step": 1057500 + }, + { + "epoch": 8.67, + "learning_rate": 6.554275099048152e-06, + "loss": 3.0372, + "step": 1058000 + }, + { + "epoch": 8.67, + "learning_rate": 6.552631146328232e-06, + "loss": 3.0409, + "step": 1058500 + }, + { + "epoch": 8.68, + "learning_rate": 6.550987193608313e-06, + "loss": 3.0376, + "step": 1059000 + }, + { + "epoch": 8.68, + "learning_rate": 6.549343240888393e-06, + "loss": 3.0434, + "step": 1059500 + }, + { + "epoch": 8.68, + "learning_rate": 6.547699288168473e-06, + "loss": 3.0451, + "step": 1060000 + }, + { + "epoch": 8.69, + "learning_rate": 6.546055335448553e-06, + "loss": 3.0341, + "step": 1060500 + }, + { + "epoch": 8.69, + "learning_rate": 6.544411382728633e-06, + "loss": 3.0415, + "step": 1061000 + }, + { + "epoch": 8.7, + "learning_rate": 6.542767430008713e-06, + "loss": 3.0404, + "step": 1061500 + }, + { + "epoch": 8.7, + "learning_rate": 6.541123477288794e-06, + "loss": 3.0347, + "step": 1062000 + }, + { + "epoch": 8.7, + "learning_rate": 6.539479524568874e-06, + "loss": 3.0389, + "step": 1062500 + }, + { + "epoch": 8.71, + "learning_rate": 6.537835571848954e-06, + "loss": 3.044, + "step": 1063000 + }, + { + "epoch": 8.71, + "learning_rate": 6.536191619129034e-06, + "loss": 3.0347, + "step": 1063500 + }, + { + "epoch": 8.72, + "learning_rate": 6.534547666409114e-06, + "loss": 3.0444, + "step": 1064000 + }, + { + "epoch": 8.72, + "learning_rate": 6.532903713689195e-06, + "loss": 3.0326, + "step": 1064500 + }, + { + "epoch": 8.73, + "learning_rate": 6.531259760969275e-06, + "loss": 3.0371, + "step": 1065000 + }, + { + "epoch": 8.73, + "learning_rate": 6.529615808249355e-06, + "loss": 3.0326, + "step": 1065500 + }, + { + "epoch": 8.73, + "learning_rate": 6.5279718555294355e-06, + "loss": 3.0373, + "step": 1066000 + }, + { + "epoch": 8.74, + "learning_rate": 6.5263279028095165e-06, + "loss": 3.0276, + "step": 1066500 + }, + { + "epoch": 8.74, + "learning_rate": 6.524683950089597e-06, + "loss": 3.0328, + "step": 1067000 + }, + { + "epoch": 8.75, + "learning_rate": 6.523039997369676e-06, + "loss": 3.0332, + "step": 1067500 + }, + { + "epoch": 8.75, + "learning_rate": 6.521396044649756e-06, + "loss": 3.0334, + "step": 1068000 + }, + { + "epoch": 8.75, + "learning_rate": 6.519752091929836e-06, + "loss": 3.0387, + "step": 1068500 + }, + { + "epoch": 8.76, + "learning_rate": 6.518108139209916e-06, + "loss": 3.0343, + "step": 1069000 + }, + { + "epoch": 8.76, + "learning_rate": 6.516464186489997e-06, + "loss": 3.0377, + "step": 1069500 + }, + { + "epoch": 8.77, + "learning_rate": 6.514820233770077e-06, + "loss": 3.0383, + "step": 1070000 + }, + { + "epoch": 8.77, + "learning_rate": 6.5131762810501575e-06, + "loss": 3.0366, + "step": 1070500 + }, + { + "epoch": 8.77, + "learning_rate": 6.511532328330238e-06, + "loss": 3.0359, + "step": 1071000 + }, + { + "epoch": 8.78, + "learning_rate": 6.5098883756103186e-06, + "loss": 3.0398, + "step": 1071500 + }, + { + "epoch": 8.78, + "learning_rate": 6.508244422890399e-06, + "loss": 3.037, + "step": 1072000 + }, + { + "epoch": 8.79, + "learning_rate": 6.506600470170479e-06, + "loss": 3.0314, + "step": 1072500 + }, + { + "epoch": 8.79, + "learning_rate": 6.504956517450559e-06, + "loss": 3.0387, + "step": 1073000 + }, + { + "epoch": 8.79, + "learning_rate": 6.503312564730639e-06, + "loss": 3.0297, + "step": 1073500 + }, + { + "epoch": 8.8, + "learning_rate": 6.501668612010718e-06, + "loss": 3.0405, + "step": 1074000 + }, + { + "epoch": 8.8, + "learning_rate": 6.500024659290799e-06, + "loss": 3.0421, + "step": 1074500 + }, + { + "epoch": 8.81, + "learning_rate": 6.4983807065708795e-06, + "loss": 3.0287, + "step": 1075000 + }, + { + "epoch": 8.81, + "learning_rate": 6.49673675385096e-06, + "loss": 3.0283, + "step": 1075500 + }, + { + "epoch": 8.82, + "learning_rate": 6.49509280113104e-06, + "loss": 3.0377, + "step": 1076000 + }, + { + "epoch": 8.82, + "learning_rate": 6.49344884841112e-06, + "loss": 3.0367, + "step": 1076500 + }, + { + "epoch": 8.82, + "learning_rate": 6.491804895691201e-06, + "loss": 3.0373, + "step": 1077000 + }, + { + "epoch": 8.83, + "learning_rate": 6.490160942971281e-06, + "loss": 3.0327, + "step": 1077500 + }, + { + "epoch": 8.83, + "learning_rate": 6.488516990251361e-06, + "loss": 3.0261, + "step": 1078000 + }, + { + "epoch": 8.84, + "learning_rate": 6.486873037531441e-06, + "loss": 3.0347, + "step": 1078500 + }, + { + "epoch": 8.84, + "learning_rate": 6.485229084811522e-06, + "loss": 3.0382, + "step": 1079000 + }, + { + "epoch": 8.84, + "learning_rate": 6.483585132091602e-06, + "loss": 3.0369, + "step": 1079500 + }, + { + "epoch": 8.85, + "learning_rate": 6.4819411793716816e-06, + "loss": 3.0377, + "step": 1080000 + }, + { + "epoch": 8.85, + "eval_accuracy": 0.4847903545689289, + "eval_loss": 2.895965814590454, + "eval_runtime": 407.6957, + "eval_samples_per_second": 756.339, + "eval_steps_per_second": 15.759, + "step": 1080000 + }, + { + "epoch": 8.85, + "learning_rate": 6.480297226651762e-06, + "loss": 3.0378, + "step": 1080500 + }, + { + "epoch": 8.86, + "learning_rate": 6.478653273931842e-06, + "loss": 3.0374, + "step": 1081000 + }, + { + "epoch": 8.86, + "learning_rate": 6.477009321211922e-06, + "loss": 3.0315, + "step": 1081500 + }, + { + "epoch": 8.86, + "learning_rate": 6.475365368492003e-06, + "loss": 3.032, + "step": 1082000 + }, + { + "epoch": 8.87, + "learning_rate": 6.473721415772083e-06, + "loss": 3.0362, + "step": 1082500 + }, + { + "epoch": 8.87, + "learning_rate": 6.472077463052163e-06, + "loss": 3.0368, + "step": 1083000 + }, + { + "epoch": 8.88, + "learning_rate": 6.470433510332243e-06, + "loss": 3.0297, + "step": 1083500 + }, + { + "epoch": 8.88, + "learning_rate": 6.468789557612324e-06, + "loss": 3.031, + "step": 1084000 + }, + { + "epoch": 8.89, + "learning_rate": 6.467145604892404e-06, + "loss": 3.0321, + "step": 1084500 + }, + { + "epoch": 8.89, + "learning_rate": 6.4655016521724845e-06, + "loss": 3.0335, + "step": 1085000 + }, + { + "epoch": 8.89, + "learning_rate": 6.463857699452565e-06, + "loss": 3.0344, + "step": 1085500 + }, + { + "epoch": 8.9, + "learning_rate": 6.462213746732644e-06, + "loss": 3.0378, + "step": 1086000 + }, + { + "epoch": 8.9, + "learning_rate": 6.460569794012724e-06, + "loss": 3.0306, + "step": 1086500 + }, + { + "epoch": 8.91, + "learning_rate": 6.458925841292805e-06, + "loss": 3.0363, + "step": 1087000 + }, + { + "epoch": 8.91, + "learning_rate": 6.457281888572885e-06, + "loss": 3.0303, + "step": 1087500 + }, + { + "epoch": 8.91, + "learning_rate": 6.455637935852965e-06, + "loss": 3.0305, + "step": 1088000 + }, + { + "epoch": 8.92, + "learning_rate": 6.453993983133045e-06, + "loss": 3.0352, + "step": 1088500 + }, + { + "epoch": 8.92, + "learning_rate": 6.4523500304131255e-06, + "loss": 3.0338, + "step": 1089000 + }, + { + "epoch": 8.93, + "learning_rate": 6.4507060776932065e-06, + "loss": 3.036, + "step": 1089500 + }, + { + "epoch": 8.93, + "learning_rate": 6.449062124973287e-06, + "loss": 3.0388, + "step": 1090000 + }, + { + "epoch": 8.93, + "learning_rate": 6.447418172253367e-06, + "loss": 3.0292, + "step": 1090500 + }, + { + "epoch": 8.94, + "learning_rate": 6.445774219533447e-06, + "loss": 3.0351, + "step": 1091000 + }, + { + "epoch": 8.94, + "learning_rate": 6.444130266813528e-06, + "loss": 3.0325, + "step": 1091500 + }, + { + "epoch": 8.95, + "learning_rate": 6.442486314093606e-06, + "loss": 3.0258, + "step": 1092000 + }, + { + "epoch": 8.95, + "learning_rate": 6.440842361373687e-06, + "loss": 3.0293, + "step": 1092500 + }, + { + "epoch": 8.95, + "learning_rate": 6.439198408653767e-06, + "loss": 3.0273, + "step": 1093000 + }, + { + "epoch": 8.96, + "learning_rate": 6.4375544559338475e-06, + "loss": 3.0372, + "step": 1093500 + }, + { + "epoch": 8.96, + "learning_rate": 6.435910503213928e-06, + "loss": 3.0329, + "step": 1094000 + }, + { + "epoch": 8.97, + "learning_rate": 6.434266550494009e-06, + "loss": 3.0329, + "step": 1094500 + }, + { + "epoch": 8.97, + "learning_rate": 6.432622597774089e-06, + "loss": 3.0295, + "step": 1095000 + }, + { + "epoch": 8.98, + "learning_rate": 6.430978645054169e-06, + "loss": 3.0264, + "step": 1095500 + }, + { + "epoch": 8.98, + "learning_rate": 6.429334692334249e-06, + "loss": 3.028, + "step": 1096000 + }, + { + "epoch": 8.98, + "learning_rate": 6.427690739614329e-06, + "loss": 3.0408, + "step": 1096500 + }, + { + "epoch": 8.99, + "learning_rate": 6.42604678689441e-06, + "loss": 3.0269, + "step": 1097000 + }, + { + "epoch": 8.99, + "learning_rate": 6.42440283417449e-06, + "loss": 3.0304, + "step": 1097500 + }, + { + "epoch": 9.0, + "learning_rate": 6.4227588814545695e-06, + "loss": 3.0328, + "step": 1098000 + }, + { + "epoch": 9.0, + "learning_rate": 6.42111492873465e-06, + "loss": 3.0254, + "step": 1098500 + }, + { + "epoch": 9.0, + "learning_rate": 6.41947097601473e-06, + "loss": 3.0365, + "step": 1099000 + }, + { + "epoch": 9.01, + "learning_rate": 6.417827023294811e-06, + "loss": 3.0218, + "step": 1099500 + }, + { + "epoch": 9.01, + "learning_rate": 6.416183070574891e-06, + "loss": 3.0276, + "step": 1100000 + }, + { + "epoch": 9.02, + "learning_rate": 6.414539117854971e-06, + "loss": 3.0299, + "step": 1100500 + }, + { + "epoch": 9.02, + "learning_rate": 6.412895165135051e-06, + "loss": 3.0222, + "step": 1101000 + }, + { + "epoch": 9.02, + "learning_rate": 6.411251212415131e-06, + "loss": 3.0279, + "step": 1101500 + }, + { + "epoch": 9.03, + "learning_rate": 6.409607259695212e-06, + "loss": 3.0317, + "step": 1102000 + }, + { + "epoch": 9.03, + "learning_rate": 6.407963306975292e-06, + "loss": 3.0244, + "step": 1102500 + }, + { + "epoch": 9.04, + "learning_rate": 6.4063193542553725e-06, + "loss": 3.0241, + "step": 1103000 + }, + { + "epoch": 9.04, + "learning_rate": 6.404675401535453e-06, + "loss": 3.0223, + "step": 1103500 + }, + { + "epoch": 9.04, + "learning_rate": 6.403031448815532e-06, + "loss": 3.0291, + "step": 1104000 + }, + { + "epoch": 9.05, + "learning_rate": 6.401387496095612e-06, + "loss": 3.0236, + "step": 1104500 + }, + { + "epoch": 9.05, + "learning_rate": 6.399743543375693e-06, + "loss": 3.0248, + "step": 1105000 + }, + { + "epoch": 9.06, + "learning_rate": 6.398099590655773e-06, + "loss": 3.0271, + "step": 1105500 + }, + { + "epoch": 9.06, + "learning_rate": 6.396455637935853e-06, + "loss": 3.0282, + "step": 1106000 + }, + { + "epoch": 9.07, + "learning_rate": 6.394811685215933e-06, + "loss": 3.0316, + "step": 1106500 + }, + { + "epoch": 9.07, + "learning_rate": 6.393167732496014e-06, + "loss": 3.0257, + "step": 1107000 + }, + { + "epoch": 9.07, + "learning_rate": 6.3915237797760945e-06, + "loss": 3.0205, + "step": 1107500 + }, + { + "epoch": 9.08, + "learning_rate": 6.389879827056175e-06, + "loss": 3.0294, + "step": 1108000 + }, + { + "epoch": 9.08, + "learning_rate": 6.388235874336255e-06, + "loss": 3.0295, + "step": 1108500 + }, + { + "epoch": 9.09, + "learning_rate": 6.386591921616335e-06, + "loss": 3.0257, + "step": 1109000 + }, + { + "epoch": 9.09, + "learning_rate": 6.384947968896416e-06, + "loss": 3.0275, + "step": 1109500 + }, + { + "epoch": 9.09, + "learning_rate": 6.383304016176495e-06, + "loss": 3.0284, + "step": 1110000 + }, + { + "epoch": 9.09, + "eval_accuracy": 0.48608889348818574, + "eval_loss": 2.885928153991699, + "eval_runtime": 407.7381, + "eval_samples_per_second": 756.26, + "eval_steps_per_second": 15.758, + "step": 1110000 + }, + { + "epoch": 9.1, + "learning_rate": 6.381660063456575e-06, + "loss": 3.0216, + "step": 1110500 + }, + { + "epoch": 9.1, + "learning_rate": 6.380016110736655e-06, + "loss": 3.0199, + "step": 1111000 + }, + { + "epoch": 9.11, + "learning_rate": 6.3783721580167355e-06, + "loss": 3.0232, + "step": 1111500 + }, + { + "epoch": 9.11, + "learning_rate": 6.3767282052968164e-06, + "loss": 3.0248, + "step": 1112000 + }, + { + "epoch": 9.11, + "learning_rate": 6.3750842525768966e-06, + "loss": 3.0276, + "step": 1112500 + }, + { + "epoch": 9.12, + "learning_rate": 6.373440299856977e-06, + "loss": 3.0284, + "step": 1113000 + }, + { + "epoch": 9.12, + "learning_rate": 6.371796347137057e-06, + "loss": 3.0302, + "step": 1113500 + }, + { + "epoch": 9.13, + "learning_rate": 6.370152394417137e-06, + "loss": 3.0274, + "step": 1114000 + }, + { + "epoch": 9.13, + "learning_rate": 6.368508441697218e-06, + "loss": 3.0335, + "step": 1114500 + }, + { + "epoch": 9.14, + "learning_rate": 6.366864488977298e-06, + "loss": 3.0209, + "step": 1115000 + }, + { + "epoch": 9.14, + "learning_rate": 6.365220536257378e-06, + "loss": 3.0297, + "step": 1115500 + }, + { + "epoch": 9.14, + "learning_rate": 6.3635765835374575e-06, + "loss": 3.0267, + "step": 1116000 + }, + { + "epoch": 9.15, + "learning_rate": 6.361932630817538e-06, + "loss": 3.0261, + "step": 1116500 + }, + { + "epoch": 9.15, + "learning_rate": 6.360288678097618e-06, + "loss": 3.0273, + "step": 1117000 + }, + { + "epoch": 9.16, + "learning_rate": 6.358644725377699e-06, + "loss": 3.0163, + "step": 1117500 + }, + { + "epoch": 9.16, + "learning_rate": 6.357000772657779e-06, + "loss": 3.0258, + "step": 1118000 + }, + { + "epoch": 9.16, + "learning_rate": 6.355356819937859e-06, + "loss": 3.0321, + "step": 1118500 + }, + { + "epoch": 9.17, + "learning_rate": 6.353712867217939e-06, + "loss": 3.0181, + "step": 1119000 + }, + { + "epoch": 9.17, + "learning_rate": 6.35206891449802e-06, + "loss": 3.0194, + "step": 1119500 + }, + { + "epoch": 9.18, + "learning_rate": 6.3504249617781e-06, + "loss": 3.0279, + "step": 1120000 + }, + { + "epoch": 9.18, + "learning_rate": 6.34878100905818e-06, + "loss": 3.0236, + "step": 1120500 + }, + { + "epoch": 9.18, + "learning_rate": 6.34713705633826e-06, + "loss": 3.0279, + "step": 1121000 + }, + { + "epoch": 9.19, + "learning_rate": 6.3454931036183405e-06, + "loss": 3.026, + "step": 1121500 + }, + { + "epoch": 9.19, + "learning_rate": 6.34384915089842e-06, + "loss": 3.0217, + "step": 1122000 + }, + { + "epoch": 9.2, + "learning_rate": 6.342205198178501e-06, + "loss": 3.0299, + "step": 1122500 + }, + { + "epoch": 9.2, + "learning_rate": 6.340561245458581e-06, + "loss": 3.0203, + "step": 1123000 + }, + { + "epoch": 9.2, + "learning_rate": 6.338917292738661e-06, + "loss": 3.0224, + "step": 1123500 + }, + { + "epoch": 9.21, + "learning_rate": 6.337273340018741e-06, + "loss": 3.0184, + "step": 1124000 + }, + { + "epoch": 9.21, + "learning_rate": 6.335629387298821e-06, + "loss": 3.0215, + "step": 1124500 + }, + { + "epoch": 9.22, + "learning_rate": 6.333985434578902e-06, + "loss": 3.0261, + "step": 1125000 + }, + { + "epoch": 9.22, + "learning_rate": 6.332341481858982e-06, + "loss": 3.0242, + "step": 1125500 + }, + { + "epoch": 9.23, + "learning_rate": 6.3306975291390625e-06, + "loss": 3.0246, + "step": 1126000 + }, + { + "epoch": 9.23, + "learning_rate": 6.329053576419143e-06, + "loss": 3.0327, + "step": 1126500 + }, + { + "epoch": 9.23, + "learning_rate": 6.327409623699224e-06, + "loss": 3.0222, + "step": 1127000 + }, + { + "epoch": 9.24, + "learning_rate": 6.325765670979304e-06, + "loss": 3.0257, + "step": 1127500 + }, + { + "epoch": 9.24, + "learning_rate": 6.324121718259384e-06, + "loss": 3.0228, + "step": 1128000 + }, + { + "epoch": 9.25, + "learning_rate": 6.322477765539463e-06, + "loss": 3.0246, + "step": 1128500 + }, + { + "epoch": 9.25, + "learning_rate": 6.320833812819543e-06, + "loss": 3.0205, + "step": 1129000 + }, + { + "epoch": 9.25, + "learning_rate": 6.319189860099623e-06, + "loss": 3.0198, + "step": 1129500 + }, + { + "epoch": 9.26, + "learning_rate": 6.317545907379704e-06, + "loss": 3.0227, + "step": 1130000 + }, + { + "epoch": 9.26, + "learning_rate": 6.3159019546597845e-06, + "loss": 3.0205, + "step": 1130500 + }, + { + "epoch": 9.27, + "learning_rate": 6.314258001939865e-06, + "loss": 3.0218, + "step": 1131000 + }, + { + "epoch": 9.27, + "learning_rate": 6.312614049219945e-06, + "loss": 3.0221, + "step": 1131500 + }, + { + "epoch": 9.27, + "learning_rate": 6.310970096500026e-06, + "loss": 3.017, + "step": 1132000 + }, + { + "epoch": 9.28, + "learning_rate": 6.309326143780106e-06, + "loss": 3.0187, + "step": 1132500 + }, + { + "epoch": 9.28, + "learning_rate": 6.307682191060186e-06, + "loss": 3.0173, + "step": 1133000 + }, + { + "epoch": 9.29, + "learning_rate": 6.306038238340266e-06, + "loss": 3.0229, + "step": 1133500 + }, + { + "epoch": 9.29, + "learning_rate": 6.304394285620346e-06, + "loss": 3.0254, + "step": 1134000 + }, + { + "epoch": 9.29, + "learning_rate": 6.3027503329004255e-06, + "loss": 3.0292, + "step": 1134500 + }, + { + "epoch": 9.3, + "learning_rate": 6.3011063801805065e-06, + "loss": 3.0172, + "step": 1135000 + }, + { + "epoch": 9.3, + "learning_rate": 6.299462427460587e-06, + "loss": 3.0233, + "step": 1135500 + }, + { + "epoch": 9.31, + "learning_rate": 6.297818474740667e-06, + "loss": 3.0245, + "step": 1136000 + }, + { + "epoch": 9.31, + "learning_rate": 6.296174522020747e-06, + "loss": 3.0165, + "step": 1136500 + }, + { + "epoch": 9.32, + "learning_rate": 6.294530569300827e-06, + "loss": 3.0161, + "step": 1137000 + }, + { + "epoch": 9.32, + "learning_rate": 6.292886616580908e-06, + "loss": 3.017, + "step": 1137500 + }, + { + "epoch": 9.32, + "learning_rate": 6.291242663860988e-06, + "loss": 3.0244, + "step": 1138000 + }, + { + "epoch": 9.33, + "learning_rate": 6.289598711141068e-06, + "loss": 3.0189, + "step": 1138500 + }, + { + "epoch": 9.33, + "learning_rate": 6.287954758421148e-06, + "loss": 3.0196, + "step": 1139000 + }, + { + "epoch": 9.34, + "learning_rate": 6.286310805701229e-06, + "loss": 3.0169, + "step": 1139500 + }, + { + "epoch": 9.34, + "learning_rate": 6.2846668529813095e-06, + "loss": 3.0224, + "step": 1140000 + }, + { + "epoch": 9.34, + "eval_accuracy": 0.4867489885373119, + "eval_loss": 2.881913185119629, + "eval_runtime": 406.5724, + "eval_samples_per_second": 758.428, + "eval_steps_per_second": 15.803, + "step": 1140000 + }, + { + "epoch": 9.34, + "learning_rate": 6.283022900261389e-06, + "loss": 3.0198, + "step": 1140500 + }, + { + "epoch": 9.35, + "learning_rate": 6.281378947541469e-06, + "loss": 3.0248, + "step": 1141000 + }, + { + "epoch": 9.35, + "learning_rate": 6.279734994821549e-06, + "loss": 3.0256, + "step": 1141500 + }, + { + "epoch": 9.36, + "learning_rate": 6.278091042101629e-06, + "loss": 3.0153, + "step": 1142000 + }, + { + "epoch": 9.36, + "learning_rate": 6.27644708938171e-06, + "loss": 3.0159, + "step": 1142500 + }, + { + "epoch": 9.36, + "learning_rate": 6.27480313666179e-06, + "loss": 3.0145, + "step": 1143000 + }, + { + "epoch": 9.37, + "learning_rate": 6.27315918394187e-06, + "loss": 3.0236, + "step": 1143500 + }, + { + "epoch": 9.37, + "learning_rate": 6.2715152312219505e-06, + "loss": 3.0184, + "step": 1144000 + }, + { + "epoch": 9.38, + "learning_rate": 6.2698712785020314e-06, + "loss": 3.0168, + "step": 1144500 + }, + { + "epoch": 9.38, + "learning_rate": 6.2682273257821116e-06, + "loss": 3.0201, + "step": 1145000 + }, + { + "epoch": 9.38, + "learning_rate": 6.266583373062192e-06, + "loss": 3.0186, + "step": 1145500 + }, + { + "epoch": 9.39, + "learning_rate": 6.264939420342272e-06, + "loss": 3.0217, + "step": 1146000 + }, + { + "epoch": 9.39, + "learning_rate": 6.263295467622351e-06, + "loss": 3.0154, + "step": 1146500 + }, + { + "epoch": 9.4, + "learning_rate": 6.261651514902431e-06, + "loss": 3.0197, + "step": 1147000 + }, + { + "epoch": 9.4, + "learning_rate": 6.260007562182512e-06, + "loss": 3.0149, + "step": 1147500 + }, + { + "epoch": 9.41, + "learning_rate": 6.258363609462592e-06, + "loss": 3.0203, + "step": 1148000 + }, + { + "epoch": 9.41, + "learning_rate": 6.2567196567426725e-06, + "loss": 3.0206, + "step": 1148500 + }, + { + "epoch": 9.41, + "learning_rate": 6.255075704022753e-06, + "loss": 3.0151, + "step": 1149000 + }, + { + "epoch": 9.42, + "learning_rate": 6.253431751302833e-06, + "loss": 3.0192, + "step": 1149500 + }, + { + "epoch": 9.42, + "learning_rate": 6.251787798582914e-06, + "loss": 3.0161, + "step": 1150000 + }, + { + "epoch": 9.43, + "learning_rate": 6.250143845862994e-06, + "loss": 3.0224, + "step": 1150500 + }, + { + "epoch": 9.43, + "learning_rate": 6.248499893143074e-06, + "loss": 3.02, + "step": 1151000 + }, + { + "epoch": 9.43, + "learning_rate": 6.246855940423154e-06, + "loss": 3.0231, + "step": 1151500 + }, + { + "epoch": 9.44, + "learning_rate": 6.245211987703235e-06, + "loss": 3.0199, + "step": 1152000 + }, + { + "epoch": 9.44, + "learning_rate": 6.2435680349833135e-06, + "loss": 3.0159, + "step": 1152500 + }, + { + "epoch": 9.45, + "learning_rate": 6.2419240822633944e-06, + "loss": 3.0192, + "step": 1153000 + }, + { + "epoch": 9.45, + "learning_rate": 6.2402801295434746e-06, + "loss": 3.0151, + "step": 1153500 + }, + { + "epoch": 9.45, + "learning_rate": 6.238636176823555e-06, + "loss": 3.0198, + "step": 1154000 + }, + { + "epoch": 9.46, + "learning_rate": 6.236992224103635e-06, + "loss": 3.0142, + "step": 1154500 + }, + { + "epoch": 9.46, + "learning_rate": 6.235348271383716e-06, + "loss": 3.0177, + "step": 1155000 + }, + { + "epoch": 9.47, + "learning_rate": 6.233704318663796e-06, + "loss": 3.0148, + "step": 1155500 + }, + { + "epoch": 9.47, + "learning_rate": 6.232060365943876e-06, + "loss": 3.0101, + "step": 1156000 + }, + { + "epoch": 9.48, + "learning_rate": 6.230416413223956e-06, + "loss": 3.0113, + "step": 1156500 + }, + { + "epoch": 9.48, + "learning_rate": 6.228772460504036e-06, + "loss": 3.0084, + "step": 1157000 + }, + { + "epoch": 9.48, + "learning_rate": 6.227128507784117e-06, + "loss": 3.0133, + "step": 1157500 + }, + { + "epoch": 9.49, + "learning_rate": 6.225484555064197e-06, + "loss": 3.0143, + "step": 1158000 + }, + { + "epoch": 9.49, + "learning_rate": 6.223840602344277e-06, + "loss": 3.0167, + "step": 1158500 + }, + { + "epoch": 9.5, + "learning_rate": 6.222196649624357e-06, + "loss": 3.0136, + "step": 1159000 + }, + { + "epoch": 9.5, + "learning_rate": 6.220552696904437e-06, + "loss": 3.012, + "step": 1159500 + }, + { + "epoch": 9.5, + "learning_rate": 6.218908744184518e-06, + "loss": 3.012, + "step": 1160000 + }, + { + "epoch": 9.51, + "learning_rate": 6.217264791464598e-06, + "loss": 3.0111, + "step": 1160500 + }, + { + "epoch": 9.51, + "learning_rate": 6.215620838744678e-06, + "loss": 3.0147, + "step": 1161000 + }, + { + "epoch": 9.52, + "learning_rate": 6.213976886024758e-06, + "loss": 3.015, + "step": 1161500 + }, + { + "epoch": 9.52, + "learning_rate": 6.212332933304838e-06, + "loss": 3.017, + "step": 1162000 + }, + { + "epoch": 9.52, + "learning_rate": 6.210688980584919e-06, + "loss": 3.0108, + "step": 1162500 + }, + { + "epoch": 9.53, + "learning_rate": 6.2090450278649995e-06, + "loss": 3.0125, + "step": 1163000 + }, + { + "epoch": 9.53, + "learning_rate": 6.20740107514508e-06, + "loss": 3.0126, + "step": 1163500 + }, + { + "epoch": 9.54, + "learning_rate": 6.20575712242516e-06, + "loss": 3.0209, + "step": 1164000 + }, + { + "epoch": 9.54, + "learning_rate": 6.204113169705239e-06, + "loss": 3.0089, + "step": 1164500 + }, + { + "epoch": 9.54, + "learning_rate": 6.202469216985319e-06, + "loss": 3.0135, + "step": 1165000 + }, + { + "epoch": 9.55, + "learning_rate": 6.2008252642654e-06, + "loss": 3.011, + "step": 1165500 + }, + { + "epoch": 9.55, + "learning_rate": 6.19918131154548e-06, + "loss": 3.0125, + "step": 1166000 + }, + { + "epoch": 9.56, + "learning_rate": 6.19753735882556e-06, + "loss": 3.0127, + "step": 1166500 + }, + { + "epoch": 9.56, + "learning_rate": 6.1958934061056405e-06, + "loss": 3.0144, + "step": 1167000 + }, + { + "epoch": 9.57, + "learning_rate": 6.1942494533857215e-06, + "loss": 3.0174, + "step": 1167500 + }, + { + "epoch": 9.57, + "learning_rate": 6.192605500665802e-06, + "loss": 3.0088, + "step": 1168000 + }, + { + "epoch": 9.57, + "learning_rate": 6.190961547945882e-06, + "loss": 3.0135, + "step": 1168500 + }, + { + "epoch": 9.58, + "learning_rate": 6.189317595225962e-06, + "loss": 3.0141, + "step": 1169000 + }, + { + "epoch": 9.58, + "learning_rate": 6.187673642506042e-06, + "loss": 3.0075, + "step": 1169500 + }, + { + "epoch": 9.59, + "learning_rate": 6.186029689786123e-06, + "loss": 3.019, + "step": 1170000 + }, + { + "epoch": 9.59, + "eval_accuracy": 0.4878472453037176, + "eval_loss": 2.873086929321289, + "eval_runtime": 405.404, + "eval_samples_per_second": 760.614, + "eval_steps_per_second": 15.848, + "step": 1170000 + }, + { + "epoch": 9.59, + "learning_rate": 6.184385737066202e-06, + "loss": 3.0057, + "step": 1170500 + }, + { + "epoch": 9.59, + "learning_rate": 6.182741784346282e-06, + "loss": 3.0136, + "step": 1171000 + }, + { + "epoch": 9.6, + "learning_rate": 6.1810978316263625e-06, + "loss": 3.0157, + "step": 1171500 + }, + { + "epoch": 9.6, + "learning_rate": 6.179453878906443e-06, + "loss": 3.0087, + "step": 1172000 + }, + { + "epoch": 9.61, + "learning_rate": 6.177809926186524e-06, + "loss": 3.0223, + "step": 1172500 + }, + { + "epoch": 9.61, + "learning_rate": 6.176165973466604e-06, + "loss": 3.0064, + "step": 1173000 + }, + { + "epoch": 9.61, + "learning_rate": 6.174522020746684e-06, + "loss": 3.0084, + "step": 1173500 + }, + { + "epoch": 9.62, + "learning_rate": 6.172878068026764e-06, + "loss": 3.0118, + "step": 1174000 + }, + { + "epoch": 9.62, + "learning_rate": 6.171234115306844e-06, + "loss": 3.0115, + "step": 1174500 + }, + { + "epoch": 9.63, + "learning_rate": 6.169590162586925e-06, + "loss": 3.0137, + "step": 1175000 + }, + { + "epoch": 9.63, + "learning_rate": 6.167946209867005e-06, + "loss": 3.0106, + "step": 1175500 + }, + { + "epoch": 9.63, + "learning_rate": 6.166302257147085e-06, + "loss": 3.0104, + "step": 1176000 + }, + { + "epoch": 9.64, + "learning_rate": 6.1646583044271655e-06, + "loss": 3.0126, + "step": 1176500 + }, + { + "epoch": 9.64, + "learning_rate": 6.163014351707245e-06, + "loss": 3.0086, + "step": 1177000 + }, + { + "epoch": 9.65, + "learning_rate": 6.161370398987325e-06, + "loss": 3.0148, + "step": 1177500 + }, + { + "epoch": 9.65, + "learning_rate": 6.159726446267406e-06, + "loss": 3.0146, + "step": 1178000 + }, + { + "epoch": 9.66, + "learning_rate": 6.158082493547486e-06, + "loss": 3.0155, + "step": 1178500 + }, + { + "epoch": 9.66, + "learning_rate": 6.156438540827566e-06, + "loss": 3.012, + "step": 1179000 + }, + { + "epoch": 9.66, + "learning_rate": 6.154794588107646e-06, + "loss": 3.0118, + "step": 1179500 + }, + { + "epoch": 9.67, + "learning_rate": 6.153150635387727e-06, + "loss": 3.0086, + "step": 1180000 + }, + { + "epoch": 9.67, + "learning_rate": 6.151506682667807e-06, + "loss": 3.0139, + "step": 1180500 + }, + { + "epoch": 9.68, + "learning_rate": 6.1498627299478875e-06, + "loss": 3.0105, + "step": 1181000 + }, + { + "epoch": 9.68, + "learning_rate": 6.148218777227968e-06, + "loss": 3.012, + "step": 1181500 + }, + { + "epoch": 9.68, + "learning_rate": 6.146574824508048e-06, + "loss": 3.0096, + "step": 1182000 + }, + { + "epoch": 9.69, + "learning_rate": 6.144930871788129e-06, + "loss": 3.0109, + "step": 1182500 + }, + { + "epoch": 9.69, + "learning_rate": 6.143286919068208e-06, + "loss": 3.0119, + "step": 1183000 + }, + { + "epoch": 9.7, + "learning_rate": 6.141642966348288e-06, + "loss": 3.0086, + "step": 1183500 + }, + { + "epoch": 9.7, + "learning_rate": 6.139999013628368e-06, + "loss": 3.014, + "step": 1184000 + }, + { + "epoch": 9.7, + "learning_rate": 6.138355060908448e-06, + "loss": 3.0107, + "step": 1184500 + }, + { + "epoch": 9.71, + "learning_rate": 6.1367111081885285e-06, + "loss": 3.0039, + "step": 1185000 + }, + { + "epoch": 9.71, + "learning_rate": 6.1350671554686094e-06, + "loss": 3.0066, + "step": 1185500 + }, + { + "epoch": 9.72, + "learning_rate": 6.1334232027486896e-06, + "loss": 3.0103, + "step": 1186000 + }, + { + "epoch": 9.72, + "learning_rate": 6.13177925002877e-06, + "loss": 3.0094, + "step": 1186500 + }, + { + "epoch": 9.72, + "learning_rate": 6.13013529730885e-06, + "loss": 3.011, + "step": 1187000 + }, + { + "epoch": 9.73, + "learning_rate": 6.128491344588931e-06, + "loss": 3.0094, + "step": 1187500 + }, + { + "epoch": 9.73, + "learning_rate": 6.126847391869011e-06, + "loss": 3.0036, + "step": 1188000 + }, + { + "epoch": 9.74, + "learning_rate": 6.125203439149091e-06, + "loss": 3.0096, + "step": 1188500 + }, + { + "epoch": 9.74, + "learning_rate": 6.12355948642917e-06, + "loss": 3.0105, + "step": 1189000 + }, + { + "epoch": 9.75, + "learning_rate": 6.1219155337092504e-06, + "loss": 3.008, + "step": 1189500 + }, + { + "epoch": 9.75, + "learning_rate": 6.1202715809893306e-06, + "loss": 3.0032, + "step": 1190000 + }, + { + "epoch": 9.75, + "learning_rate": 6.1186276282694115e-06, + "loss": 3.0091, + "step": 1190500 + }, + { + "epoch": 9.76, + "learning_rate": 6.116983675549492e-06, + "loss": 3.0035, + "step": 1191000 + }, + { + "epoch": 9.76, + "learning_rate": 6.115339722829572e-06, + "loss": 3.0091, + "step": 1191500 + }, + { + "epoch": 9.77, + "learning_rate": 6.113695770109652e-06, + "loss": 3.0013, + "step": 1192000 + }, + { + "epoch": 9.77, + "learning_rate": 6.112051817389733e-06, + "loss": 3.0043, + "step": 1192500 + }, + { + "epoch": 9.77, + "learning_rate": 6.110407864669813e-06, + "loss": 3.0081, + "step": 1193000 + }, + { + "epoch": 9.78, + "learning_rate": 6.108763911949893e-06, + "loss": 3.0055, + "step": 1193500 + }, + { + "epoch": 9.78, + "learning_rate": 6.107119959229973e-06, + "loss": 3.011, + "step": 1194000 + }, + { + "epoch": 9.79, + "learning_rate": 6.105476006510053e-06, + "loss": 3.0094, + "step": 1194500 + }, + { + "epoch": 9.79, + "learning_rate": 6.103832053790133e-06, + "loss": 3.0133, + "step": 1195000 + }, + { + "epoch": 9.79, + "learning_rate": 6.102188101070214e-06, + "loss": 3.0032, + "step": 1195500 + }, + { + "epoch": 9.8, + "learning_rate": 6.100544148350294e-06, + "loss": 3.0075, + "step": 1196000 + }, + { + "epoch": 9.8, + "learning_rate": 6.098900195630374e-06, + "loss": 3.0091, + "step": 1196500 + }, + { + "epoch": 9.81, + "learning_rate": 6.097256242910454e-06, + "loss": 3.011, + "step": 1197000 + }, + { + "epoch": 9.81, + "learning_rate": 6.095612290190534e-06, + "loss": 3.0039, + "step": 1197500 + }, + { + "epoch": 9.82, + "learning_rate": 6.093968337470615e-06, + "loss": 3.0036, + "step": 1198000 + }, + { + "epoch": 9.82, + "learning_rate": 6.092324384750695e-06, + "loss": 3.0069, + "step": 1198500 + }, + { + "epoch": 9.82, + "learning_rate": 6.090680432030775e-06, + "loss": 3.0114, + "step": 1199000 + }, + { + "epoch": 9.83, + "learning_rate": 6.0890364793108555e-06, + "loss": 3.006, + "step": 1199500 + }, + { + "epoch": 9.83, + "learning_rate": 6.0873925265909365e-06, + "loss": 3.0094, + "step": 1200000 + }, + { + "epoch": 9.83, + "eval_accuracy": 0.48851002828245305, + "eval_loss": 2.8687474727630615, + "eval_runtime": 404.2357, + "eval_samples_per_second": 762.812, + "eval_steps_per_second": 15.894, + "step": 1200000 + }, + { + "epoch": 9.84, + "learning_rate": 6.085748573871017e-06, + "loss": 2.9995, + "step": 1200500 + }, + { + "epoch": 9.84, + "learning_rate": 6.084104621151096e-06, + "loss": 3.0002, + "step": 1201000 + }, + { + "epoch": 9.84, + "learning_rate": 6.082460668431176e-06, + "loss": 3.0047, + "step": 1201500 + }, + { + "epoch": 9.85, + "learning_rate": 6.080816715711256e-06, + "loss": 3.0093, + "step": 1202000 + }, + { + "epoch": 9.85, + "learning_rate": 6.079172762991336e-06, + "loss": 3.0018, + "step": 1202500 + }, + { + "epoch": 9.86, + "learning_rate": 6.077528810271417e-06, + "loss": 3.0067, + "step": 1203000 + }, + { + "epoch": 9.86, + "learning_rate": 6.075884857551497e-06, + "loss": 3.01, + "step": 1203500 + }, + { + "epoch": 9.86, + "learning_rate": 6.0742409048315775e-06, + "loss": 3.0108, + "step": 1204000 + }, + { + "epoch": 9.87, + "learning_rate": 6.072596952111658e-06, + "loss": 3.0089, + "step": 1204500 + }, + { + "epoch": 9.87, + "learning_rate": 6.070952999391739e-06, + "loss": 3.0047, + "step": 1205000 + }, + { + "epoch": 9.88, + "learning_rate": 6.069309046671819e-06, + "loss": 3.0057, + "step": 1205500 + }, + { + "epoch": 9.88, + "learning_rate": 6.067665093951899e-06, + "loss": 3.006, + "step": 1206000 + }, + { + "epoch": 9.88, + "learning_rate": 6.066021141231979e-06, + "loss": 3.0104, + "step": 1206500 + }, + { + "epoch": 9.89, + "learning_rate": 6.064377188512058e-06, + "loss": 3.0047, + "step": 1207000 + }, + { + "epoch": 9.89, + "learning_rate": 6.062733235792138e-06, + "loss": 3.0062, + "step": 1207500 + }, + { + "epoch": 9.9, + "learning_rate": 6.061089283072219e-06, + "loss": 3.0086, + "step": 1208000 + }, + { + "epoch": 9.9, + "learning_rate": 6.0594453303522995e-06, + "loss": 3.0032, + "step": 1208500 + }, + { + "epoch": 9.91, + "learning_rate": 6.05780137763238e-06, + "loss": 3.0042, + "step": 1209000 + }, + { + "epoch": 9.91, + "learning_rate": 6.05615742491246e-06, + "loss": 3.0064, + "step": 1209500 + }, + { + "epoch": 9.91, + "learning_rate": 6.05451347219254e-06, + "loss": 3.0005, + "step": 1210000 + }, + { + "epoch": 9.92, + "learning_rate": 6.052869519472621e-06, + "loss": 3.0043, + "step": 1210500 + }, + { + "epoch": 9.92, + "learning_rate": 6.051225566752701e-06, + "loss": 3.0002, + "step": 1211000 + }, + { + "epoch": 9.93, + "learning_rate": 6.049581614032781e-06, + "loss": 3.011, + "step": 1211500 + }, + { + "epoch": 9.93, + "learning_rate": 6.047937661312861e-06, + "loss": 3.0027, + "step": 1212000 + }, + { + "epoch": 9.93, + "learning_rate": 6.046293708592942e-06, + "loss": 3.0036, + "step": 1212500 + }, + { + "epoch": 9.94, + "learning_rate": 6.044649755873021e-06, + "loss": 3.0021, + "step": 1213000 + }, + { + "epoch": 9.94, + "learning_rate": 6.043005803153102e-06, + "loss": 2.9972, + "step": 1213500 + }, + { + "epoch": 9.95, + "learning_rate": 6.041361850433182e-06, + "loss": 3.0094, + "step": 1214000 + }, + { + "epoch": 9.95, + "learning_rate": 6.039717897713262e-06, + "loss": 3.0087, + "step": 1214500 + }, + { + "epoch": 9.95, + "learning_rate": 6.038073944993342e-06, + "loss": 3.0098, + "step": 1215000 + }, + { + "epoch": 9.96, + "learning_rate": 6.036429992273423e-06, + "loss": 3.0037, + "step": 1215500 + }, + { + "epoch": 9.96, + "learning_rate": 6.034786039553503e-06, + "loss": 3.0057, + "step": 1216000 + }, + { + "epoch": 9.97, + "learning_rate": 6.033142086833583e-06, + "loss": 3.0026, + "step": 1216500 + }, + { + "epoch": 9.97, + "learning_rate": 6.031498134113663e-06, + "loss": 3.0057, + "step": 1217000 + }, + { + "epoch": 9.97, + "learning_rate": 6.029854181393744e-06, + "loss": 3.0037, + "step": 1217500 + }, + { + "epoch": 9.98, + "learning_rate": 6.0282102286738244e-06, + "loss": 3.0066, + "step": 1218000 + }, + { + "epoch": 9.98, + "learning_rate": 6.0265662759539046e-06, + "loss": 3.0039, + "step": 1218500 + }, + { + "epoch": 9.99, + "learning_rate": 6.024922323233984e-06, + "loss": 3.0073, + "step": 1219000 + }, + { + "epoch": 9.99, + "learning_rate": 6.023278370514064e-06, + "loss": 2.9989, + "step": 1219500 + }, + { + "epoch": 10.0, + "learning_rate": 6.021634417794144e-06, + "loss": 3.0029, + "step": 1220000 + }, + { + "epoch": 10.0, + "learning_rate": 6.019990465074225e-06, + "loss": 2.9987, + "step": 1220500 + }, + { + "epoch": 10.0, + "learning_rate": 6.018346512354305e-06, + "loss": 3.0004, + "step": 1221000 + }, + { + "epoch": 10.01, + "learning_rate": 6.016702559634385e-06, + "loss": 3.006, + "step": 1221500 + }, + { + "epoch": 10.01, + "learning_rate": 6.0150586069144654e-06, + "loss": 3.0002, + "step": 1222000 + }, + { + "epoch": 10.02, + "learning_rate": 6.0134146541945456e-06, + "loss": 3.0044, + "step": 1222500 + }, + { + "epoch": 10.02, + "learning_rate": 6.0117707014746265e-06, + "loss": 2.9957, + "step": 1223000 + }, + { + "epoch": 10.02, + "learning_rate": 6.010126748754707e-06, + "loss": 3.0001, + "step": 1223500 + }, + { + "epoch": 10.03, + "learning_rate": 6.008482796034787e-06, + "loss": 3.0058, + "step": 1224000 + }, + { + "epoch": 10.03, + "learning_rate": 6.006838843314867e-06, + "loss": 2.9946, + "step": 1224500 + }, + { + "epoch": 10.04, + "learning_rate": 6.005194890594948e-06, + "loss": 3.0, + "step": 1225000 + }, + { + "epoch": 10.04, + "learning_rate": 6.003550937875026e-06, + "loss": 2.9992, + "step": 1225500 + }, + { + "epoch": 10.04, + "learning_rate": 6.001906985155107e-06, + "loss": 2.9963, + "step": 1226000 + }, + { + "epoch": 10.05, + "learning_rate": 6.0002630324351874e-06, + "loss": 2.9928, + "step": 1226500 + }, + { + "epoch": 10.05, + "learning_rate": 5.9986190797152676e-06, + "loss": 3.0001, + "step": 1227000 + }, + { + "epoch": 10.06, + "learning_rate": 5.996975126995348e-06, + "loss": 3.0003, + "step": 1227500 + }, + { + "epoch": 10.06, + "learning_rate": 5.995331174275429e-06, + "loss": 2.9989, + "step": 1228000 + }, + { + "epoch": 10.06, + "learning_rate": 5.993687221555509e-06, + "loss": 2.9957, + "step": 1228500 + }, + { + "epoch": 10.07, + "learning_rate": 5.992043268835589e-06, + "loss": 3.0049, + "step": 1229000 + }, + { + "epoch": 10.07, + "learning_rate": 5.990399316115669e-06, + "loss": 2.9962, + "step": 1229500 + }, + { + "epoch": 10.08, + "learning_rate": 5.988755363395749e-06, + "loss": 3.0065, + "step": 1230000 + }, + { + "epoch": 10.08, + "eval_accuracy": 0.48933464137984234, + "eval_loss": 2.8635189533233643, + "eval_runtime": 406.4876, + "eval_samples_per_second": 758.587, + "eval_steps_per_second": 15.806, + "step": 1230000 + }, + { + "epoch": 10.08, + "learning_rate": 5.98711141067583e-06, + "loss": 2.998, + "step": 1230500 + }, + { + "epoch": 10.09, + "learning_rate": 5.98546745795591e-06, + "loss": 3.0041, + "step": 1231000 + }, + { + "epoch": 10.09, + "learning_rate": 5.9838235052359895e-06, + "loss": 3.0004, + "step": 1231500 + }, + { + "epoch": 10.09, + "learning_rate": 5.98217955251607e-06, + "loss": 3.0011, + "step": 1232000 + }, + { + "epoch": 10.1, + "learning_rate": 5.98053559979615e-06, + "loss": 3.0099, + "step": 1232500 + }, + { + "epoch": 10.1, + "learning_rate": 5.978891647076231e-06, + "loss": 3.0018, + "step": 1233000 + }, + { + "epoch": 10.11, + "learning_rate": 5.977247694356311e-06, + "loss": 2.9946, + "step": 1233500 + }, + { + "epoch": 10.11, + "learning_rate": 5.975603741636391e-06, + "loss": 3.0006, + "step": 1234000 + }, + { + "epoch": 10.11, + "learning_rate": 5.973959788916471e-06, + "loss": 2.9976, + "step": 1234500 + }, + { + "epoch": 10.12, + "learning_rate": 5.972315836196551e-06, + "loss": 2.9933, + "step": 1235000 + }, + { + "epoch": 10.12, + "learning_rate": 5.970671883476632e-06, + "loss": 2.997, + "step": 1235500 + }, + { + "epoch": 10.13, + "learning_rate": 5.969027930756712e-06, + "loss": 3.0016, + "step": 1236000 + }, + { + "epoch": 10.13, + "learning_rate": 5.9673839780367925e-06, + "loss": 2.9995, + "step": 1236500 + }, + { + "epoch": 10.13, + "learning_rate": 5.965740025316873e-06, + "loss": 3.0035, + "step": 1237000 + }, + { + "epoch": 10.14, + "learning_rate": 5.964096072596952e-06, + "loss": 3.002, + "step": 1237500 + }, + { + "epoch": 10.14, + "learning_rate": 5.962452119877032e-06, + "loss": 2.9986, + "step": 1238000 + }, + { + "epoch": 10.15, + "learning_rate": 5.960808167157113e-06, + "loss": 3.001, + "step": 1238500 + }, + { + "epoch": 10.15, + "learning_rate": 5.959164214437193e-06, + "loss": 2.9956, + "step": 1239000 + }, + { + "epoch": 10.16, + "learning_rate": 5.957520261717273e-06, + "loss": 3.0005, + "step": 1239500 + }, + { + "epoch": 10.16, + "learning_rate": 5.955876308997353e-06, + "loss": 2.9997, + "step": 1240000 + }, + { + "epoch": 10.16, + "learning_rate": 5.954232356277434e-06, + "loss": 2.9939, + "step": 1240500 + }, + { + "epoch": 10.17, + "learning_rate": 5.9525884035575145e-06, + "loss": 2.9964, + "step": 1241000 + }, + { + "epoch": 10.17, + "learning_rate": 5.950944450837595e-06, + "loss": 2.9955, + "step": 1241500 + }, + { + "epoch": 10.18, + "learning_rate": 5.949300498117675e-06, + "loss": 3.0, + "step": 1242000 + }, + { + "epoch": 10.18, + "learning_rate": 5.947656545397755e-06, + "loss": 3.0017, + "step": 1242500 + }, + { + "epoch": 10.18, + "learning_rate": 5.946012592677836e-06, + "loss": 3.0005, + "step": 1243000 + }, + { + "epoch": 10.19, + "learning_rate": 5.944368639957915e-06, + "loss": 2.9954, + "step": 1243500 + }, + { + "epoch": 10.19, + "learning_rate": 5.942724687237995e-06, + "loss": 2.9982, + "step": 1244000 + }, + { + "epoch": 10.2, + "learning_rate": 5.941080734518075e-06, + "loss": 3.0029, + "step": 1244500 + }, + { + "epoch": 10.2, + "learning_rate": 5.9394367817981555e-06, + "loss": 2.9995, + "step": 1245000 + }, + { + "epoch": 10.2, + "learning_rate": 5.9377928290782365e-06, + "loss": 3.0004, + "step": 1245500 + }, + { + "epoch": 10.21, + "learning_rate": 5.936148876358317e-06, + "loss": 3.0033, + "step": 1246000 + }, + { + "epoch": 10.21, + "learning_rate": 5.934504923638397e-06, + "loss": 2.9923, + "step": 1246500 + }, + { + "epoch": 10.22, + "learning_rate": 5.932860970918477e-06, + "loss": 2.9955, + "step": 1247000 + }, + { + "epoch": 10.22, + "learning_rate": 5.931217018198557e-06, + "loss": 2.9977, + "step": 1247500 + }, + { + "epoch": 10.22, + "learning_rate": 5.929573065478638e-06, + "loss": 2.9962, + "step": 1248000 + }, + { + "epoch": 10.23, + "learning_rate": 5.927929112758718e-06, + "loss": 2.9916, + "step": 1248500 + }, + { + "epoch": 10.23, + "learning_rate": 5.926285160038798e-06, + "loss": 2.9935, + "step": 1249000 + }, + { + "epoch": 10.24, + "learning_rate": 5.9246412073188775e-06, + "loss": 2.9927, + "step": 1249500 + }, + { + "epoch": 10.24, + "learning_rate": 5.922997254598958e-06, + "loss": 2.9976, + "step": 1250000 + }, + { + "epoch": 10.25, + "learning_rate": 5.921353301879038e-06, + "loss": 2.9944, + "step": 1250500 + }, + { + "epoch": 10.25, + "learning_rate": 5.919709349159119e-06, + "loss": 3.0006, + "step": 1251000 + }, + { + "epoch": 10.25, + "learning_rate": 5.918065396439199e-06, + "loss": 2.9983, + "step": 1251500 + }, + { + "epoch": 10.26, + "learning_rate": 5.916421443719279e-06, + "loss": 2.9885, + "step": 1252000 + }, + { + "epoch": 10.26, + "learning_rate": 5.914777490999359e-06, + "loss": 2.9992, + "step": 1252500 + }, + { + "epoch": 10.27, + "learning_rate": 5.91313353827944e-06, + "loss": 3.0021, + "step": 1253000 + }, + { + "epoch": 10.27, + "learning_rate": 5.91148958555952e-06, + "loss": 2.9901, + "step": 1253500 + }, + { + "epoch": 10.27, + "learning_rate": 5.9098456328396e-06, + "loss": 2.9947, + "step": 1254000 + }, + { + "epoch": 10.28, + "learning_rate": 5.9082016801196804e-06, + "loss": 2.9951, + "step": 1254500 + }, + { + "epoch": 10.28, + "learning_rate": 5.9065577273997606e-06, + "loss": 2.9973, + "step": 1255000 + }, + { + "epoch": 10.29, + "learning_rate": 5.90491377467984e-06, + "loss": 2.9943, + "step": 1255500 + }, + { + "epoch": 10.29, + "learning_rate": 5.903269821959921e-06, + "loss": 2.9956, + "step": 1256000 + }, + { + "epoch": 10.29, + "learning_rate": 5.901625869240001e-06, + "loss": 2.9963, + "step": 1256500 + }, + { + "epoch": 10.3, + "learning_rate": 5.899981916520081e-06, + "loss": 2.9933, + "step": 1257000 + }, + { + "epoch": 10.3, + "learning_rate": 5.898337963800161e-06, + "loss": 2.9907, + "step": 1257500 + }, + { + "epoch": 10.31, + "learning_rate": 5.896694011080241e-06, + "loss": 2.9903, + "step": 1258000 + }, + { + "epoch": 10.31, + "learning_rate": 5.895050058360322e-06, + "loss": 2.9945, + "step": 1258500 + }, + { + "epoch": 10.31, + "learning_rate": 5.8934061056404024e-06, + "loss": 2.9894, + "step": 1259000 + }, + { + "epoch": 10.32, + "learning_rate": 5.8917621529204826e-06, + "loss": 2.9936, + "step": 1259500 + }, + { + "epoch": 10.32, + "learning_rate": 5.890118200200563e-06, + "loss": 2.9983, + "step": 1260000 + }, + { + "epoch": 10.32, + "eval_accuracy": 0.4899989186098096, + "eval_loss": 2.856139659881592, + "eval_runtime": 405.6967, + "eval_samples_per_second": 760.065, + "eval_steps_per_second": 15.837, + "step": 1260000 + }, + { + "epoch": 10.33, + "learning_rate": 5.888474247480644e-06, + "loss": 2.9992, + "step": 1260500 + }, + { + "epoch": 10.33, + "learning_rate": 5.886830294760724e-06, + "loss": 2.9973, + "step": 1261000 + }, + { + "epoch": 10.34, + "learning_rate": 5.885186342040803e-06, + "loss": 2.996, + "step": 1261500 + }, + { + "epoch": 10.34, + "learning_rate": 5.883542389320883e-06, + "loss": 2.9911, + "step": 1262000 + }, + { + "epoch": 10.34, + "learning_rate": 5.881898436600963e-06, + "loss": 2.9913, + "step": 1262500 + }, + { + "epoch": 10.35, + "learning_rate": 5.8802544838810434e-06, + "loss": 2.9906, + "step": 1263000 + }, + { + "epoch": 10.35, + "learning_rate": 5.878610531161124e-06, + "loss": 3.0007, + "step": 1263500 + }, + { + "epoch": 10.36, + "learning_rate": 5.8769665784412045e-06, + "loss": 2.9983, + "step": 1264000 + }, + { + "epoch": 10.36, + "learning_rate": 5.875322625721285e-06, + "loss": 2.9929, + "step": 1264500 + }, + { + "epoch": 10.36, + "learning_rate": 5.873678673001365e-06, + "loss": 2.999, + "step": 1265000 + }, + { + "epoch": 10.37, + "learning_rate": 5.872034720281446e-06, + "loss": 2.9903, + "step": 1265500 + }, + { + "epoch": 10.37, + "learning_rate": 5.870390767561526e-06, + "loss": 2.9906, + "step": 1266000 + }, + { + "epoch": 10.38, + "learning_rate": 5.868746814841606e-06, + "loss": 2.99, + "step": 1266500 + }, + { + "epoch": 10.38, + "learning_rate": 5.867102862121686e-06, + "loss": 2.9936, + "step": 1267000 + }, + { + "epoch": 10.38, + "learning_rate": 5.8654589094017654e-06, + "loss": 2.9955, + "step": 1267500 + }, + { + "epoch": 10.39, + "learning_rate": 5.8638149566818456e-06, + "loss": 2.9949, + "step": 1268000 + }, + { + "epoch": 10.39, + "learning_rate": 5.8621710039619265e-06, + "loss": 2.9959, + "step": 1268500 + }, + { + "epoch": 10.4, + "learning_rate": 5.860527051242007e-06, + "loss": 2.9974, + "step": 1269000 + }, + { + "epoch": 10.4, + "learning_rate": 5.858883098522087e-06, + "loss": 2.9972, + "step": 1269500 + }, + { + "epoch": 10.4, + "learning_rate": 5.857239145802167e-06, + "loss": 2.9952, + "step": 1270000 + }, + { + "epoch": 10.41, + "learning_rate": 5.855595193082247e-06, + "loss": 2.9968, + "step": 1270500 + }, + { + "epoch": 10.41, + "learning_rate": 5.853951240362328e-06, + "loss": 2.9893, + "step": 1271000 + }, + { + "epoch": 10.42, + "learning_rate": 5.852307287642408e-06, + "loss": 2.9974, + "step": 1271500 + }, + { + "epoch": 10.42, + "learning_rate": 5.850663334922488e-06, + "loss": 2.9862, + "step": 1272000 + }, + { + "epoch": 10.43, + "learning_rate": 5.849019382202568e-06, + "loss": 2.9944, + "step": 1272500 + }, + { + "epoch": 10.43, + "learning_rate": 5.847375429482649e-06, + "loss": 2.9933, + "step": 1273000 + }, + { + "epoch": 10.43, + "learning_rate": 5.845731476762729e-06, + "loss": 2.993, + "step": 1273500 + }, + { + "epoch": 10.44, + "learning_rate": 5.844087524042809e-06, + "loss": 2.9942, + "step": 1274000 + }, + { + "epoch": 10.44, + "learning_rate": 5.842443571322889e-06, + "loss": 2.9952, + "step": 1274500 + }, + { + "epoch": 10.45, + "learning_rate": 5.840799618602969e-06, + "loss": 2.9978, + "step": 1275000 + }, + { + "epoch": 10.45, + "learning_rate": 5.839155665883049e-06, + "loss": 2.9936, + "step": 1275500 + }, + { + "epoch": 10.45, + "learning_rate": 5.83751171316313e-06, + "loss": 2.9934, + "step": 1276000 + }, + { + "epoch": 10.46, + "learning_rate": 5.83586776044321e-06, + "loss": 2.9946, + "step": 1276500 + }, + { + "epoch": 10.46, + "learning_rate": 5.83422380772329e-06, + "loss": 2.9954, + "step": 1277000 + }, + { + "epoch": 10.47, + "learning_rate": 5.8325798550033705e-06, + "loss": 2.9865, + "step": 1277500 + }, + { + "epoch": 10.47, + "learning_rate": 5.8309359022834515e-06, + "loss": 2.9931, + "step": 1278000 + }, + { + "epoch": 10.47, + "learning_rate": 5.829291949563532e-06, + "loss": 2.9937, + "step": 1278500 + }, + { + "epoch": 10.48, + "learning_rate": 5.827647996843612e-06, + "loss": 2.9888, + "step": 1279000 + }, + { + "epoch": 10.48, + "learning_rate": 5.826004044123692e-06, + "loss": 2.9879, + "step": 1279500 + }, + { + "epoch": 10.49, + "learning_rate": 5.824360091403771e-06, + "loss": 2.9912, + "step": 1280000 + }, + { + "epoch": 10.49, + "learning_rate": 5.822716138683851e-06, + "loss": 3.0024, + "step": 1280500 + }, + { + "epoch": 10.5, + "learning_rate": 5.821072185963932e-06, + "loss": 2.9928, + "step": 1281000 + }, + { + "epoch": 10.5, + "learning_rate": 5.819428233244012e-06, + "loss": 2.9852, + "step": 1281500 + }, + { + "epoch": 10.5, + "learning_rate": 5.8177842805240925e-06, + "loss": 2.9819, + "step": 1282000 + }, + { + "epoch": 10.51, + "learning_rate": 5.816140327804173e-06, + "loss": 2.9923, + "step": 1282500 + }, + { + "epoch": 10.51, + "learning_rate": 5.814496375084253e-06, + "loss": 2.9913, + "step": 1283000 + }, + { + "epoch": 10.52, + "learning_rate": 5.812852422364334e-06, + "loss": 2.985, + "step": 1283500 + }, + { + "epoch": 10.52, + "learning_rate": 5.811208469644414e-06, + "loss": 2.9962, + "step": 1284000 + }, + { + "epoch": 10.52, + "learning_rate": 5.809564516924494e-06, + "loss": 2.9926, + "step": 1284500 + }, + { + "epoch": 10.53, + "learning_rate": 5.807920564204574e-06, + "loss": 2.9897, + "step": 1285000 + }, + { + "epoch": 10.53, + "learning_rate": 5.806276611484655e-06, + "loss": 2.9893, + "step": 1285500 + }, + { + "epoch": 10.54, + "learning_rate": 5.8046326587647335e-06, + "loss": 2.9912, + "step": 1286000 + }, + { + "epoch": 10.54, + "learning_rate": 5.8029887060448145e-06, + "loss": 2.9872, + "step": 1286500 + }, + { + "epoch": 10.54, + "learning_rate": 5.801344753324895e-06, + "loss": 2.9919, + "step": 1287000 + }, + { + "epoch": 10.55, + "learning_rate": 5.799700800604975e-06, + "loss": 2.991, + "step": 1287500 + }, + { + "epoch": 10.55, + "learning_rate": 5.798056847885055e-06, + "loss": 2.9917, + "step": 1288000 + }, + { + "epoch": 10.56, + "learning_rate": 5.796412895165136e-06, + "loss": 2.9909, + "step": 1288500 + }, + { + "epoch": 10.56, + "learning_rate": 5.794768942445216e-06, + "loss": 2.988, + "step": 1289000 + }, + { + "epoch": 10.56, + "learning_rate": 5.793124989725296e-06, + "loss": 2.9947, + "step": 1289500 + }, + { + "epoch": 10.57, + "learning_rate": 5.791481037005376e-06, + "loss": 2.9834, + "step": 1290000 + }, + { + "epoch": 10.57, + "eval_accuracy": 0.490716405708734, + "eval_loss": 2.852367401123047, + "eval_runtime": 409.9329, + "eval_samples_per_second": 752.211, + "eval_steps_per_second": 15.673, + "step": 1290000 + }, + { + "epoch": 10.57, + "learning_rate": 5.789837084285457e-06, + "loss": 2.9875, + "step": 1290500 + }, + { + "epoch": 10.58, + "learning_rate": 5.788193131565537e-06, + "loss": 2.99, + "step": 1291000 + }, + { + "epoch": 10.58, + "learning_rate": 5.7865491788456174e-06, + "loss": 2.9915, + "step": 1291500 + }, + { + "epoch": 10.59, + "learning_rate": 5.784905226125697e-06, + "loss": 2.989, + "step": 1292000 + }, + { + "epoch": 10.59, + "learning_rate": 5.783261273405777e-06, + "loss": 2.9908, + "step": 1292500 + }, + { + "epoch": 10.59, + "learning_rate": 5.781617320685857e-06, + "loss": 2.9896, + "step": 1293000 + }, + { + "epoch": 10.6, + "learning_rate": 5.779973367965938e-06, + "loss": 2.9904, + "step": 1293500 + }, + { + "epoch": 10.6, + "learning_rate": 5.778329415246018e-06, + "loss": 2.9917, + "step": 1294000 + }, + { + "epoch": 10.61, + "learning_rate": 5.776685462526098e-06, + "loss": 2.9959, + "step": 1294500 + }, + { + "epoch": 10.61, + "learning_rate": 5.775041509806178e-06, + "loss": 2.9892, + "step": 1295000 + }, + { + "epoch": 10.61, + "learning_rate": 5.7733975570862584e-06, + "loss": 2.9959, + "step": 1295500 + }, + { + "epoch": 10.62, + "learning_rate": 5.771753604366339e-06, + "loss": 2.9883, + "step": 1296000 + }, + { + "epoch": 10.62, + "learning_rate": 5.7701096516464195e-06, + "loss": 2.9849, + "step": 1296500 + }, + { + "epoch": 10.63, + "learning_rate": 5.7684656989265e-06, + "loss": 2.99, + "step": 1297000 + }, + { + "epoch": 10.63, + "learning_rate": 5.76682174620658e-06, + "loss": 2.9891, + "step": 1297500 + }, + { + "epoch": 10.63, + "learning_rate": 5.765177793486659e-06, + "loss": 2.9913, + "step": 1298000 + }, + { + "epoch": 10.64, + "learning_rate": 5.763533840766739e-06, + "loss": 2.9844, + "step": 1298500 + }, + { + "epoch": 10.64, + "learning_rate": 5.76188988804682e-06, + "loss": 2.988, + "step": 1299000 + }, + { + "epoch": 10.65, + "learning_rate": 5.7602459353269e-06, + "loss": 2.9861, + "step": 1299500 + }, + { + "epoch": 10.65, + "learning_rate": 5.7586019826069804e-06, + "loss": 2.9871, + "step": 1300000 + }, + { + "epoch": 10.65, + "learning_rate": 5.7569580298870606e-06, + "loss": 2.9841, + "step": 1300500 + }, + { + "epoch": 10.66, + "learning_rate": 5.7553140771671415e-06, + "loss": 2.9844, + "step": 1301000 + }, + { + "epoch": 10.66, + "learning_rate": 5.753670124447222e-06, + "loss": 2.9895, + "step": 1301500 + }, + { + "epoch": 10.67, + "learning_rate": 5.752026171727302e-06, + "loss": 2.9843, + "step": 1302000 + }, + { + "epoch": 10.67, + "learning_rate": 5.750382219007382e-06, + "loss": 2.9913, + "step": 1302500 + }, + { + "epoch": 10.68, + "learning_rate": 5.748738266287462e-06, + "loss": 2.9902, + "step": 1303000 + }, + { + "epoch": 10.68, + "learning_rate": 5.747094313567543e-06, + "loss": 2.9856, + "step": 1303500 + }, + { + "epoch": 10.68, + "learning_rate": 5.745450360847622e-06, + "loss": 2.9884, + "step": 1304000 + }, + { + "epoch": 10.69, + "learning_rate": 5.743806408127702e-06, + "loss": 2.9866, + "step": 1304500 + }, + { + "epoch": 10.69, + "learning_rate": 5.7421624554077825e-06, + "loss": 2.9871, + "step": 1305000 + }, + { + "epoch": 10.7, + "learning_rate": 5.740518502687863e-06, + "loss": 2.9865, + "step": 1305500 + }, + { + "epoch": 10.7, + "learning_rate": 5.738874549967944e-06, + "loss": 2.9871, + "step": 1306000 + }, + { + "epoch": 10.7, + "learning_rate": 5.737230597248024e-06, + "loss": 2.9857, + "step": 1306500 + }, + { + "epoch": 10.71, + "learning_rate": 5.735586644528104e-06, + "loss": 2.9791, + "step": 1307000 + }, + { + "epoch": 10.71, + "learning_rate": 5.733942691808184e-06, + "loss": 2.9903, + "step": 1307500 + }, + { + "epoch": 10.72, + "learning_rate": 5.732298739088264e-06, + "loss": 2.9838, + "step": 1308000 + }, + { + "epoch": 10.72, + "learning_rate": 5.730654786368345e-06, + "loss": 2.9891, + "step": 1308500 + }, + { + "epoch": 10.72, + "learning_rate": 5.729010833648425e-06, + "loss": 2.9863, + "step": 1309000 + }, + { + "epoch": 10.73, + "learning_rate": 5.727366880928505e-06, + "loss": 2.9899, + "step": 1309500 + }, + { + "epoch": 10.73, + "learning_rate": 5.725722928208585e-06, + "loss": 2.9845, + "step": 1310000 + }, + { + "epoch": 10.74, + "learning_rate": 5.724078975488665e-06, + "loss": 2.9834, + "step": 1310500 + }, + { + "epoch": 10.74, + "learning_rate": 5.722435022768745e-06, + "loss": 2.9827, + "step": 1311000 + }, + { + "epoch": 10.74, + "learning_rate": 5.720791070048826e-06, + "loss": 2.9816, + "step": 1311500 + }, + { + "epoch": 10.75, + "learning_rate": 5.719147117328906e-06, + "loss": 2.9773, + "step": 1312000 + }, + { + "epoch": 10.75, + "learning_rate": 5.717503164608986e-06, + "loss": 2.9882, + "step": 1312500 + }, + { + "epoch": 10.76, + "learning_rate": 5.715859211889066e-06, + "loss": 2.9822, + "step": 1313000 + }, + { + "epoch": 10.76, + "learning_rate": 5.714215259169147e-06, + "loss": 2.9863, + "step": 1313500 + }, + { + "epoch": 10.77, + "learning_rate": 5.712571306449227e-06, + "loss": 2.9802, + "step": 1314000 + }, + { + "epoch": 10.77, + "learning_rate": 5.7109273537293075e-06, + "loss": 2.9899, + "step": 1314500 + }, + { + "epoch": 10.77, + "learning_rate": 5.709283401009388e-06, + "loss": 2.9936, + "step": 1315000 + }, + { + "epoch": 10.78, + "learning_rate": 5.707639448289468e-06, + "loss": 2.9852, + "step": 1315500 + }, + { + "epoch": 10.78, + "learning_rate": 5.705995495569547e-06, + "loss": 2.9875, + "step": 1316000 + }, + { + "epoch": 10.79, + "learning_rate": 5.704351542849628e-06, + "loss": 2.9867, + "step": 1316500 + }, + { + "epoch": 10.79, + "learning_rate": 5.702707590129708e-06, + "loss": 2.982, + "step": 1317000 + }, + { + "epoch": 10.79, + "learning_rate": 5.701063637409788e-06, + "loss": 2.9805, + "step": 1317500 + }, + { + "epoch": 10.8, + "learning_rate": 5.699419684689868e-06, + "loss": 2.9825, + "step": 1318000 + }, + { + "epoch": 10.8, + "learning_rate": 5.697775731969949e-06, + "loss": 2.9785, + "step": 1318500 + }, + { + "epoch": 10.81, + "learning_rate": 5.6961317792500295e-06, + "loss": 2.9884, + "step": 1319000 + }, + { + "epoch": 10.81, + "learning_rate": 5.69448782653011e-06, + "loss": 2.9854, + "step": 1319500 + }, + { + "epoch": 10.81, + "learning_rate": 5.69284387381019e-06, + "loss": 2.9873, + "step": 1320000 + }, + { + "epoch": 10.81, + "eval_accuracy": 0.4910966306213036, + "eval_loss": 2.8484363555908203, + "eval_runtime": 403.7174, + "eval_samples_per_second": 763.792, + "eval_steps_per_second": 15.915, + "step": 1320000 + }, + { + "epoch": 10.82, + "learning_rate": 5.69119992109027e-06, + "loss": 2.9893, + "step": 1320500 + }, + { + "epoch": 10.82, + "learning_rate": 5.689555968370351e-06, + "loss": 2.9849, + "step": 1321000 + }, + { + "epoch": 10.83, + "learning_rate": 5.687912015650431e-06, + "loss": 2.9878, + "step": 1321500 + }, + { + "epoch": 10.83, + "learning_rate": 5.68626806293051e-06, + "loss": 2.9807, + "step": 1322000 + }, + { + "epoch": 10.84, + "learning_rate": 5.68462411021059e-06, + "loss": 2.9778, + "step": 1322500 + }, + { + "epoch": 10.84, + "learning_rate": 5.6829801574906705e-06, + "loss": 2.9867, + "step": 1323000 + }, + { + "epoch": 10.84, + "learning_rate": 5.681336204770751e-06, + "loss": 2.9818, + "step": 1323500 + }, + { + "epoch": 10.85, + "learning_rate": 5.679692252050832e-06, + "loss": 2.9833, + "step": 1324000 + }, + { + "epoch": 10.85, + "learning_rate": 5.678048299330912e-06, + "loss": 2.982, + "step": 1324500 + }, + { + "epoch": 10.86, + "learning_rate": 5.676404346610992e-06, + "loss": 2.9867, + "step": 1325000 + }, + { + "epoch": 10.86, + "learning_rate": 5.674760393891072e-06, + "loss": 2.9878, + "step": 1325500 + }, + { + "epoch": 10.86, + "learning_rate": 5.673116441171153e-06, + "loss": 2.9814, + "step": 1326000 + }, + { + "epoch": 10.87, + "learning_rate": 5.671472488451233e-06, + "loss": 2.9849, + "step": 1326500 + }, + { + "epoch": 10.87, + "learning_rate": 5.669828535731313e-06, + "loss": 2.9777, + "step": 1327000 + }, + { + "epoch": 10.88, + "learning_rate": 5.668184583011393e-06, + "loss": 2.9794, + "step": 1327500 + }, + { + "epoch": 10.88, + "learning_rate": 5.6665406302914734e-06, + "loss": 2.983, + "step": 1328000 + }, + { + "epoch": 10.88, + "learning_rate": 5.664896677571553e-06, + "loss": 2.9901, + "step": 1328500 + }, + { + "epoch": 10.89, + "learning_rate": 5.663252724851634e-06, + "loss": 2.9791, + "step": 1329000 + }, + { + "epoch": 10.89, + "learning_rate": 5.661608772131714e-06, + "loss": 2.9856, + "step": 1329500 + }, + { + "epoch": 10.9, + "learning_rate": 5.659964819411794e-06, + "loss": 2.9838, + "step": 1330000 + }, + { + "epoch": 10.9, + "learning_rate": 5.658320866691874e-06, + "loss": 2.9832, + "step": 1330500 + }, + { + "epoch": 10.9, + "learning_rate": 5.656676913971954e-06, + "loss": 2.985, + "step": 1331000 + }, + { + "epoch": 10.91, + "learning_rate": 5.655032961252035e-06, + "loss": 2.9829, + "step": 1331500 + }, + { + "epoch": 10.91, + "learning_rate": 5.653389008532115e-06, + "loss": 2.9868, + "step": 1332000 + }, + { + "epoch": 10.92, + "learning_rate": 5.6517450558121954e-06, + "loss": 2.9833, + "step": 1332500 + }, + { + "epoch": 10.92, + "learning_rate": 5.6501011030922756e-06, + "loss": 2.9865, + "step": 1333000 + }, + { + "epoch": 10.93, + "learning_rate": 5.6484571503723565e-06, + "loss": 2.9829, + "step": 1333500 + }, + { + "epoch": 10.93, + "learning_rate": 5.646813197652437e-06, + "loss": 2.9747, + "step": 1334000 + }, + { + "epoch": 10.93, + "learning_rate": 5.645169244932516e-06, + "loss": 2.9909, + "step": 1334500 + }, + { + "epoch": 10.94, + "learning_rate": 5.643525292212596e-06, + "loss": 2.9836, + "step": 1335000 + }, + { + "epoch": 10.94, + "learning_rate": 5.641881339492676e-06, + "loss": 2.9859, + "step": 1335500 + }, + { + "epoch": 10.95, + "learning_rate": 5.640237386772756e-06, + "loss": 2.9849, + "step": 1336000 + }, + { + "epoch": 10.95, + "learning_rate": 5.638593434052837e-06, + "loss": 2.9826, + "step": 1336500 + }, + { + "epoch": 10.95, + "learning_rate": 5.636949481332917e-06, + "loss": 2.9834, + "step": 1337000 + }, + { + "epoch": 10.96, + "learning_rate": 5.6353055286129975e-06, + "loss": 2.9836, + "step": 1337500 + }, + { + "epoch": 10.96, + "learning_rate": 5.633661575893078e-06, + "loss": 2.9822, + "step": 1338000 + }, + { + "epoch": 10.97, + "learning_rate": 5.632017623173159e-06, + "loss": 2.9908, + "step": 1338500 + }, + { + "epoch": 10.97, + "learning_rate": 5.630373670453239e-06, + "loss": 2.9815, + "step": 1339000 + }, + { + "epoch": 10.97, + "learning_rate": 5.628729717733319e-06, + "loss": 2.9858, + "step": 1339500 + }, + { + "epoch": 10.98, + "learning_rate": 5.627085765013399e-06, + "loss": 2.9802, + "step": 1340000 + }, + { + "epoch": 10.98, + "learning_rate": 5.625441812293478e-06, + "loss": 2.9778, + "step": 1340500 + }, + { + "epoch": 10.99, + "learning_rate": 5.623797859573558e-06, + "loss": 2.9915, + "step": 1341000 + }, + { + "epoch": 10.99, + "learning_rate": 5.622153906853639e-06, + "loss": 2.9819, + "step": 1341500 + }, + { + "epoch": 10.99, + "learning_rate": 5.6205099541337195e-06, + "loss": 2.9803, + "step": 1342000 + }, + { + "epoch": 11.0, + "learning_rate": 5.6188660014138e-06, + "loss": 2.9769, + "step": 1342500 + }, + { + "epoch": 11.0, + "learning_rate": 5.61722204869388e-06, + "loss": 2.9809, + "step": 1343000 + }, + { + "epoch": 11.01, + "learning_rate": 5.61557809597396e-06, + "loss": 2.981, + "step": 1343500 + }, + { + "epoch": 11.01, + "learning_rate": 5.613934143254041e-06, + "loss": 2.9829, + "step": 1344000 + }, + { + "epoch": 11.02, + "learning_rate": 5.612290190534121e-06, + "loss": 2.9789, + "step": 1344500 + }, + { + "epoch": 11.02, + "learning_rate": 5.610646237814201e-06, + "loss": 2.9819, + "step": 1345000 + }, + { + "epoch": 11.02, + "learning_rate": 5.609002285094281e-06, + "loss": 2.978, + "step": 1345500 + }, + { + "epoch": 11.03, + "learning_rate": 5.607358332374362e-06, + "loss": 2.987, + "step": 1346000 + }, + { + "epoch": 11.03, + "learning_rate": 5.6057143796544415e-06, + "loss": 2.9808, + "step": 1346500 + }, + { + "epoch": 11.04, + "learning_rate": 5.604070426934522e-06, + "loss": 2.9786, + "step": 1347000 + }, + { + "epoch": 11.04, + "learning_rate": 5.602426474214602e-06, + "loss": 2.9847, + "step": 1347500 + }, + { + "epoch": 11.04, + "learning_rate": 5.600782521494682e-06, + "loss": 2.9829, + "step": 1348000 + }, + { + "epoch": 11.05, + "learning_rate": 5.599138568774762e-06, + "loss": 2.9851, + "step": 1348500 + }, + { + "epoch": 11.05, + "learning_rate": 5.597494616054843e-06, + "loss": 2.9856, + "step": 1349000 + }, + { + "epoch": 11.06, + "learning_rate": 5.595850663334923e-06, + "loss": 2.9804, + "step": 1349500 + }, + { + "epoch": 11.06, + "learning_rate": 5.594206710615003e-06, + "loss": 2.978, + "step": 1350000 + }, + { + "epoch": 11.06, + "eval_accuracy": 0.4923695335880713, + "eval_loss": 2.841365337371826, + "eval_runtime": 412.3409, + "eval_samples_per_second": 747.818, + "eval_steps_per_second": 15.582, + "step": 1350000 + }, + { + "epoch": 11.06, + "learning_rate": 5.592562757895083e-06, + "loss": 2.9795, + "step": 1350500 + }, + { + "epoch": 11.07, + "learning_rate": 5.590918805175164e-06, + "loss": 2.979, + "step": 1351000 + }, + { + "epoch": 11.07, + "learning_rate": 5.5892748524552445e-06, + "loss": 2.9805, + "step": 1351500 + }, + { + "epoch": 11.08, + "learning_rate": 5.587630899735325e-06, + "loss": 2.9765, + "step": 1352000 + }, + { + "epoch": 11.08, + "learning_rate": 5.585986947015404e-06, + "loss": 2.984, + "step": 1352500 + }, + { + "epoch": 11.08, + "learning_rate": 5.584342994295484e-06, + "loss": 2.9842, + "step": 1353000 + }, + { + "epoch": 11.09, + "learning_rate": 5.582699041575564e-06, + "loss": 2.9697, + "step": 1353500 + }, + { + "epoch": 11.09, + "learning_rate": 5.581055088855645e-06, + "loss": 2.9756, + "step": 1354000 + }, + { + "epoch": 11.1, + "learning_rate": 5.579411136135725e-06, + "loss": 2.974, + "step": 1354500 + }, + { + "epoch": 11.1, + "learning_rate": 5.577767183415805e-06, + "loss": 2.9776, + "step": 1355000 + }, + { + "epoch": 11.11, + "learning_rate": 5.5761232306958855e-06, + "loss": 2.9763, + "step": 1355500 + }, + { + "epoch": 11.11, + "learning_rate": 5.574479277975966e-06, + "loss": 2.9764, + "step": 1356000 + }, + { + "epoch": 11.11, + "learning_rate": 5.572835325256047e-06, + "loss": 2.9828, + "step": 1356500 + }, + { + "epoch": 11.12, + "learning_rate": 5.571191372536127e-06, + "loss": 2.9758, + "step": 1357000 + }, + { + "epoch": 11.12, + "learning_rate": 5.569547419816207e-06, + "loss": 2.979, + "step": 1357500 + }, + { + "epoch": 11.13, + "learning_rate": 5.567903467096287e-06, + "loss": 2.9827, + "step": 1358000 + }, + { + "epoch": 11.13, + "learning_rate": 5.566259514376366e-06, + "loss": 2.9817, + "step": 1358500 + }, + { + "epoch": 11.13, + "learning_rate": 5.564615561656446e-06, + "loss": 2.9774, + "step": 1359000 + }, + { + "epoch": 11.14, + "learning_rate": 5.562971608936527e-06, + "loss": 2.9756, + "step": 1359500 + }, + { + "epoch": 11.14, + "learning_rate": 5.5613276562166075e-06, + "loss": 2.9787, + "step": 1360000 + }, + { + "epoch": 11.15, + "learning_rate": 5.559683703496688e-06, + "loss": 2.9761, + "step": 1360500 + }, + { + "epoch": 11.15, + "learning_rate": 5.558039750776768e-06, + "loss": 2.9789, + "step": 1361000 + }, + { + "epoch": 11.15, + "learning_rate": 5.556395798056849e-06, + "loss": 2.9803, + "step": 1361500 + }, + { + "epoch": 11.16, + "learning_rate": 5.554751845336929e-06, + "loss": 2.9784, + "step": 1362000 + }, + { + "epoch": 11.16, + "learning_rate": 5.553107892617009e-06, + "loss": 2.9759, + "step": 1362500 + }, + { + "epoch": 11.17, + "learning_rate": 5.551463939897089e-06, + "loss": 2.9823, + "step": 1363000 + }, + { + "epoch": 11.17, + "learning_rate": 5.54981998717717e-06, + "loss": 2.9767, + "step": 1363500 + }, + { + "epoch": 11.18, + "learning_rate": 5.54817603445725e-06, + "loss": 2.978, + "step": 1364000 + }, + { + "epoch": 11.18, + "learning_rate": 5.5465320817373294e-06, + "loss": 2.9843, + "step": 1364500 + }, + { + "epoch": 11.18, + "learning_rate": 5.5448881290174096e-06, + "loss": 2.9708, + "step": 1365000 + }, + { + "epoch": 11.19, + "learning_rate": 5.54324417629749e-06, + "loss": 2.9756, + "step": 1365500 + }, + { + "epoch": 11.19, + "learning_rate": 5.54160022357757e-06, + "loss": 2.973, + "step": 1366000 + }, + { + "epoch": 11.2, + "learning_rate": 5.539956270857651e-06, + "loss": 2.981, + "step": 1366500 + }, + { + "epoch": 11.2, + "learning_rate": 5.538312318137731e-06, + "loss": 2.971, + "step": 1367000 + }, + { + "epoch": 11.2, + "learning_rate": 5.536668365417811e-06, + "loss": 2.9819, + "step": 1367500 + }, + { + "epoch": 11.21, + "learning_rate": 5.535024412697891e-06, + "loss": 2.975, + "step": 1368000 + }, + { + "epoch": 11.21, + "learning_rate": 5.533380459977971e-06, + "loss": 2.9697, + "step": 1368500 + }, + { + "epoch": 11.22, + "learning_rate": 5.531736507258052e-06, + "loss": 2.9745, + "step": 1369000 + }, + { + "epoch": 11.22, + "learning_rate": 5.530092554538132e-06, + "loss": 2.9812, + "step": 1369500 + }, + { + "epoch": 11.22, + "learning_rate": 5.5284486018182125e-06, + "loss": 2.9816, + "step": 1370000 + }, + { + "epoch": 11.23, + "learning_rate": 5.526804649098292e-06, + "loss": 2.9712, + "step": 1370500 + }, + { + "epoch": 11.23, + "learning_rate": 5.525160696378372e-06, + "loss": 2.9771, + "step": 1371000 + }, + { + "epoch": 11.24, + "learning_rate": 5.523516743658452e-06, + "loss": 2.9747, + "step": 1371500 + }, + { + "epoch": 11.24, + "learning_rate": 5.521872790938533e-06, + "loss": 2.9702, + "step": 1372000 + }, + { + "epoch": 11.24, + "learning_rate": 5.520228838218613e-06, + "loss": 2.9757, + "step": 1372500 + }, + { + "epoch": 11.25, + "learning_rate": 5.518584885498693e-06, + "loss": 2.9752, + "step": 1373000 + }, + { + "epoch": 11.25, + "learning_rate": 5.516940932778773e-06, + "loss": 2.9699, + "step": 1373500 + }, + { + "epoch": 11.26, + "learning_rate": 5.515296980058854e-06, + "loss": 2.9753, + "step": 1374000 + }, + { + "epoch": 11.26, + "learning_rate": 5.5136530273389345e-06, + "loss": 2.9799, + "step": 1374500 + }, + { + "epoch": 11.27, + "learning_rate": 5.512009074619015e-06, + "loss": 2.9813, + "step": 1375000 + }, + { + "epoch": 11.27, + "learning_rate": 5.510365121899095e-06, + "loss": 2.9736, + "step": 1375500 + }, + { + "epoch": 11.27, + "learning_rate": 5.508721169179175e-06, + "loss": 2.9802, + "step": 1376000 + }, + { + "epoch": 11.28, + "learning_rate": 5.507077216459256e-06, + "loss": 2.9686, + "step": 1376500 + }, + { + "epoch": 11.28, + "learning_rate": 5.505433263739335e-06, + "loss": 2.9786, + "step": 1377000 + }, + { + "epoch": 11.29, + "learning_rate": 5.503789311019415e-06, + "loss": 2.9808, + "step": 1377500 + }, + { + "epoch": 11.29, + "learning_rate": 5.502145358299495e-06, + "loss": 2.9748, + "step": 1378000 + }, + { + "epoch": 11.29, + "learning_rate": 5.5005014055795755e-06, + "loss": 2.9771, + "step": 1378500 + }, + { + "epoch": 11.3, + "learning_rate": 5.4988574528596565e-06, + "loss": 2.9765, + "step": 1379000 + }, + { + "epoch": 11.3, + "learning_rate": 5.497213500139737e-06, + "loss": 2.9745, + "step": 1379500 + }, + { + "epoch": 11.31, + "learning_rate": 5.495569547419817e-06, + "loss": 2.9709, + "step": 1380000 + }, + { + "epoch": 11.31, + "eval_accuracy": 0.4926664563281789, + "eval_loss": 2.8374874591827393, + "eval_runtime": 405.084, + "eval_samples_per_second": 761.215, + "eval_steps_per_second": 15.861, + "step": 1380000 + }, + { + "epoch": 11.31, + "learning_rate": 5.493925594699897e-06, + "loss": 2.9771, + "step": 1380500 + }, + { + "epoch": 11.31, + "learning_rate": 5.492281641979977e-06, + "loss": 2.9832, + "step": 1381000 + }, + { + "epoch": 11.32, + "learning_rate": 5.490637689260058e-06, + "loss": 2.9741, + "step": 1381500 + }, + { + "epoch": 11.32, + "learning_rate": 5.488993736540138e-06, + "loss": 2.9815, + "step": 1382000 + }, + { + "epoch": 11.33, + "learning_rate": 5.487349783820218e-06, + "loss": 2.9758, + "step": 1382500 + }, + { + "epoch": 11.33, + "learning_rate": 5.4857058311002975e-06, + "loss": 2.9791, + "step": 1383000 + }, + { + "epoch": 11.33, + "learning_rate": 5.484061878380378e-06, + "loss": 2.9798, + "step": 1383500 + }, + { + "epoch": 11.34, + "learning_rate": 5.482417925660458e-06, + "loss": 2.9725, + "step": 1384000 + }, + { + "epoch": 11.34, + "learning_rate": 5.480773972940539e-06, + "loss": 2.975, + "step": 1384500 + }, + { + "epoch": 11.35, + "learning_rate": 5.479130020220619e-06, + "loss": 2.9717, + "step": 1385000 + }, + { + "epoch": 11.35, + "learning_rate": 5.477486067500699e-06, + "loss": 2.9701, + "step": 1385500 + }, + { + "epoch": 11.36, + "learning_rate": 5.475842114780779e-06, + "loss": 2.9725, + "step": 1386000 + }, + { + "epoch": 11.36, + "learning_rate": 5.47419816206086e-06, + "loss": 2.9766, + "step": 1386500 + }, + { + "epoch": 11.36, + "learning_rate": 5.47255420934094e-06, + "loss": 2.9762, + "step": 1387000 + }, + { + "epoch": 11.37, + "learning_rate": 5.47091025662102e-06, + "loss": 2.968, + "step": 1387500 + }, + { + "epoch": 11.37, + "learning_rate": 5.4692663039011005e-06, + "loss": 2.9754, + "step": 1388000 + }, + { + "epoch": 11.38, + "learning_rate": 5.467622351181181e-06, + "loss": 2.9663, + "step": 1388500 + }, + { + "epoch": 11.38, + "learning_rate": 5.46597839846126e-06, + "loss": 2.973, + "step": 1389000 + }, + { + "epoch": 11.38, + "learning_rate": 5.464334445741341e-06, + "loss": 2.9739, + "step": 1389500 + }, + { + "epoch": 11.39, + "learning_rate": 5.462690493021421e-06, + "loss": 2.9662, + "step": 1390000 + }, + { + "epoch": 11.39, + "learning_rate": 5.461046540301501e-06, + "loss": 2.9719, + "step": 1390500 + }, + { + "epoch": 11.4, + "learning_rate": 5.459402587581581e-06, + "loss": 2.9817, + "step": 1391000 + }, + { + "epoch": 11.4, + "learning_rate": 5.457758634861662e-06, + "loss": 2.9718, + "step": 1391500 + }, + { + "epoch": 11.4, + "learning_rate": 5.456114682141742e-06, + "loss": 2.9796, + "step": 1392000 + }, + { + "epoch": 11.41, + "learning_rate": 5.4544707294218225e-06, + "loss": 2.972, + "step": 1392500 + }, + { + "epoch": 11.41, + "learning_rate": 5.452826776701903e-06, + "loss": 2.9755, + "step": 1393000 + }, + { + "epoch": 11.42, + "learning_rate": 5.451182823981983e-06, + "loss": 2.9731, + "step": 1393500 + }, + { + "epoch": 11.42, + "learning_rate": 5.449538871262064e-06, + "loss": 2.9719, + "step": 1394000 + }, + { + "epoch": 11.42, + "learning_rate": 5.447894918542144e-06, + "loss": 2.9669, + "step": 1394500 + }, + { + "epoch": 11.43, + "learning_rate": 5.446250965822223e-06, + "loss": 2.974, + "step": 1395000 + }, + { + "epoch": 11.43, + "learning_rate": 5.444607013102303e-06, + "loss": 2.9758, + "step": 1395500 + }, + { + "epoch": 11.44, + "learning_rate": 5.442963060382383e-06, + "loss": 2.9695, + "step": 1396000 + }, + { + "epoch": 11.44, + "learning_rate": 5.4413191076624635e-06, + "loss": 2.9685, + "step": 1396500 + }, + { + "epoch": 11.45, + "learning_rate": 5.4396751549425444e-06, + "loss": 2.9747, + "step": 1397000 + }, + { + "epoch": 11.45, + "learning_rate": 5.4380312022226246e-06, + "loss": 2.973, + "step": 1397500 + }, + { + "epoch": 11.45, + "learning_rate": 5.436387249502705e-06, + "loss": 2.9753, + "step": 1398000 + }, + { + "epoch": 11.46, + "learning_rate": 5.434743296782785e-06, + "loss": 2.9692, + "step": 1398500 + }, + { + "epoch": 11.46, + "learning_rate": 5.433099344062866e-06, + "loss": 2.9772, + "step": 1399000 + }, + { + "epoch": 11.47, + "learning_rate": 5.431455391342946e-06, + "loss": 2.9683, + "step": 1399500 + }, + { + "epoch": 11.47, + "learning_rate": 5.429811438623026e-06, + "loss": 2.9707, + "step": 1400000 + }, + { + "epoch": 11.47, + "learning_rate": 5.428167485903106e-06, + "loss": 2.9687, + "step": 1400500 + }, + { + "epoch": 11.48, + "learning_rate": 5.4265235331831855e-06, + "loss": 2.9696, + "step": 1401000 + }, + { + "epoch": 11.48, + "learning_rate": 5.424879580463266e-06, + "loss": 2.9797, + "step": 1401500 + }, + { + "epoch": 11.49, + "learning_rate": 5.4232356277433466e-06, + "loss": 2.9733, + "step": 1402000 + }, + { + "epoch": 11.49, + "learning_rate": 5.421591675023427e-06, + "loss": 2.9671, + "step": 1402500 + }, + { + "epoch": 11.49, + "learning_rate": 5.419947722303507e-06, + "loss": 2.978, + "step": 1403000 + }, + { + "epoch": 11.5, + "learning_rate": 5.418303769583587e-06, + "loss": 2.9727, + "step": 1403500 + }, + { + "epoch": 11.5, + "learning_rate": 5.416659816863667e-06, + "loss": 2.9745, + "step": 1404000 + }, + { + "epoch": 11.51, + "learning_rate": 5.415015864143748e-06, + "loss": 2.9711, + "step": 1404500 + }, + { + "epoch": 11.51, + "learning_rate": 5.413371911423828e-06, + "loss": 2.9749, + "step": 1405000 + }, + { + "epoch": 11.52, + "learning_rate": 5.411727958703908e-06, + "loss": 2.972, + "step": 1405500 + }, + { + "epoch": 11.52, + "learning_rate": 5.410084005983988e-06, + "loss": 2.9717, + "step": 1406000 + }, + { + "epoch": 11.52, + "learning_rate": 5.408440053264069e-06, + "loss": 2.9745, + "step": 1406500 + }, + { + "epoch": 11.53, + "learning_rate": 5.406796100544149e-06, + "loss": 2.9727, + "step": 1407000 + }, + { + "epoch": 11.53, + "learning_rate": 5.405152147824229e-06, + "loss": 2.9724, + "step": 1407500 + }, + { + "epoch": 11.54, + "learning_rate": 5.403508195104309e-06, + "loss": 2.9654, + "step": 1408000 + }, + { + "epoch": 11.54, + "learning_rate": 5.401864242384389e-06, + "loss": 2.9749, + "step": 1408500 + }, + { + "epoch": 11.54, + "learning_rate": 5.400220289664469e-06, + "loss": 2.971, + "step": 1409000 + }, + { + "epoch": 11.55, + "learning_rate": 5.39857633694455e-06, + "loss": 2.9723, + "step": 1409500 + }, + { + "epoch": 11.55, + "learning_rate": 5.39693238422463e-06, + "loss": 2.9695, + "step": 1410000 + }, + { + "epoch": 11.55, + "eval_accuracy": 0.49317043725147325, + "eval_loss": 2.835310459136963, + "eval_runtime": 406.1684, + "eval_samples_per_second": 759.183, + "eval_steps_per_second": 15.819, + "step": 1410000 + }, + { + "epoch": 11.56, + "learning_rate": 5.39528843150471e-06, + "loss": 2.9687, + "step": 1410500 + }, + { + "epoch": 11.56, + "learning_rate": 5.3936444787847905e-06, + "loss": 2.9696, + "step": 1411000 + }, + { + "epoch": 11.56, + "learning_rate": 5.3920005260648715e-06, + "loss": 2.9737, + "step": 1411500 + }, + { + "epoch": 11.57, + "learning_rate": 5.390356573344952e-06, + "loss": 2.9715, + "step": 1412000 + }, + { + "epoch": 11.57, + "learning_rate": 5.388712620625032e-06, + "loss": 2.971, + "step": 1412500 + }, + { + "epoch": 11.58, + "learning_rate": 5.387068667905111e-06, + "loss": 2.9643, + "step": 1413000 + }, + { + "epoch": 11.58, + "learning_rate": 5.385424715185191e-06, + "loss": 2.9683, + "step": 1413500 + }, + { + "epoch": 11.58, + "learning_rate": 5.383780762465271e-06, + "loss": 2.9697, + "step": 1414000 + }, + { + "epoch": 11.59, + "learning_rate": 5.382136809745352e-06, + "loss": 2.9724, + "step": 1414500 + }, + { + "epoch": 11.59, + "learning_rate": 5.380492857025432e-06, + "loss": 2.9723, + "step": 1415000 + }, + { + "epoch": 11.6, + "learning_rate": 5.3788489043055125e-06, + "loss": 2.9665, + "step": 1415500 + }, + { + "epoch": 11.6, + "learning_rate": 5.377204951585593e-06, + "loss": 2.9792, + "step": 1416000 + }, + { + "epoch": 11.61, + "learning_rate": 5.375560998865673e-06, + "loss": 2.9728, + "step": 1416500 + }, + { + "epoch": 11.61, + "learning_rate": 5.373917046145754e-06, + "loss": 2.9705, + "step": 1417000 + }, + { + "epoch": 11.61, + "learning_rate": 5.372273093425834e-06, + "loss": 2.9694, + "step": 1417500 + }, + { + "epoch": 11.62, + "learning_rate": 5.370629140705914e-06, + "loss": 2.9662, + "step": 1418000 + }, + { + "epoch": 11.62, + "learning_rate": 5.368985187985994e-06, + "loss": 2.9696, + "step": 1418500 + }, + { + "epoch": 11.63, + "learning_rate": 5.367341235266073e-06, + "loss": 2.9668, + "step": 1419000 + }, + { + "epoch": 11.63, + "learning_rate": 5.365697282546154e-06, + "loss": 2.9754, + "step": 1419500 + }, + { + "epoch": 11.63, + "learning_rate": 5.3640533298262345e-06, + "loss": 2.9687, + "step": 1420000 + }, + { + "epoch": 11.64, + "learning_rate": 5.362409377106315e-06, + "loss": 2.9657, + "step": 1420500 + }, + { + "epoch": 11.64, + "learning_rate": 5.360765424386395e-06, + "loss": 2.9687, + "step": 1421000 + }, + { + "epoch": 11.65, + "learning_rate": 5.359121471666475e-06, + "loss": 2.963, + "step": 1421500 + }, + { + "epoch": 11.65, + "learning_rate": 5.357477518946556e-06, + "loss": 2.9688, + "step": 1422000 + }, + { + "epoch": 11.65, + "learning_rate": 5.355833566226636e-06, + "loss": 2.9654, + "step": 1422500 + }, + { + "epoch": 11.66, + "learning_rate": 5.354189613506716e-06, + "loss": 2.973, + "step": 1423000 + }, + { + "epoch": 11.66, + "learning_rate": 5.352545660786796e-06, + "loss": 2.9717, + "step": 1423500 + }, + { + "epoch": 11.67, + "learning_rate": 5.350901708066877e-06, + "loss": 2.9654, + "step": 1424000 + }, + { + "epoch": 11.67, + "learning_rate": 5.349257755346957e-06, + "loss": 2.96, + "step": 1424500 + }, + { + "epoch": 11.67, + "learning_rate": 5.347613802627037e-06, + "loss": 2.9653, + "step": 1425000 + }, + { + "epoch": 11.68, + "learning_rate": 5.345969849907117e-06, + "loss": 2.9667, + "step": 1425500 + }, + { + "epoch": 11.68, + "learning_rate": 5.344325897187197e-06, + "loss": 2.9697, + "step": 1426000 + }, + { + "epoch": 11.69, + "learning_rate": 5.342681944467277e-06, + "loss": 2.9725, + "step": 1426500 + }, + { + "epoch": 11.69, + "learning_rate": 5.341037991747358e-06, + "loss": 2.9673, + "step": 1427000 + }, + { + "epoch": 11.7, + "learning_rate": 5.339394039027438e-06, + "loss": 2.9679, + "step": 1427500 + }, + { + "epoch": 11.7, + "learning_rate": 5.337750086307518e-06, + "loss": 2.9735, + "step": 1428000 + }, + { + "epoch": 11.7, + "learning_rate": 5.336106133587598e-06, + "loss": 2.9692, + "step": 1428500 + }, + { + "epoch": 11.71, + "learning_rate": 5.3344621808676785e-06, + "loss": 2.9707, + "step": 1429000 + }, + { + "epoch": 11.71, + "learning_rate": 5.3328182281477594e-06, + "loss": 2.9742, + "step": 1429500 + }, + { + "epoch": 11.72, + "learning_rate": 5.3311742754278396e-06, + "loss": 2.9593, + "step": 1430000 + }, + { + "epoch": 11.72, + "learning_rate": 5.32953032270792e-06, + "loss": 2.9698, + "step": 1430500 + }, + { + "epoch": 11.72, + "learning_rate": 5.327886369988e-06, + "loss": 2.9685, + "step": 1431000 + }, + { + "epoch": 11.73, + "learning_rate": 5.326242417268079e-06, + "loss": 2.9763, + "step": 1431500 + }, + { + "epoch": 11.73, + "learning_rate": 5.324598464548159e-06, + "loss": 2.9721, + "step": 1432000 + }, + { + "epoch": 11.74, + "learning_rate": 5.32295451182824e-06, + "loss": 2.9659, + "step": 1432500 + }, + { + "epoch": 11.74, + "learning_rate": 5.32131055910832e-06, + "loss": 2.9669, + "step": 1433000 + }, + { + "epoch": 11.74, + "learning_rate": 5.3196666063884005e-06, + "loss": 2.9627, + "step": 1433500 + }, + { + "epoch": 11.75, + "learning_rate": 5.318022653668481e-06, + "loss": 2.9696, + "step": 1434000 + }, + { + "epoch": 11.75, + "learning_rate": 5.3163787009485616e-06, + "loss": 2.972, + "step": 1434500 + }, + { + "epoch": 11.76, + "learning_rate": 5.314734748228642e-06, + "loss": 2.9714, + "step": 1435000 + }, + { + "epoch": 11.76, + "learning_rate": 5.313090795508722e-06, + "loss": 2.9683, + "step": 1435500 + }, + { + "epoch": 11.76, + "learning_rate": 5.311446842788802e-06, + "loss": 2.9655, + "step": 1436000 + }, + { + "epoch": 11.77, + "learning_rate": 5.309802890068883e-06, + "loss": 2.9673, + "step": 1436500 + }, + { + "epoch": 11.77, + "learning_rate": 5.308158937348963e-06, + "loss": 2.9678, + "step": 1437000 + }, + { + "epoch": 11.78, + "learning_rate": 5.306514984629042e-06, + "loss": 2.969, + "step": 1437500 + }, + { + "epoch": 11.78, + "learning_rate": 5.3048710319091224e-06, + "loss": 2.9631, + "step": 1438000 + }, + { + "epoch": 11.79, + "learning_rate": 5.3032270791892026e-06, + "loss": 2.9681, + "step": 1438500 + }, + { + "epoch": 11.79, + "learning_rate": 5.301583126469283e-06, + "loss": 2.9658, + "step": 1439000 + }, + { + "epoch": 11.79, + "learning_rate": 5.299939173749364e-06, + "loss": 2.9621, + "step": 1439500 + }, + { + "epoch": 11.8, + "learning_rate": 5.298295221029444e-06, + "loss": 2.9607, + "step": 1440000 + }, + { + "epoch": 11.8, + "eval_accuracy": 0.4940959315942787, + "eval_loss": 2.829009771347046, + "eval_runtime": 405.0153, + "eval_samples_per_second": 761.344, + "eval_steps_per_second": 15.864, + "step": 1440000 + }, + { + "epoch": 11.8, + "learning_rate": 5.296651268309524e-06, + "loss": 2.9657, + "step": 1440500 + }, + { + "epoch": 11.81, + "learning_rate": 5.295007315589604e-06, + "loss": 2.9663, + "step": 1441000 + }, + { + "epoch": 11.81, + "learning_rate": 5.293363362869684e-06, + "loss": 2.9679, + "step": 1441500 + }, + { + "epoch": 11.81, + "learning_rate": 5.291719410149765e-06, + "loss": 2.9643, + "step": 1442000 + }, + { + "epoch": 11.82, + "learning_rate": 5.290075457429845e-06, + "loss": 2.975, + "step": 1442500 + }, + { + "epoch": 11.82, + "learning_rate": 5.288431504709925e-06, + "loss": 2.963, + "step": 1443000 + }, + { + "epoch": 11.83, + "learning_rate": 5.286787551990005e-06, + "loss": 2.9608, + "step": 1443500 + }, + { + "epoch": 11.83, + "learning_rate": 5.285143599270085e-06, + "loss": 2.962, + "step": 1444000 + }, + { + "epoch": 11.83, + "learning_rate": 5.283499646550165e-06, + "loss": 2.9687, + "step": 1444500 + }, + { + "epoch": 11.84, + "learning_rate": 5.281855693830246e-06, + "loss": 2.9767, + "step": 1445000 + }, + { + "epoch": 11.84, + "learning_rate": 5.280211741110326e-06, + "loss": 2.9664, + "step": 1445500 + }, + { + "epoch": 11.85, + "learning_rate": 5.278567788390406e-06, + "loss": 2.9695, + "step": 1446000 + }, + { + "epoch": 11.85, + "learning_rate": 5.276923835670486e-06, + "loss": 2.9689, + "step": 1446500 + }, + { + "epoch": 11.86, + "learning_rate": 5.275279882950567e-06, + "loss": 2.968, + "step": 1447000 + }, + { + "epoch": 11.86, + "learning_rate": 5.273635930230647e-06, + "loss": 2.9666, + "step": 1447500 + }, + { + "epoch": 11.86, + "learning_rate": 5.2719919775107275e-06, + "loss": 2.9656, + "step": 1448000 + }, + { + "epoch": 11.87, + "learning_rate": 5.270348024790808e-06, + "loss": 2.9618, + "step": 1448500 + }, + { + "epoch": 11.87, + "learning_rate": 5.268704072070888e-06, + "loss": 2.9627, + "step": 1449000 + }, + { + "epoch": 11.88, + "learning_rate": 5.267060119350967e-06, + "loss": 2.9654, + "step": 1449500 + }, + { + "epoch": 11.88, + "learning_rate": 5.265416166631048e-06, + "loss": 2.974, + "step": 1450000 + }, + { + "epoch": 11.88, + "learning_rate": 5.263772213911128e-06, + "loss": 2.9723, + "step": 1450500 + }, + { + "epoch": 11.89, + "learning_rate": 5.262128261191208e-06, + "loss": 2.9629, + "step": 1451000 + }, + { + "epoch": 11.89, + "learning_rate": 5.260484308471288e-06, + "loss": 2.9679, + "step": 1451500 + }, + { + "epoch": 11.9, + "learning_rate": 5.258840355751369e-06, + "loss": 2.9637, + "step": 1452000 + }, + { + "epoch": 11.9, + "learning_rate": 5.2571964030314495e-06, + "loss": 2.9677, + "step": 1452500 + }, + { + "epoch": 11.9, + "learning_rate": 5.25555245031153e-06, + "loss": 2.9707, + "step": 1453000 + }, + { + "epoch": 11.91, + "learning_rate": 5.25390849759161e-06, + "loss": 2.9706, + "step": 1453500 + }, + { + "epoch": 11.91, + "learning_rate": 5.25226454487169e-06, + "loss": 2.9627, + "step": 1454000 + }, + { + "epoch": 11.92, + "learning_rate": 5.250620592151771e-06, + "loss": 2.9631, + "step": 1454500 + }, + { + "epoch": 11.92, + "learning_rate": 5.248976639431851e-06, + "loss": 2.9652, + "step": 1455000 + }, + { + "epoch": 11.92, + "learning_rate": 5.24733268671193e-06, + "loss": 2.9613, + "step": 1455500 + }, + { + "epoch": 11.93, + "learning_rate": 5.24568873399201e-06, + "loss": 2.9683, + "step": 1456000 + }, + { + "epoch": 11.93, + "learning_rate": 5.2440447812720905e-06, + "loss": 2.954, + "step": 1456500 + }, + { + "epoch": 11.94, + "learning_rate": 5.242400828552171e-06, + "loss": 2.9646, + "step": 1457000 + }, + { + "epoch": 11.94, + "learning_rate": 5.240756875832252e-06, + "loss": 2.9694, + "step": 1457500 + }, + { + "epoch": 11.95, + "learning_rate": 5.239112923112332e-06, + "loss": 2.9651, + "step": 1458000 + }, + { + "epoch": 11.95, + "learning_rate": 5.237468970392412e-06, + "loss": 2.9675, + "step": 1458500 + }, + { + "epoch": 11.95, + "learning_rate": 5.235825017672492e-06, + "loss": 2.967, + "step": 1459000 + }, + { + "epoch": 11.96, + "learning_rate": 5.234181064952573e-06, + "loss": 2.9687, + "step": 1459500 + }, + { + "epoch": 11.96, + "learning_rate": 5.232537112232653e-06, + "loss": 2.9624, + "step": 1460000 + }, + { + "epoch": 11.97, + "learning_rate": 5.230893159512733e-06, + "loss": 2.9649, + "step": 1460500 + }, + { + "epoch": 11.97, + "learning_rate": 5.229249206792813e-06, + "loss": 2.9656, + "step": 1461000 + }, + { + "epoch": 11.97, + "learning_rate": 5.227605254072893e-06, + "loss": 2.9581, + "step": 1461500 + }, + { + "epoch": 11.98, + "learning_rate": 5.225961301352973e-06, + "loss": 2.964, + "step": 1462000 + }, + { + "epoch": 11.98, + "learning_rate": 5.224317348633054e-06, + "loss": 2.958, + "step": 1462500 + }, + { + "epoch": 11.99, + "learning_rate": 5.222673395913134e-06, + "loss": 2.9638, + "step": 1463000 + }, + { + "epoch": 11.99, + "learning_rate": 5.221029443193214e-06, + "loss": 2.9679, + "step": 1463500 + }, + { + "epoch": 11.99, + "learning_rate": 5.219385490473294e-06, + "loss": 2.9585, + "step": 1464000 + }, + { + "epoch": 12.0, + "learning_rate": 5.217741537753375e-06, + "loss": 2.9664, + "step": 1464500 + }, + { + "epoch": 12.0, + "learning_rate": 5.216097585033455e-06, + "loss": 2.9604, + "step": 1465000 + }, + { + "epoch": 12.01, + "learning_rate": 5.214453632313535e-06, + "loss": 2.9489, + "step": 1465500 + }, + { + "epoch": 12.01, + "learning_rate": 5.2128096795936155e-06, + "loss": 2.9678, + "step": 1466000 + }, + { + "epoch": 12.01, + "learning_rate": 5.211165726873696e-06, + "loss": 2.9626, + "step": 1466500 + }, + { + "epoch": 12.02, + "learning_rate": 5.2095217741537766e-06, + "loss": 2.9567, + "step": 1467000 + }, + { + "epoch": 12.02, + "learning_rate": 5.207877821433856e-06, + "loss": 2.9636, + "step": 1467500 + }, + { + "epoch": 12.03, + "learning_rate": 5.206233868713936e-06, + "loss": 2.9654, + "step": 1468000 + }, + { + "epoch": 12.03, + "learning_rate": 5.204589915994016e-06, + "loss": 2.9617, + "step": 1468500 + }, + { + "epoch": 12.04, + "learning_rate": 5.202945963274096e-06, + "loss": 2.959, + "step": 1469000 + }, + { + "epoch": 12.04, + "learning_rate": 5.201302010554176e-06, + "loss": 2.9599, + "step": 1469500 + }, + { + "epoch": 12.04, + "learning_rate": 5.199658057834257e-06, + "loss": 2.9636, + "step": 1470000 + }, + { + "epoch": 12.04, + "eval_accuracy": 0.494423367210693, + "eval_loss": 2.8266873359680176, + "eval_runtime": 407.8041, + "eval_samples_per_second": 756.138, + "eval_steps_per_second": 15.755, + "step": 1470000 + }, + { + "epoch": 12.05, + "learning_rate": 5.1980141051143374e-06, + "loss": 2.9612, + "step": 1470500 + }, + { + "epoch": 12.05, + "learning_rate": 5.1963701523944176e-06, + "loss": 2.9556, + "step": 1471000 + }, + { + "epoch": 12.06, + "learning_rate": 5.194726199674498e-06, + "loss": 2.9642, + "step": 1471500 + }, + { + "epoch": 12.06, + "learning_rate": 5.193082246954579e-06, + "loss": 2.9593, + "step": 1472000 + }, + { + "epoch": 12.06, + "learning_rate": 5.191438294234659e-06, + "loss": 2.9578, + "step": 1472500 + }, + { + "epoch": 12.07, + "learning_rate": 5.189794341514739e-06, + "loss": 2.962, + "step": 1473000 + }, + { + "epoch": 12.07, + "learning_rate": 5.188150388794818e-06, + "loss": 2.9613, + "step": 1473500 + }, + { + "epoch": 12.08, + "learning_rate": 5.186506436074898e-06, + "loss": 2.96, + "step": 1474000 + }, + { + "epoch": 12.08, + "learning_rate": 5.1848624833549785e-06, + "loss": 2.9622, + "step": 1474500 + }, + { + "epoch": 12.08, + "learning_rate": 5.1832185306350594e-06, + "loss": 2.9623, + "step": 1475000 + }, + { + "epoch": 12.09, + "learning_rate": 5.1815745779151396e-06, + "loss": 2.9627, + "step": 1475500 + }, + { + "epoch": 12.09, + "learning_rate": 5.17993062519522e-06, + "loss": 2.9649, + "step": 1476000 + }, + { + "epoch": 12.1, + "learning_rate": 5.1782866724753e-06, + "loss": 2.9676, + "step": 1476500 + }, + { + "epoch": 12.1, + "learning_rate": 5.17664271975538e-06, + "loss": 2.9661, + "step": 1477000 + }, + { + "epoch": 12.1, + "learning_rate": 5.174998767035461e-06, + "loss": 2.9663, + "step": 1477500 + }, + { + "epoch": 12.11, + "learning_rate": 5.173354814315541e-06, + "loss": 2.9647, + "step": 1478000 + }, + { + "epoch": 12.11, + "learning_rate": 5.171710861595621e-06, + "loss": 2.9625, + "step": 1478500 + }, + { + "epoch": 12.12, + "learning_rate": 5.170066908875701e-06, + "loss": 2.9613, + "step": 1479000 + }, + { + "epoch": 12.12, + "learning_rate": 5.168422956155782e-06, + "loss": 2.9616, + "step": 1479500 + }, + { + "epoch": 12.13, + "learning_rate": 5.1667790034358615e-06, + "loss": 2.9624, + "step": 1480000 + }, + { + "epoch": 12.13, + "learning_rate": 5.165135050715942e-06, + "loss": 2.9611, + "step": 1480500 + }, + { + "epoch": 12.13, + "learning_rate": 5.163491097996022e-06, + "loss": 2.9657, + "step": 1481000 + }, + { + "epoch": 12.14, + "learning_rate": 5.161847145276102e-06, + "loss": 2.9649, + "step": 1481500 + }, + { + "epoch": 12.14, + "learning_rate": 5.160203192556182e-06, + "loss": 2.9588, + "step": 1482000 + }, + { + "epoch": 12.15, + "learning_rate": 5.158559239836263e-06, + "loss": 2.9646, + "step": 1482500 + }, + { + "epoch": 12.15, + "learning_rate": 5.156915287116343e-06, + "loss": 2.968, + "step": 1483000 + }, + { + "epoch": 12.15, + "learning_rate": 5.155271334396423e-06, + "loss": 2.9627, + "step": 1483500 + }, + { + "epoch": 12.16, + "learning_rate": 5.153627381676503e-06, + "loss": 2.9611, + "step": 1484000 + }, + { + "epoch": 12.16, + "learning_rate": 5.151983428956584e-06, + "loss": 2.9668, + "step": 1484500 + }, + { + "epoch": 12.17, + "learning_rate": 5.1503394762366645e-06, + "loss": 2.954, + "step": 1485000 + }, + { + "epoch": 12.17, + "learning_rate": 5.148695523516745e-06, + "loss": 2.963, + "step": 1485500 + }, + { + "epoch": 12.17, + "learning_rate": 5.147051570796824e-06, + "loss": 2.9599, + "step": 1486000 + }, + { + "epoch": 12.18, + "learning_rate": 5.145407618076904e-06, + "loss": 2.9564, + "step": 1486500 + }, + { + "epoch": 12.18, + "learning_rate": 5.143763665356984e-06, + "loss": 2.9578, + "step": 1487000 + }, + { + "epoch": 12.19, + "learning_rate": 5.142119712637065e-06, + "loss": 2.9657, + "step": 1487500 + }, + { + "epoch": 12.19, + "learning_rate": 5.140475759917145e-06, + "loss": 2.9635, + "step": 1488000 + }, + { + "epoch": 12.2, + "learning_rate": 5.138831807197225e-06, + "loss": 2.966, + "step": 1488500 + }, + { + "epoch": 12.2, + "learning_rate": 5.1371878544773055e-06, + "loss": 2.96, + "step": 1489000 + }, + { + "epoch": 12.2, + "learning_rate": 5.135543901757386e-06, + "loss": 2.9557, + "step": 1489500 + }, + { + "epoch": 12.21, + "learning_rate": 5.133899949037467e-06, + "loss": 2.96, + "step": 1490000 + }, + { + "epoch": 12.21, + "learning_rate": 5.132255996317547e-06, + "loss": 2.967, + "step": 1490500 + }, + { + "epoch": 12.22, + "learning_rate": 5.130612043597627e-06, + "loss": 2.9576, + "step": 1491000 + }, + { + "epoch": 12.22, + "learning_rate": 5.128968090877707e-06, + "loss": 2.9589, + "step": 1491500 + }, + { + "epoch": 12.22, + "learning_rate": 5.127324138157786e-06, + "loss": 2.9561, + "step": 1492000 + }, + { + "epoch": 12.23, + "learning_rate": 5.125680185437867e-06, + "loss": 2.9592, + "step": 1492500 + }, + { + "epoch": 12.23, + "learning_rate": 5.124036232717947e-06, + "loss": 2.9611, + "step": 1493000 + }, + { + "epoch": 12.24, + "learning_rate": 5.1223922799980275e-06, + "loss": 2.9568, + "step": 1493500 + }, + { + "epoch": 12.24, + "learning_rate": 5.120748327278108e-06, + "loss": 2.96, + "step": 1494000 + }, + { + "epoch": 12.24, + "learning_rate": 5.119104374558188e-06, + "loss": 2.961, + "step": 1494500 + }, + { + "epoch": 12.25, + "learning_rate": 5.117460421838269e-06, + "loss": 2.9604, + "step": 1495000 + }, + { + "epoch": 12.25, + "learning_rate": 5.115816469118349e-06, + "loss": 2.9559, + "step": 1495500 + }, + { + "epoch": 12.26, + "learning_rate": 5.114172516398429e-06, + "loss": 2.9636, + "step": 1496000 + }, + { + "epoch": 12.26, + "learning_rate": 5.112528563678509e-06, + "loss": 2.9589, + "step": 1496500 + }, + { + "epoch": 12.26, + "learning_rate": 5.11088461095859e-06, + "loss": 2.9592, + "step": 1497000 + }, + { + "epoch": 12.27, + "learning_rate": 5.10924065823867e-06, + "loss": 2.9643, + "step": 1497500 + }, + { + "epoch": 12.27, + "learning_rate": 5.1075967055187495e-06, + "loss": 2.9587, + "step": 1498000 + }, + { + "epoch": 12.28, + "learning_rate": 5.10595275279883e-06, + "loss": 2.9601, + "step": 1498500 + }, + { + "epoch": 12.28, + "learning_rate": 5.10430880007891e-06, + "loss": 2.9574, + "step": 1499000 + }, + { + "epoch": 12.29, + "learning_rate": 5.10266484735899e-06, + "loss": 2.9626, + "step": 1499500 + }, + { + "epoch": 12.29, + "learning_rate": 5.101020894639071e-06, + "loss": 2.9584, + "step": 1500000 + }, + { + "epoch": 12.29, + "eval_accuracy": 0.49460218852402815, + "eval_loss": 2.8247358798980713, + "eval_runtime": 404.5149, + "eval_samples_per_second": 762.286, + "eval_steps_per_second": 15.883, + "step": 1500000 + }, + { + "epoch": 12.29, + "learning_rate": 5.099376941919151e-06, + "loss": 2.9567, + "step": 1500500 + }, + { + "epoch": 12.3, + "learning_rate": 5.097732989199231e-06, + "loss": 2.9534, + "step": 1501000 + }, + { + "epoch": 12.3, + "learning_rate": 5.096089036479311e-06, + "loss": 2.9594, + "step": 1501500 + }, + { + "epoch": 12.31, + "learning_rate": 5.094445083759391e-06, + "loss": 2.9505, + "step": 1502000 + }, + { + "epoch": 12.31, + "learning_rate": 5.092801131039472e-06, + "loss": 2.9665, + "step": 1502500 + }, + { + "epoch": 12.31, + "learning_rate": 5.0911571783195524e-06, + "loss": 2.9554, + "step": 1503000 + }, + { + "epoch": 12.32, + "learning_rate": 5.0895132255996326e-06, + "loss": 2.9596, + "step": 1503500 + }, + { + "epoch": 12.32, + "learning_rate": 5.087869272879712e-06, + "loss": 2.9574, + "step": 1504000 + }, + { + "epoch": 12.33, + "learning_rate": 5.086225320159792e-06, + "loss": 2.9596, + "step": 1504500 + }, + { + "epoch": 12.33, + "learning_rate": 5.084581367439872e-06, + "loss": 2.9548, + "step": 1505000 + }, + { + "epoch": 12.33, + "learning_rate": 5.082937414719953e-06, + "loss": 2.9619, + "step": 1505500 + }, + { + "epoch": 12.34, + "learning_rate": 5.081293462000033e-06, + "loss": 2.9556, + "step": 1506000 + }, + { + "epoch": 12.34, + "learning_rate": 5.079649509280113e-06, + "loss": 2.9447, + "step": 1506500 + }, + { + "epoch": 12.35, + "learning_rate": 5.0780055565601935e-06, + "loss": 2.951, + "step": 1507000 + }, + { + "epoch": 12.35, + "learning_rate": 5.0763616038402744e-06, + "loss": 2.9613, + "step": 1507500 + }, + { + "epoch": 12.35, + "learning_rate": 5.0747176511203546e-06, + "loss": 2.9594, + "step": 1508000 + }, + { + "epoch": 12.36, + "learning_rate": 5.073073698400435e-06, + "loss": 2.9587, + "step": 1508500 + }, + { + "epoch": 12.36, + "learning_rate": 5.071429745680515e-06, + "loss": 2.9508, + "step": 1509000 + }, + { + "epoch": 12.37, + "learning_rate": 5.069785792960596e-06, + "loss": 2.9505, + "step": 1509500 + }, + { + "epoch": 12.37, + "learning_rate": 5.068141840240674e-06, + "loss": 2.9547, + "step": 1510000 + }, + { + "epoch": 12.38, + "learning_rate": 5.066497887520755e-06, + "loss": 2.9588, + "step": 1510500 + }, + { + "epoch": 12.38, + "learning_rate": 5.064853934800835e-06, + "loss": 2.9564, + "step": 1511000 + }, + { + "epoch": 12.38, + "learning_rate": 5.0632099820809154e-06, + "loss": 2.953, + "step": 1511500 + }, + { + "epoch": 12.39, + "learning_rate": 5.0615660293609956e-06, + "loss": 2.964, + "step": 1512000 + }, + { + "epoch": 12.39, + "learning_rate": 5.0599220766410765e-06, + "loss": 2.9576, + "step": 1512500 + }, + { + "epoch": 12.4, + "learning_rate": 5.058278123921157e-06, + "loss": 2.9596, + "step": 1513000 + }, + { + "epoch": 12.4, + "learning_rate": 5.056634171201237e-06, + "loss": 2.9625, + "step": 1513500 + }, + { + "epoch": 12.4, + "learning_rate": 5.054990218481317e-06, + "loss": 2.9526, + "step": 1514000 + }, + { + "epoch": 12.41, + "learning_rate": 5.053346265761397e-06, + "loss": 2.957, + "step": 1514500 + }, + { + "epoch": 12.41, + "learning_rate": 5.051702313041478e-06, + "loss": 2.9643, + "step": 1515000 + }, + { + "epoch": 12.42, + "learning_rate": 5.050058360321558e-06, + "loss": 2.9526, + "step": 1515500 + }, + { + "epoch": 12.42, + "learning_rate": 5.048414407601637e-06, + "loss": 2.9535, + "step": 1516000 + }, + { + "epoch": 12.42, + "learning_rate": 5.0467704548817175e-06, + "loss": 2.9598, + "step": 1516500 + }, + { + "epoch": 12.43, + "learning_rate": 5.045126502161798e-06, + "loss": 2.953, + "step": 1517000 + }, + { + "epoch": 12.43, + "learning_rate": 5.043482549441878e-06, + "loss": 2.9571, + "step": 1517500 + }, + { + "epoch": 12.44, + "learning_rate": 5.041838596721959e-06, + "loss": 2.9583, + "step": 1518000 + }, + { + "epoch": 12.44, + "learning_rate": 5.040194644002039e-06, + "loss": 2.9615, + "step": 1518500 + }, + { + "epoch": 12.44, + "learning_rate": 5.038550691282119e-06, + "loss": 2.9541, + "step": 1519000 + }, + { + "epoch": 12.45, + "learning_rate": 5.036906738562199e-06, + "loss": 2.9595, + "step": 1519500 + }, + { + "epoch": 12.45, + "learning_rate": 5.03526278584228e-06, + "loss": 2.9549, + "step": 1520000 + }, + { + "epoch": 12.46, + "learning_rate": 5.03361883312236e-06, + "loss": 2.958, + "step": 1520500 + }, + { + "epoch": 12.46, + "learning_rate": 5.03197488040244e-06, + "loss": 2.9547, + "step": 1521000 + }, + { + "epoch": 12.47, + "learning_rate": 5.0303309276825205e-06, + "loss": 2.9544, + "step": 1521500 + }, + { + "epoch": 12.47, + "learning_rate": 5.0286869749626e-06, + "loss": 2.9553, + "step": 1522000 + }, + { + "epoch": 12.47, + "learning_rate": 5.02704302224268e-06, + "loss": 2.9533, + "step": 1522500 + }, + { + "epoch": 12.48, + "learning_rate": 5.025399069522761e-06, + "loss": 2.9578, + "step": 1523000 + }, + { + "epoch": 12.48, + "learning_rate": 5.023755116802841e-06, + "loss": 2.9628, + "step": 1523500 + }, + { + "epoch": 12.49, + "learning_rate": 5.022111164082921e-06, + "loss": 2.9515, + "step": 1524000 + }, + { + "epoch": 12.49, + "learning_rate": 5.020467211363001e-06, + "loss": 2.9511, + "step": 1524500 + }, + { + "epoch": 12.49, + "learning_rate": 5.018823258643082e-06, + "loss": 2.9623, + "step": 1525000 + }, + { + "epoch": 12.5, + "learning_rate": 5.017179305923162e-06, + "loss": 2.9648, + "step": 1525500 + }, + { + "epoch": 12.5, + "learning_rate": 5.0155353532032425e-06, + "loss": 2.9571, + "step": 1526000 + }, + { + "epoch": 12.51, + "learning_rate": 5.013891400483323e-06, + "loss": 2.9543, + "step": 1526500 + }, + { + "epoch": 12.51, + "learning_rate": 5.012247447763403e-06, + "loss": 2.9567, + "step": 1527000 + }, + { + "epoch": 12.51, + "learning_rate": 5.010603495043484e-06, + "loss": 2.9549, + "step": 1527500 + }, + { + "epoch": 12.52, + "learning_rate": 5.008959542323563e-06, + "loss": 2.9604, + "step": 1528000 + }, + { + "epoch": 12.52, + "learning_rate": 5.007315589603643e-06, + "loss": 2.9591, + "step": 1528500 + }, + { + "epoch": 12.53, + "learning_rate": 5.005671636883723e-06, + "loss": 2.9592, + "step": 1529000 + }, + { + "epoch": 12.53, + "learning_rate": 5.004027684163803e-06, + "loss": 2.9529, + "step": 1529500 + }, + { + "epoch": 12.54, + "learning_rate": 5.0023837314438835e-06, + "loss": 2.9546, + "step": 1530000 + }, + { + "epoch": 12.54, + "eval_accuracy": 0.4951152598295225, + "eval_loss": 2.8195910453796387, + "eval_runtime": 406.5071, + "eval_samples_per_second": 758.55, + "eval_steps_per_second": 15.805, + "step": 1530000 + }, + { + "epoch": 12.54, + "learning_rate": 5.0007397787239645e-06, + "loss": 2.9556, + "step": 1530500 + }, + { + "epoch": 12.54, + "learning_rate": 4.999095826004045e-06, + "loss": 2.9566, + "step": 1531000 + }, + { + "epoch": 12.55, + "learning_rate": 4.997451873284125e-06, + "loss": 2.9579, + "step": 1531500 + }, + { + "epoch": 12.55, + "learning_rate": 4.995807920564205e-06, + "loss": 2.9552, + "step": 1532000 + }, + { + "epoch": 12.56, + "learning_rate": 4.994163967844286e-06, + "loss": 2.9573, + "step": 1532500 + }, + { + "epoch": 12.56, + "learning_rate": 4.992520015124365e-06, + "loss": 2.9501, + "step": 1533000 + }, + { + "epoch": 12.56, + "learning_rate": 4.990876062404445e-06, + "loss": 2.9542, + "step": 1533500 + }, + { + "epoch": 12.57, + "learning_rate": 4.989232109684526e-06, + "loss": 2.9543, + "step": 1534000 + }, + { + "epoch": 12.57, + "learning_rate": 4.987588156964606e-06, + "loss": 2.9581, + "step": 1534500 + }, + { + "epoch": 12.58, + "learning_rate": 4.9859442042446865e-06, + "loss": 2.9519, + "step": 1535000 + }, + { + "epoch": 12.58, + "learning_rate": 4.984300251524767e-06, + "loss": 2.9525, + "step": 1535500 + }, + { + "epoch": 12.58, + "learning_rate": 4.982656298804847e-06, + "loss": 2.9575, + "step": 1536000 + }, + { + "epoch": 12.59, + "learning_rate": 4.981012346084927e-06, + "loss": 2.951, + "step": 1536500 + }, + { + "epoch": 12.59, + "learning_rate": 4.979368393365007e-06, + "loss": 2.9554, + "step": 1537000 + }, + { + "epoch": 12.6, + "learning_rate": 4.977724440645088e-06, + "loss": 2.948, + "step": 1537500 + }, + { + "epoch": 12.6, + "learning_rate": 4.976080487925168e-06, + "loss": 2.9571, + "step": 1538000 + }, + { + "epoch": 12.6, + "learning_rate": 4.974436535205248e-06, + "loss": 2.9573, + "step": 1538500 + }, + { + "epoch": 12.61, + "learning_rate": 4.972792582485328e-06, + "loss": 2.9505, + "step": 1539000 + }, + { + "epoch": 12.61, + "learning_rate": 4.9711486297654085e-06, + "loss": 2.9538, + "step": 1539500 + }, + { + "epoch": 12.62, + "learning_rate": 4.969504677045489e-06, + "loss": 2.953, + "step": 1540000 + }, + { + "epoch": 12.62, + "learning_rate": 4.967860724325569e-06, + "loss": 2.9508, + "step": 1540500 + }, + { + "epoch": 12.63, + "learning_rate": 4.966216771605649e-06, + "loss": 2.9517, + "step": 1541000 + }, + { + "epoch": 12.63, + "learning_rate": 4.96457281888573e-06, + "loss": 2.9515, + "step": 1541500 + }, + { + "epoch": 12.63, + "learning_rate": 4.962928866165809e-06, + "loss": 2.9529, + "step": 1542000 + }, + { + "epoch": 12.64, + "learning_rate": 4.961284913445889e-06, + "loss": 2.9492, + "step": 1542500 + }, + { + "epoch": 12.64, + "learning_rate": 4.95964096072597e-06, + "loss": 2.9554, + "step": 1543000 + }, + { + "epoch": 12.65, + "learning_rate": 4.95799700800605e-06, + "loss": 2.9546, + "step": 1543500 + }, + { + "epoch": 12.65, + "learning_rate": 4.9563530552861304e-06, + "loss": 2.9517, + "step": 1544000 + }, + { + "epoch": 12.65, + "learning_rate": 4.9547091025662106e-06, + "loss": 2.9527, + "step": 1544500 + }, + { + "epoch": 12.66, + "learning_rate": 4.953065149846291e-06, + "loss": 2.9507, + "step": 1545000 + }, + { + "epoch": 12.66, + "learning_rate": 4.951421197126371e-06, + "loss": 2.9389, + "step": 1545500 + }, + { + "epoch": 12.67, + "learning_rate": 4.949777244406451e-06, + "loss": 2.9514, + "step": 1546000 + }, + { + "epoch": 12.67, + "learning_rate": 4.948133291686532e-06, + "loss": 2.9557, + "step": 1546500 + }, + { + "epoch": 12.67, + "learning_rate": 4.946489338966612e-06, + "loss": 2.9524, + "step": 1547000 + }, + { + "epoch": 12.68, + "learning_rate": 4.944845386246692e-06, + "loss": 2.9492, + "step": 1547500 + }, + { + "epoch": 12.68, + "learning_rate": 4.943201433526772e-06, + "loss": 2.9557, + "step": 1548000 + }, + { + "epoch": 12.69, + "learning_rate": 4.941557480806852e-06, + "loss": 2.9515, + "step": 1548500 + }, + { + "epoch": 12.69, + "learning_rate": 4.9399135280869325e-06, + "loss": 2.9559, + "step": 1549000 + }, + { + "epoch": 12.69, + "learning_rate": 4.938269575367013e-06, + "loss": 2.9511, + "step": 1549500 + }, + { + "epoch": 12.7, + "learning_rate": 4.936625622647093e-06, + "loss": 2.9542, + "step": 1550000 + }, + { + "epoch": 12.7, + "learning_rate": 4.934981669927174e-06, + "loss": 2.9592, + "step": 1550500 + }, + { + "epoch": 12.71, + "learning_rate": 4.933337717207253e-06, + "loss": 2.95, + "step": 1551000 + }, + { + "epoch": 12.71, + "learning_rate": 4.931693764487334e-06, + "loss": 2.9537, + "step": 1551500 + }, + { + "epoch": 12.72, + "learning_rate": 4.930049811767414e-06, + "loss": 2.9526, + "step": 1552000 + }, + { + "epoch": 12.72, + "learning_rate": 4.928405859047494e-06, + "loss": 2.9549, + "step": 1552500 + }, + { + "epoch": 12.72, + "learning_rate": 4.926761906327574e-06, + "loss": 2.9513, + "step": 1553000 + }, + { + "epoch": 12.73, + "learning_rate": 4.9251179536076545e-06, + "loss": 2.9561, + "step": 1553500 + }, + { + "epoch": 12.73, + "learning_rate": 4.923474000887735e-06, + "loss": 2.9589, + "step": 1554000 + }, + { + "epoch": 12.74, + "learning_rate": 4.921830048167815e-06, + "loss": 2.9527, + "step": 1554500 + }, + { + "epoch": 12.74, + "learning_rate": 4.920186095447895e-06, + "loss": 2.9527, + "step": 1555000 + }, + { + "epoch": 12.74, + "learning_rate": 4.918542142727976e-06, + "loss": 2.954, + "step": 1555500 + }, + { + "epoch": 12.75, + "learning_rate": 4.916898190008056e-06, + "loss": 2.943, + "step": 1556000 + }, + { + "epoch": 12.75, + "learning_rate": 4.915254237288136e-06, + "loss": 2.9428, + "step": 1556500 + }, + { + "epoch": 12.76, + "learning_rate": 4.913610284568216e-06, + "loss": 2.9548, + "step": 1557000 + }, + { + "epoch": 12.76, + "learning_rate": 4.911966331848296e-06, + "loss": 2.9565, + "step": 1557500 + }, + { + "epoch": 12.76, + "learning_rate": 4.9103223791283765e-06, + "loss": 2.9475, + "step": 1558000 + }, + { + "epoch": 12.77, + "learning_rate": 4.908678426408457e-06, + "loss": 2.9534, + "step": 1558500 + }, + { + "epoch": 12.77, + "learning_rate": 4.907034473688538e-06, + "loss": 2.9567, + "step": 1559000 + }, + { + "epoch": 12.78, + "learning_rate": 4.905390520968618e-06, + "loss": 2.9496, + "step": 1559500 + }, + { + "epoch": 12.78, + "learning_rate": 4.903746568248697e-06, + "loss": 2.9544, + "step": 1560000 + }, + { + "epoch": 12.78, + "eval_accuracy": 0.4959430255891887, + "eval_loss": 2.8146307468414307, + "eval_runtime": 404.6908, + "eval_samples_per_second": 761.955, + "eval_steps_per_second": 15.876, + "step": 1560000 + }, + { + "epoch": 12.78, + "learning_rate": 4.902102615528778e-06, + "loss": 2.9512, + "step": 1560500 + }, + { + "epoch": 12.79, + "learning_rate": 4.900458662808858e-06, + "loss": 2.9506, + "step": 1561000 + }, + { + "epoch": 12.79, + "learning_rate": 4.898814710088938e-06, + "loss": 2.9507, + "step": 1561500 + }, + { + "epoch": 12.8, + "learning_rate": 4.897170757369018e-06, + "loss": 2.9485, + "step": 1562000 + }, + { + "epoch": 12.8, + "learning_rate": 4.8955268046490985e-06, + "loss": 2.9472, + "step": 1562500 + }, + { + "epoch": 12.81, + "learning_rate": 4.893882851929179e-06, + "loss": 2.9466, + "step": 1563000 + }, + { + "epoch": 12.81, + "learning_rate": 4.892238899209259e-06, + "loss": 2.9461, + "step": 1563500 + }, + { + "epoch": 12.81, + "learning_rate": 4.890594946489339e-06, + "loss": 2.9519, + "step": 1564000 + }, + { + "epoch": 12.82, + "learning_rate": 4.88895099376942e-06, + "loss": 2.9548, + "step": 1564500 + }, + { + "epoch": 12.82, + "learning_rate": 4.8873070410495e-06, + "loss": 2.9514, + "step": 1565000 + }, + { + "epoch": 12.83, + "learning_rate": 4.88566308832958e-06, + "loss": 2.9515, + "step": 1565500 + }, + { + "epoch": 12.83, + "learning_rate": 4.88401913560966e-06, + "loss": 2.9448, + "step": 1566000 + }, + { + "epoch": 12.83, + "learning_rate": 4.88237518288974e-06, + "loss": 2.9534, + "step": 1566500 + }, + { + "epoch": 12.84, + "learning_rate": 4.8807312301698205e-06, + "loss": 2.9453, + "step": 1567000 + }, + { + "epoch": 12.84, + "learning_rate": 4.879087277449901e-06, + "loss": 2.95, + "step": 1567500 + }, + { + "epoch": 12.85, + "learning_rate": 4.877443324729982e-06, + "loss": 2.9518, + "step": 1568000 + }, + { + "epoch": 12.85, + "learning_rate": 4.875799372010062e-06, + "loss": 2.9582, + "step": 1568500 + }, + { + "epoch": 12.85, + "learning_rate": 4.874155419290141e-06, + "loss": 2.9535, + "step": 1569000 + }, + { + "epoch": 12.86, + "learning_rate": 4.872511466570222e-06, + "loss": 2.9523, + "step": 1569500 + }, + { + "epoch": 12.86, + "learning_rate": 4.870867513850302e-06, + "loss": 2.9578, + "step": 1570000 + }, + { + "epoch": 12.87, + "learning_rate": 4.869223561130382e-06, + "loss": 2.95, + "step": 1570500 + }, + { + "epoch": 12.87, + "learning_rate": 4.867579608410462e-06, + "loss": 2.9472, + "step": 1571000 + }, + { + "epoch": 12.88, + "learning_rate": 4.865935655690543e-06, + "loss": 2.9505, + "step": 1571500 + }, + { + "epoch": 12.88, + "learning_rate": 4.864291702970623e-06, + "loss": 2.9519, + "step": 1572000 + }, + { + "epoch": 12.88, + "learning_rate": 4.862647750250703e-06, + "loss": 2.939, + "step": 1572500 + }, + { + "epoch": 12.89, + "learning_rate": 4.861003797530784e-06, + "loss": 2.9494, + "step": 1573000 + }, + { + "epoch": 12.89, + "learning_rate": 4.859359844810864e-06, + "loss": 2.9491, + "step": 1573500 + }, + { + "epoch": 12.9, + "learning_rate": 4.857715892090944e-06, + "loss": 2.9524, + "step": 1574000 + }, + { + "epoch": 12.9, + "learning_rate": 4.856071939371024e-06, + "loss": 2.9501, + "step": 1574500 + }, + { + "epoch": 12.9, + "learning_rate": 4.854427986651104e-06, + "loss": 2.9494, + "step": 1575000 + }, + { + "epoch": 12.91, + "learning_rate": 4.852784033931184e-06, + "loss": 2.949, + "step": 1575500 + }, + { + "epoch": 12.91, + "learning_rate": 4.8511400812112645e-06, + "loss": 2.9445, + "step": 1576000 + }, + { + "epoch": 12.92, + "learning_rate": 4.849496128491345e-06, + "loss": 2.9457, + "step": 1576500 + }, + { + "epoch": 12.92, + "learning_rate": 4.8478521757714256e-06, + "loss": 2.9505, + "step": 1577000 + }, + { + "epoch": 12.92, + "learning_rate": 4.846208223051506e-06, + "loss": 2.9521, + "step": 1577500 + }, + { + "epoch": 12.93, + "learning_rate": 4.844564270331585e-06, + "loss": 2.9494, + "step": 1578000 + }, + { + "epoch": 12.93, + "learning_rate": 4.842920317611666e-06, + "loss": 2.9524, + "step": 1578500 + }, + { + "epoch": 12.94, + "learning_rate": 4.841276364891746e-06, + "loss": 2.9477, + "step": 1579000 + }, + { + "epoch": 12.94, + "learning_rate": 4.839632412171826e-06, + "loss": 2.947, + "step": 1579500 + }, + { + "epoch": 12.94, + "learning_rate": 4.837988459451906e-06, + "loss": 2.9519, + "step": 1580000 + }, + { + "epoch": 12.95, + "learning_rate": 4.836344506731987e-06, + "loss": 2.95, + "step": 1580500 + }, + { + "epoch": 12.95, + "learning_rate": 4.834700554012067e-06, + "loss": 2.9561, + "step": 1581000 + }, + { + "epoch": 12.96, + "learning_rate": 4.833056601292147e-06, + "loss": 2.9502, + "step": 1581500 + }, + { + "epoch": 12.96, + "learning_rate": 4.831412648572228e-06, + "loss": 2.9511, + "step": 1582000 + }, + { + "epoch": 12.97, + "learning_rate": 4.829768695852308e-06, + "loss": 2.9545, + "step": 1582500 + }, + { + "epoch": 12.97, + "learning_rate": 4.828124743132388e-06, + "loss": 2.9486, + "step": 1583000 + }, + { + "epoch": 12.97, + "learning_rate": 4.826480790412468e-06, + "loss": 2.9494, + "step": 1583500 + }, + { + "epoch": 12.98, + "learning_rate": 4.824836837692549e-06, + "loss": 2.9427, + "step": 1584000 + }, + { + "epoch": 12.98, + "learning_rate": 4.823192884972628e-06, + "loss": 2.9539, + "step": 1584500 + }, + { + "epoch": 12.99, + "learning_rate": 4.8215489322527084e-06, + "loss": 2.9488, + "step": 1585000 + }, + { + "epoch": 12.99, + "learning_rate": 4.819904979532789e-06, + "loss": 2.9474, + "step": 1585500 + }, + { + "epoch": 12.99, + "learning_rate": 4.8182610268128695e-06, + "loss": 2.9436, + "step": 1586000 + }, + { + "epoch": 13.0, + "learning_rate": 4.81661707409295e-06, + "loss": 2.9426, + "step": 1586500 + }, + { + "epoch": 13.0, + "learning_rate": 4.81497312137303e-06, + "loss": 2.9475, + "step": 1587000 + }, + { + "epoch": 13.01, + "learning_rate": 4.81332916865311e-06, + "loss": 2.9566, + "step": 1587500 + }, + { + "epoch": 13.01, + "learning_rate": 4.81168521593319e-06, + "loss": 2.9528, + "step": 1588000 + }, + { + "epoch": 13.01, + "learning_rate": 4.81004126321327e-06, + "loss": 2.9459, + "step": 1588500 + }, + { + "epoch": 13.02, + "learning_rate": 4.80839731049335e-06, + "loss": 2.9478, + "step": 1589000 + }, + { + "epoch": 13.02, + "learning_rate": 4.806753357773431e-06, + "loss": 2.9457, + "step": 1589500 + }, + { + "epoch": 13.03, + "learning_rate": 4.805109405053511e-06, + "loss": 2.9486, + "step": 1590000 + }, + { + "epoch": 13.03, + "eval_accuracy": 0.4963652221640762, + "eval_loss": 2.8131563663482666, + "eval_runtime": 407.4078, + "eval_samples_per_second": 756.873, + "eval_steps_per_second": 15.77, + "step": 1590000 + }, + { + "epoch": 13.03, + "learning_rate": 4.803465452333591e-06, + "loss": 2.9509, + "step": 1590500 + }, + { + "epoch": 13.03, + "learning_rate": 4.801821499613672e-06, + "loss": 2.9485, + "step": 1591000 + }, + { + "epoch": 13.04, + "learning_rate": 4.800177546893752e-06, + "loss": 2.9573, + "step": 1591500 + }, + { + "epoch": 13.04, + "learning_rate": 4.798533594173832e-06, + "loss": 2.9446, + "step": 1592000 + }, + { + "epoch": 13.05, + "learning_rate": 4.796889641453912e-06, + "loss": 2.9451, + "step": 1592500 + }, + { + "epoch": 13.05, + "learning_rate": 4.795245688733993e-06, + "loss": 2.9395, + "step": 1593000 + }, + { + "epoch": 13.06, + "learning_rate": 4.793601736014072e-06, + "loss": 2.9443, + "step": 1593500 + }, + { + "epoch": 13.06, + "learning_rate": 4.791957783294152e-06, + "loss": 2.9481, + "step": 1594000 + }, + { + "epoch": 13.06, + "learning_rate": 4.790313830574233e-06, + "loss": 2.9436, + "step": 1594500 + }, + { + "epoch": 13.07, + "learning_rate": 4.7886698778543135e-06, + "loss": 2.95, + "step": 1595000 + }, + { + "epoch": 13.07, + "learning_rate": 4.787025925134394e-06, + "loss": 2.9522, + "step": 1595500 + }, + { + "epoch": 13.08, + "learning_rate": 4.785381972414474e-06, + "loss": 2.9437, + "step": 1596000 + }, + { + "epoch": 13.08, + "learning_rate": 4.783738019694554e-06, + "loss": 2.9467, + "step": 1596500 + }, + { + "epoch": 13.08, + "learning_rate": 4.782094066974634e-06, + "loss": 2.9474, + "step": 1597000 + }, + { + "epoch": 13.09, + "learning_rate": 4.780450114254714e-06, + "loss": 2.945, + "step": 1597500 + }, + { + "epoch": 13.09, + "learning_rate": 4.778806161534795e-06, + "loss": 2.9562, + "step": 1598000 + }, + { + "epoch": 13.1, + "learning_rate": 4.777162208814875e-06, + "loss": 2.9497, + "step": 1598500 + }, + { + "epoch": 13.1, + "learning_rate": 4.775518256094955e-06, + "loss": 2.9503, + "step": 1599000 + }, + { + "epoch": 13.1, + "learning_rate": 4.7738743033750355e-06, + "loss": 2.9489, + "step": 1599500 + }, + { + "epoch": 13.11, + "learning_rate": 4.772230350655116e-06, + "loss": 2.9467, + "step": 1600000 + }, + { + "epoch": 13.11, + "learning_rate": 4.770586397935196e-06, + "loss": 2.9436, + "step": 1600500 + }, + { + "epoch": 13.12, + "learning_rate": 4.768942445215276e-06, + "loss": 2.9546, + "step": 1601000 + }, + { + "epoch": 13.12, + "learning_rate": 4.767298492495356e-06, + "loss": 2.9411, + "step": 1601500 + }, + { + "epoch": 13.12, + "learning_rate": 4.765654539775437e-06, + "loss": 2.9413, + "step": 1602000 + }, + { + "epoch": 13.13, + "learning_rate": 4.764010587055516e-06, + "loss": 2.9436, + "step": 1602500 + }, + { + "epoch": 13.13, + "learning_rate": 4.762366634335596e-06, + "loss": 2.9467, + "step": 1603000 + }, + { + "epoch": 13.14, + "learning_rate": 4.760722681615677e-06, + "loss": 2.9432, + "step": 1603500 + }, + { + "epoch": 13.14, + "learning_rate": 4.7590787288957575e-06, + "loss": 2.9429, + "step": 1604000 + }, + { + "epoch": 13.15, + "learning_rate": 4.757434776175838e-06, + "loss": 2.9474, + "step": 1604500 + }, + { + "epoch": 13.15, + "learning_rate": 4.755790823455918e-06, + "loss": 2.9524, + "step": 1605000 + }, + { + "epoch": 13.15, + "learning_rate": 4.754146870735998e-06, + "loss": 2.9471, + "step": 1605500 + }, + { + "epoch": 13.16, + "learning_rate": 4.752502918016078e-06, + "loss": 2.9486, + "step": 1606000 + }, + { + "epoch": 13.16, + "learning_rate": 4.750858965296158e-06, + "loss": 2.9405, + "step": 1606500 + }, + { + "epoch": 13.17, + "learning_rate": 4.749215012576239e-06, + "loss": 2.9477, + "step": 1607000 + }, + { + "epoch": 13.17, + "learning_rate": 4.747571059856319e-06, + "loss": 2.9465, + "step": 1607500 + }, + { + "epoch": 13.17, + "learning_rate": 4.745927107136399e-06, + "loss": 2.9527, + "step": 1608000 + }, + { + "epoch": 13.18, + "learning_rate": 4.7442831544164795e-06, + "loss": 2.947, + "step": 1608500 + }, + { + "epoch": 13.18, + "learning_rate": 4.74263920169656e-06, + "loss": 2.9366, + "step": 1609000 + }, + { + "epoch": 13.19, + "learning_rate": 4.74099524897664e-06, + "loss": 2.9444, + "step": 1609500 + }, + { + "epoch": 13.19, + "learning_rate": 4.73935129625672e-06, + "loss": 2.9487, + "step": 1610000 + }, + { + "epoch": 13.19, + "learning_rate": 4.737707343536801e-06, + "loss": 2.9506, + "step": 1610500 + }, + { + "epoch": 13.2, + "learning_rate": 4.736063390816881e-06, + "loss": 2.9445, + "step": 1611000 + }, + { + "epoch": 13.2, + "learning_rate": 4.73441943809696e-06, + "loss": 2.9486, + "step": 1611500 + }, + { + "epoch": 13.21, + "learning_rate": 4.732775485377041e-06, + "loss": 2.9435, + "step": 1612000 + }, + { + "epoch": 13.21, + "learning_rate": 4.731131532657121e-06, + "loss": 2.9416, + "step": 1612500 + }, + { + "epoch": 13.22, + "learning_rate": 4.7294875799372014e-06, + "loss": 2.9449, + "step": 1613000 + }, + { + "epoch": 13.22, + "learning_rate": 4.7278436272172816e-06, + "loss": 2.95, + "step": 1613500 + }, + { + "epoch": 13.22, + "learning_rate": 4.726199674497362e-06, + "loss": 2.9506, + "step": 1614000 + }, + { + "epoch": 13.23, + "learning_rate": 4.724555721777442e-06, + "loss": 2.9452, + "step": 1614500 + }, + { + "epoch": 13.23, + "learning_rate": 4.722911769057522e-06, + "loss": 2.9432, + "step": 1615000 + }, + { + "epoch": 13.24, + "learning_rate": 4.721267816337602e-06, + "loss": 2.9425, + "step": 1615500 + }, + { + "epoch": 13.24, + "learning_rate": 4.719623863617683e-06, + "loss": 2.9451, + "step": 1616000 + }, + { + "epoch": 13.24, + "learning_rate": 4.717979910897763e-06, + "loss": 2.9468, + "step": 1616500 + }, + { + "epoch": 13.25, + "learning_rate": 4.716335958177843e-06, + "loss": 2.9504, + "step": 1617000 + }, + { + "epoch": 13.25, + "learning_rate": 4.7146920054579234e-06, + "loss": 2.9464, + "step": 1617500 + }, + { + "epoch": 13.26, + "learning_rate": 4.7130480527380036e-06, + "loss": 2.9482, + "step": 1618000 + }, + { + "epoch": 13.26, + "learning_rate": 4.711404100018084e-06, + "loss": 2.9461, + "step": 1618500 + }, + { + "epoch": 13.26, + "learning_rate": 4.709760147298164e-06, + "loss": 2.9471, + "step": 1619000 + }, + { + "epoch": 13.27, + "learning_rate": 4.708116194578245e-06, + "loss": 2.9439, + "step": 1619500 + }, + { + "epoch": 13.27, + "learning_rate": 4.706472241858325e-06, + "loss": 2.9413, + "step": 1620000 + }, + { + "epoch": 13.27, + "eval_accuracy": 0.49669974826700375, + "eval_loss": 2.809884786605835, + "eval_runtime": 410.3222, + "eval_samples_per_second": 751.497, + "eval_steps_per_second": 15.658, + "step": 1620000 + }, + { + "epoch": 13.28, + "learning_rate": 4.704828289138404e-06, + "loss": 2.9414, + "step": 1620500 + }, + { + "epoch": 13.28, + "learning_rate": 4.703184336418485e-06, + "loss": 2.9411, + "step": 1621000 + }, + { + "epoch": 13.28, + "learning_rate": 4.701540383698565e-06, + "loss": 2.9508, + "step": 1621500 + }, + { + "epoch": 13.29, + "learning_rate": 4.699896430978645e-06, + "loss": 2.9504, + "step": 1622000 + }, + { + "epoch": 13.29, + "learning_rate": 4.6982524782587255e-06, + "loss": 2.9523, + "step": 1622500 + }, + { + "epoch": 13.3, + "learning_rate": 4.696608525538806e-06, + "loss": 2.9378, + "step": 1623000 + }, + { + "epoch": 13.3, + "learning_rate": 4.694964572818886e-06, + "loss": 2.9436, + "step": 1623500 + }, + { + "epoch": 13.31, + "learning_rate": 4.693320620098966e-06, + "loss": 2.9432, + "step": 1624000 + }, + { + "epoch": 13.31, + "learning_rate": 4.691676667379047e-06, + "loss": 2.9492, + "step": 1624500 + }, + { + "epoch": 13.31, + "learning_rate": 4.690032714659127e-06, + "loss": 2.9463, + "step": 1625000 + }, + { + "epoch": 13.32, + "learning_rate": 4.688388761939207e-06, + "loss": 2.9491, + "step": 1625500 + }, + { + "epoch": 13.32, + "learning_rate": 4.686744809219287e-06, + "loss": 2.9386, + "step": 1626000 + }, + { + "epoch": 13.33, + "learning_rate": 4.685100856499367e-06, + "loss": 2.9441, + "step": 1626500 + }, + { + "epoch": 13.33, + "learning_rate": 4.6834569037794475e-06, + "loss": 2.9448, + "step": 1627000 + }, + { + "epoch": 13.33, + "learning_rate": 4.681812951059528e-06, + "loss": 2.9372, + "step": 1627500 + }, + { + "epoch": 13.34, + "learning_rate": 4.680168998339608e-06, + "loss": 2.9463, + "step": 1628000 + }, + { + "epoch": 13.34, + "learning_rate": 4.678525045619689e-06, + "loss": 2.9438, + "step": 1628500 + }, + { + "epoch": 13.35, + "learning_rate": 4.676881092899769e-06, + "loss": 2.9344, + "step": 1629000 + }, + { + "epoch": 13.35, + "learning_rate": 4.675237140179849e-06, + "loss": 2.9421, + "step": 1629500 + }, + { + "epoch": 13.35, + "learning_rate": 4.673593187459929e-06, + "loss": 2.9386, + "step": 1630000 + }, + { + "epoch": 13.36, + "learning_rate": 4.671949234740009e-06, + "loss": 2.9458, + "step": 1630500 + }, + { + "epoch": 13.36, + "learning_rate": 4.670305282020089e-06, + "loss": 2.9415, + "step": 1631000 + }, + { + "epoch": 13.37, + "learning_rate": 4.6686613293001695e-06, + "loss": 2.9457, + "step": 1631500 + }, + { + "epoch": 13.37, + "learning_rate": 4.6670173765802505e-06, + "loss": 2.9435, + "step": 1632000 + }, + { + "epoch": 13.37, + "learning_rate": 4.665373423860331e-06, + "loss": 2.9474, + "step": 1632500 + }, + { + "epoch": 13.38, + "learning_rate": 4.66372947114041e-06, + "loss": 2.9461, + "step": 1633000 + }, + { + "epoch": 13.38, + "learning_rate": 4.662085518420491e-06, + "loss": 2.9435, + "step": 1633500 + }, + { + "epoch": 13.39, + "learning_rate": 4.660441565700571e-06, + "loss": 2.9385, + "step": 1634000 + }, + { + "epoch": 13.39, + "learning_rate": 4.658797612980651e-06, + "loss": 2.9458, + "step": 1634500 + }, + { + "epoch": 13.4, + "learning_rate": 4.657153660260731e-06, + "loss": 2.9402, + "step": 1635000 + }, + { + "epoch": 13.4, + "learning_rate": 4.655509707540811e-06, + "loss": 2.9385, + "step": 1635500 + }, + { + "epoch": 13.4, + "learning_rate": 4.6538657548208915e-06, + "loss": 2.9452, + "step": 1636000 + }, + { + "epoch": 13.41, + "learning_rate": 4.652221802100972e-06, + "loss": 2.9495, + "step": 1636500 + }, + { + "epoch": 13.41, + "learning_rate": 4.650577849381052e-06, + "loss": 2.9466, + "step": 1637000 + }, + { + "epoch": 13.42, + "learning_rate": 4.648933896661133e-06, + "loss": 2.9438, + "step": 1637500 + }, + { + "epoch": 13.42, + "learning_rate": 4.647289943941213e-06, + "loss": 2.9452, + "step": 1638000 + }, + { + "epoch": 13.42, + "learning_rate": 4.645645991221293e-06, + "loss": 2.9386, + "step": 1638500 + }, + { + "epoch": 13.43, + "learning_rate": 4.644002038501373e-06, + "loss": 2.9393, + "step": 1639000 + }, + { + "epoch": 13.43, + "learning_rate": 4.642358085781453e-06, + "loss": 2.9346, + "step": 1639500 + }, + { + "epoch": 13.44, + "learning_rate": 4.640714133061533e-06, + "loss": 2.9463, + "step": 1640000 + }, + { + "epoch": 13.44, + "learning_rate": 4.6390701803416135e-06, + "loss": 2.9433, + "step": 1640500 + }, + { + "epoch": 13.44, + "learning_rate": 4.6374262276216945e-06, + "loss": 2.9408, + "step": 1641000 + }, + { + "epoch": 13.45, + "learning_rate": 4.635782274901775e-06, + "loss": 2.9425, + "step": 1641500 + }, + { + "epoch": 13.45, + "learning_rate": 4.634138322181854e-06, + "loss": 2.9493, + "step": 1642000 + }, + { + "epoch": 13.46, + "learning_rate": 4.632494369461935e-06, + "loss": 2.9481, + "step": 1642500 + }, + { + "epoch": 13.46, + "learning_rate": 4.630850416742015e-06, + "loss": 2.9438, + "step": 1643000 + }, + { + "epoch": 13.46, + "learning_rate": 4.629206464022095e-06, + "loss": 2.9436, + "step": 1643500 + }, + { + "epoch": 13.47, + "learning_rate": 4.627562511302175e-06, + "loss": 2.9404, + "step": 1644000 + }, + { + "epoch": 13.47, + "learning_rate": 4.625918558582256e-06, + "loss": 2.9467, + "step": 1644500 + }, + { + "epoch": 13.48, + "learning_rate": 4.6242746058623355e-06, + "loss": 2.9504, + "step": 1645000 + }, + { + "epoch": 13.48, + "learning_rate": 4.622630653142416e-06, + "loss": 2.947, + "step": 1645500 + }, + { + "epoch": 13.49, + "learning_rate": 4.6209867004224966e-06, + "loss": 2.9411, + "step": 1646000 + }, + { + "epoch": 13.49, + "learning_rate": 4.619342747702577e-06, + "loss": 2.9402, + "step": 1646500 + }, + { + "epoch": 13.49, + "learning_rate": 4.617698794982657e-06, + "loss": 2.9397, + "step": 1647000 + }, + { + "epoch": 13.5, + "learning_rate": 4.616054842262737e-06, + "loss": 2.9406, + "step": 1647500 + }, + { + "epoch": 13.5, + "learning_rate": 4.614410889542817e-06, + "loss": 2.9432, + "step": 1648000 + }, + { + "epoch": 13.51, + "learning_rate": 4.612766936822897e-06, + "loss": 2.9428, + "step": 1648500 + }, + { + "epoch": 13.51, + "learning_rate": 4.611122984102977e-06, + "loss": 2.9471, + "step": 1649000 + }, + { + "epoch": 13.51, + "learning_rate": 4.6094790313830575e-06, + "loss": 2.944, + "step": 1649500 + }, + { + "epoch": 13.52, + "learning_rate": 4.6078350786631384e-06, + "loss": 2.9381, + "step": 1650000 + }, + { + "epoch": 13.52, + "eval_accuracy": 0.49680267143539775, + "eval_loss": 2.8081107139587402, + "eval_runtime": 403.5601, + "eval_samples_per_second": 764.089, + "eval_steps_per_second": 15.921, + "step": 1650000 + }, + { + "epoch": 13.52, + "learning_rate": 4.6061911259432186e-06, + "loss": 2.9407, + "step": 1650500 + }, + { + "epoch": 13.53, + "learning_rate": 4.604547173223298e-06, + "loss": 2.9429, + "step": 1651000 + }, + { + "epoch": 13.53, + "learning_rate": 4.602903220503379e-06, + "loss": 2.9429, + "step": 1651500 + }, + { + "epoch": 13.53, + "learning_rate": 4.601259267783459e-06, + "loss": 2.9433, + "step": 1652000 + }, + { + "epoch": 13.54, + "learning_rate": 4.599615315063539e-06, + "loss": 2.9313, + "step": 1652500 + }, + { + "epoch": 13.54, + "learning_rate": 4.597971362343619e-06, + "loss": 2.942, + "step": 1653000 + }, + { + "epoch": 13.55, + "learning_rate": 4.5963274096237e-06, + "loss": 2.9498, + "step": 1653500 + }, + { + "epoch": 13.55, + "learning_rate": 4.5946834569037794e-06, + "loss": 2.9326, + "step": 1654000 + }, + { + "epoch": 13.56, + "learning_rate": 4.5930395041838596e-06, + "loss": 2.9415, + "step": 1654500 + }, + { + "epoch": 13.56, + "learning_rate": 4.5913955514639405e-06, + "loss": 2.9337, + "step": 1655000 + }, + { + "epoch": 13.56, + "learning_rate": 4.589751598744021e-06, + "loss": 2.9363, + "step": 1655500 + }, + { + "epoch": 13.57, + "learning_rate": 4.588107646024101e-06, + "loss": 2.943, + "step": 1656000 + }, + { + "epoch": 13.57, + "learning_rate": 4.586463693304181e-06, + "loss": 2.9443, + "step": 1656500 + }, + { + "epoch": 13.58, + "learning_rate": 4.584819740584261e-06, + "loss": 2.9424, + "step": 1657000 + }, + { + "epoch": 13.58, + "learning_rate": 4.583175787864341e-06, + "loss": 2.9449, + "step": 1657500 + }, + { + "epoch": 13.58, + "learning_rate": 4.581531835144421e-06, + "loss": 2.9351, + "step": 1658000 + }, + { + "epoch": 13.59, + "learning_rate": 4.579887882424502e-06, + "loss": 2.9457, + "step": 1658500 + }, + { + "epoch": 13.59, + "learning_rate": 4.578243929704582e-06, + "loss": 2.9411, + "step": 1659000 + }, + { + "epoch": 13.6, + "learning_rate": 4.5765999769846625e-06, + "loss": 2.9469, + "step": 1659500 + }, + { + "epoch": 13.6, + "learning_rate": 4.574956024264743e-06, + "loss": 2.9427, + "step": 1660000 + }, + { + "epoch": 13.6, + "learning_rate": 4.573312071544823e-06, + "loss": 2.9381, + "step": 1660500 + }, + { + "epoch": 13.61, + "learning_rate": 4.571668118824903e-06, + "loss": 2.9367, + "step": 1661000 + }, + { + "epoch": 13.61, + "learning_rate": 4.570024166104983e-06, + "loss": 2.9354, + "step": 1661500 + }, + { + "epoch": 13.62, + "learning_rate": 4.568380213385063e-06, + "loss": 2.9368, + "step": 1662000 + }, + { + "epoch": 13.62, + "learning_rate": 4.566736260665144e-06, + "loss": 2.948, + "step": 1662500 + }, + { + "epoch": 13.62, + "learning_rate": 4.565092307945223e-06, + "loss": 2.9395, + "step": 1663000 + }, + { + "epoch": 13.63, + "learning_rate": 4.5634483552253035e-06, + "loss": 2.942, + "step": 1663500 + }, + { + "epoch": 13.63, + "learning_rate": 4.5618044025053845e-06, + "loss": 2.933, + "step": 1664000 + }, + { + "epoch": 13.64, + "learning_rate": 4.560160449785465e-06, + "loss": 2.9397, + "step": 1664500 + }, + { + "epoch": 13.64, + "learning_rate": 4.558516497065545e-06, + "loss": 2.9435, + "step": 1665000 + }, + { + "epoch": 13.65, + "learning_rate": 4.556872544345625e-06, + "loss": 2.9398, + "step": 1665500 + }, + { + "epoch": 13.65, + "learning_rate": 4.555228591625705e-06, + "loss": 2.9433, + "step": 1666000 + }, + { + "epoch": 13.65, + "learning_rate": 4.553584638905785e-06, + "loss": 2.9367, + "step": 1666500 + }, + { + "epoch": 13.66, + "learning_rate": 4.551940686185865e-06, + "loss": 2.9471, + "step": 1667000 + }, + { + "epoch": 13.66, + "learning_rate": 4.550296733465946e-06, + "loss": 2.9446, + "step": 1667500 + }, + { + "epoch": 13.67, + "learning_rate": 4.548652780746026e-06, + "loss": 2.9373, + "step": 1668000 + }, + { + "epoch": 13.67, + "learning_rate": 4.5470088280261065e-06, + "loss": 2.9354, + "step": 1668500 + }, + { + "epoch": 13.67, + "learning_rate": 4.545364875306187e-06, + "loss": 2.9431, + "step": 1669000 + }, + { + "epoch": 13.68, + "learning_rate": 4.543720922586267e-06, + "loss": 2.9444, + "step": 1669500 + }, + { + "epoch": 13.68, + "learning_rate": 4.542076969866347e-06, + "loss": 2.9413, + "step": 1670000 + }, + { + "epoch": 13.69, + "learning_rate": 4.540433017146427e-06, + "loss": 2.9407, + "step": 1670500 + }, + { + "epoch": 13.69, + "learning_rate": 4.538789064426508e-06, + "loss": 2.9372, + "step": 1671000 + }, + { + "epoch": 13.69, + "learning_rate": 4.537145111706588e-06, + "loss": 2.9418, + "step": 1671500 + }, + { + "epoch": 13.7, + "learning_rate": 4.535501158986667e-06, + "loss": 2.9401, + "step": 1672000 + }, + { + "epoch": 13.7, + "learning_rate": 4.533857206266748e-06, + "loss": 2.9391, + "step": 1672500 + }, + { + "epoch": 13.71, + "learning_rate": 4.5322132535468285e-06, + "loss": 2.9417, + "step": 1673000 + }, + { + "epoch": 13.71, + "learning_rate": 4.530569300826909e-06, + "loss": 2.9413, + "step": 1673500 + }, + { + "epoch": 13.71, + "learning_rate": 4.528925348106989e-06, + "loss": 2.9382, + "step": 1674000 + }, + { + "epoch": 13.72, + "learning_rate": 4.527281395387069e-06, + "loss": 2.942, + "step": 1674500 + }, + { + "epoch": 13.72, + "learning_rate": 4.525637442667149e-06, + "loss": 2.9443, + "step": 1675000 + }, + { + "epoch": 13.73, + "learning_rate": 4.523993489947229e-06, + "loss": 2.9417, + "step": 1675500 + }, + { + "epoch": 13.73, + "learning_rate": 4.522349537227309e-06, + "loss": 2.9398, + "step": 1676000 + }, + { + "epoch": 13.74, + "learning_rate": 4.52070558450739e-06, + "loss": 2.943, + "step": 1676500 + }, + { + "epoch": 13.74, + "learning_rate": 4.51906163178747e-06, + "loss": 2.9367, + "step": 1677000 + }, + { + "epoch": 13.74, + "learning_rate": 4.5174176790675505e-06, + "loss": 2.9447, + "step": 1677500 + }, + { + "epoch": 13.75, + "learning_rate": 4.515773726347631e-06, + "loss": 2.9327, + "step": 1678000 + }, + { + "epoch": 13.75, + "learning_rate": 4.514129773627711e-06, + "loss": 2.9379, + "step": 1678500 + }, + { + "epoch": 13.76, + "learning_rate": 4.512485820907791e-06, + "loss": 2.9412, + "step": 1679000 + }, + { + "epoch": 13.76, + "learning_rate": 4.510841868187871e-06, + "loss": 2.9383, + "step": 1679500 + }, + { + "epoch": 13.76, + "learning_rate": 4.509197915467952e-06, + "loss": 2.9389, + "step": 1680000 + }, + { + "epoch": 13.76, + "eval_accuracy": 0.4972946717627968, + "eval_loss": 2.80572772026062, + "eval_runtime": 409.9534, + "eval_samples_per_second": 752.173, + "eval_steps_per_second": 15.673, + "step": 1680000 + }, + { + "epoch": 13.77, + "learning_rate": 4.507553962748032e-06, + "loss": 2.9444, + "step": 1680500 + }, + { + "epoch": 13.77, + "learning_rate": 4.505910010028112e-06, + "loss": 2.9413, + "step": 1681000 + }, + { + "epoch": 13.78, + "learning_rate": 4.504266057308192e-06, + "loss": 2.949, + "step": 1681500 + }, + { + "epoch": 13.78, + "learning_rate": 4.5026221045882725e-06, + "loss": 2.9386, + "step": 1682000 + }, + { + "epoch": 13.78, + "learning_rate": 4.500978151868353e-06, + "loss": 2.9448, + "step": 1682500 + }, + { + "epoch": 13.79, + "learning_rate": 4.499334199148433e-06, + "loss": 2.934, + "step": 1683000 + }, + { + "epoch": 13.79, + "learning_rate": 4.497690246428514e-06, + "loss": 2.9403, + "step": 1683500 + }, + { + "epoch": 13.8, + "learning_rate": 4.496046293708594e-06, + "loss": 2.9464, + "step": 1684000 + }, + { + "epoch": 13.8, + "learning_rate": 4.494402340988673e-06, + "loss": 2.9362, + "step": 1684500 + }, + { + "epoch": 13.8, + "learning_rate": 4.492758388268754e-06, + "loss": 2.9427, + "step": 1685000 + }, + { + "epoch": 13.81, + "learning_rate": 4.491114435548834e-06, + "loss": 2.939, + "step": 1685500 + }, + { + "epoch": 13.81, + "learning_rate": 4.489470482828914e-06, + "loss": 2.9373, + "step": 1686000 + }, + { + "epoch": 13.82, + "learning_rate": 4.4878265301089944e-06, + "loss": 2.941, + "step": 1686500 + }, + { + "epoch": 13.82, + "learning_rate": 4.4861825773890746e-06, + "loss": 2.9358, + "step": 1687000 + }, + { + "epoch": 13.83, + "learning_rate": 4.484538624669155e-06, + "loss": 2.9371, + "step": 1687500 + }, + { + "epoch": 13.83, + "learning_rate": 4.482894671949235e-06, + "loss": 2.9382, + "step": 1688000 + }, + { + "epoch": 13.83, + "learning_rate": 4.481250719229315e-06, + "loss": 2.931, + "step": 1688500 + }, + { + "epoch": 13.84, + "learning_rate": 4.479606766509396e-06, + "loss": 2.9357, + "step": 1689000 + }, + { + "epoch": 13.84, + "learning_rate": 4.477962813789476e-06, + "loss": 2.9389, + "step": 1689500 + }, + { + "epoch": 13.85, + "learning_rate": 4.476318861069556e-06, + "loss": 2.9362, + "step": 1690000 + }, + { + "epoch": 13.85, + "learning_rate": 4.474674908349636e-06, + "loss": 2.9395, + "step": 1690500 + }, + { + "epoch": 13.85, + "learning_rate": 4.473030955629716e-06, + "loss": 2.9368, + "step": 1691000 + }, + { + "epoch": 13.86, + "learning_rate": 4.4713870029097965e-06, + "loss": 2.9447, + "step": 1691500 + }, + { + "epoch": 13.86, + "learning_rate": 4.469743050189877e-06, + "loss": 2.9401, + "step": 1692000 + }, + { + "epoch": 13.87, + "learning_rate": 4.468099097469958e-06, + "loss": 2.9357, + "step": 1692500 + }, + { + "epoch": 13.87, + "learning_rate": 4.466455144750038e-06, + "loss": 2.9433, + "step": 1693000 + }, + { + "epoch": 13.87, + "learning_rate": 4.464811192030117e-06, + "loss": 2.9427, + "step": 1693500 + }, + { + "epoch": 13.88, + "learning_rate": 4.463167239310198e-06, + "loss": 2.9336, + "step": 1694000 + }, + { + "epoch": 13.88, + "learning_rate": 4.461523286590278e-06, + "loss": 2.9388, + "step": 1694500 + }, + { + "epoch": 13.89, + "learning_rate": 4.459879333870358e-06, + "loss": 2.9386, + "step": 1695000 + }, + { + "epoch": 13.89, + "learning_rate": 4.458235381150438e-06, + "loss": 2.9384, + "step": 1695500 + }, + { + "epoch": 13.9, + "learning_rate": 4.4565914284305185e-06, + "loss": 2.9361, + "step": 1696000 + }, + { + "epoch": 13.9, + "learning_rate": 4.454947475710599e-06, + "loss": 2.9363, + "step": 1696500 + }, + { + "epoch": 13.9, + "learning_rate": 4.453303522990679e-06, + "loss": 2.9361, + "step": 1697000 + }, + { + "epoch": 13.91, + "learning_rate": 4.45165957027076e-06, + "loss": 2.9355, + "step": 1697500 + }, + { + "epoch": 13.91, + "learning_rate": 4.45001561755084e-06, + "loss": 2.9356, + "step": 1698000 + }, + { + "epoch": 13.92, + "learning_rate": 4.44837166483092e-06, + "loss": 2.9391, + "step": 1698500 + }, + { + "epoch": 13.92, + "learning_rate": 4.446727712111e-06, + "loss": 2.9384, + "step": 1699000 + }, + { + "epoch": 13.92, + "learning_rate": 4.44508375939108e-06, + "loss": 2.9367, + "step": 1699500 + }, + { + "epoch": 13.93, + "learning_rate": 4.44343980667116e-06, + "loss": 2.9446, + "step": 1700000 + }, + { + "epoch": 13.93, + "learning_rate": 4.4417958539512405e-06, + "loss": 2.9439, + "step": 1700500 + }, + { + "epoch": 13.94, + "learning_rate": 4.440151901231321e-06, + "loss": 2.9381, + "step": 1701000 + }, + { + "epoch": 13.94, + "learning_rate": 4.438507948511402e-06, + "loss": 2.9345, + "step": 1701500 + }, + { + "epoch": 13.94, + "learning_rate": 4.436863995791482e-06, + "loss": 2.9391, + "step": 1702000 + }, + { + "epoch": 13.95, + "learning_rate": 4.435220043071561e-06, + "loss": 2.9316, + "step": 1702500 + }, + { + "epoch": 13.95, + "learning_rate": 4.433576090351642e-06, + "loss": 2.9351, + "step": 1703000 + }, + { + "epoch": 13.96, + "learning_rate": 4.431932137631722e-06, + "loss": 2.932, + "step": 1703500 + }, + { + "epoch": 13.96, + "learning_rate": 4.430288184911802e-06, + "loss": 2.938, + "step": 1704000 + }, + { + "epoch": 13.96, + "learning_rate": 4.428644232191882e-06, + "loss": 2.9325, + "step": 1704500 + }, + { + "epoch": 13.97, + "learning_rate": 4.427000279471963e-06, + "loss": 2.9388, + "step": 1705000 + }, + { + "epoch": 13.97, + "learning_rate": 4.425356326752043e-06, + "loss": 2.9338, + "step": 1705500 + }, + { + "epoch": 13.98, + "learning_rate": 4.423712374032123e-06, + "loss": 2.9372, + "step": 1706000 + }, + { + "epoch": 13.98, + "learning_rate": 4.422068421312204e-06, + "loss": 2.9421, + "step": 1706500 + }, + { + "epoch": 13.99, + "learning_rate": 4.420424468592284e-06, + "loss": 2.9381, + "step": 1707000 + }, + { + "epoch": 13.99, + "learning_rate": 4.418780515872364e-06, + "loss": 2.9352, + "step": 1707500 + }, + { + "epoch": 13.99, + "learning_rate": 4.417136563152444e-06, + "loss": 2.9376, + "step": 1708000 + }, + { + "epoch": 14.0, + "learning_rate": 4.415492610432524e-06, + "loss": 2.9373, + "step": 1708500 + }, + { + "epoch": 14.0, + "learning_rate": 4.413848657712604e-06, + "loss": 2.9386, + "step": 1709000 + }, + { + "epoch": 14.01, + "learning_rate": 4.4122047049926845e-06, + "loss": 2.9335, + "step": 1709500 + }, + { + "epoch": 14.01, + "learning_rate": 4.410560752272765e-06, + "loss": 2.9374, + "step": 1710000 + }, + { + "epoch": 14.01, + "eval_accuracy": 0.4976944853269889, + "eval_loss": 2.8028008937835693, + "eval_runtime": 411.9878, + "eval_samples_per_second": 748.459, + "eval_steps_per_second": 15.595, + "step": 1710000 + }, + { + "epoch": 14.01, + "learning_rate": 4.408916799552846e-06, + "loss": 2.9313, + "step": 1710500 + }, + { + "epoch": 14.02, + "learning_rate": 4.407272846832926e-06, + "loss": 2.9383, + "step": 1711000 + }, + { + "epoch": 14.02, + "learning_rate": 4.405628894113006e-06, + "loss": 2.939, + "step": 1711500 + }, + { + "epoch": 14.03, + "learning_rate": 4.403984941393086e-06, + "loss": 2.9374, + "step": 1712000 + }, + { + "epoch": 14.03, + "learning_rate": 4.402340988673166e-06, + "loss": 2.9351, + "step": 1712500 + }, + { + "epoch": 14.03, + "learning_rate": 4.400697035953246e-06, + "loss": 2.9335, + "step": 1713000 + }, + { + "epoch": 14.04, + "learning_rate": 4.399053083233326e-06, + "loss": 2.9393, + "step": 1713500 + }, + { + "epoch": 14.04, + "learning_rate": 4.397409130513407e-06, + "loss": 2.9374, + "step": 1714000 + }, + { + "epoch": 14.05, + "learning_rate": 4.395765177793487e-06, + "loss": 2.9355, + "step": 1714500 + }, + { + "epoch": 14.05, + "learning_rate": 4.394121225073567e-06, + "loss": 2.9253, + "step": 1715000 + }, + { + "epoch": 14.05, + "learning_rate": 4.392477272353648e-06, + "loss": 2.9334, + "step": 1715500 + }, + { + "epoch": 14.06, + "learning_rate": 4.390833319633728e-06, + "loss": 2.9287, + "step": 1716000 + }, + { + "epoch": 14.06, + "learning_rate": 4.389189366913808e-06, + "loss": 2.9363, + "step": 1716500 + }, + { + "epoch": 14.07, + "learning_rate": 4.387545414193888e-06, + "loss": 2.9334, + "step": 1717000 + }, + { + "epoch": 14.07, + "learning_rate": 4.385901461473968e-06, + "loss": 2.9396, + "step": 1717500 + }, + { + "epoch": 14.08, + "learning_rate": 4.384257508754048e-06, + "loss": 2.9314, + "step": 1718000 + }, + { + "epoch": 14.08, + "learning_rate": 4.3826135560341285e-06, + "loss": 2.9284, + "step": 1718500 + }, + { + "epoch": 14.08, + "learning_rate": 4.3809696033142094e-06, + "loss": 2.9397, + "step": 1719000 + }, + { + "epoch": 14.09, + "learning_rate": 4.3793256505942896e-06, + "loss": 2.9268, + "step": 1719500 + }, + { + "epoch": 14.09, + "learning_rate": 4.37768169787437e-06, + "loss": 2.9344, + "step": 1720000 + }, + { + "epoch": 14.1, + "learning_rate": 4.37603774515445e-06, + "loss": 2.9336, + "step": 1720500 + }, + { + "epoch": 14.1, + "learning_rate": 4.37439379243453e-06, + "loss": 2.9399, + "step": 1721000 + }, + { + "epoch": 14.1, + "learning_rate": 4.37274983971461e-06, + "loss": 2.9335, + "step": 1721500 + }, + { + "epoch": 14.11, + "learning_rate": 4.37110588699469e-06, + "loss": 2.9299, + "step": 1722000 + }, + { + "epoch": 14.11, + "learning_rate": 4.36946193427477e-06, + "loss": 2.9353, + "step": 1722500 + }, + { + "epoch": 14.12, + "learning_rate": 4.367817981554851e-06, + "loss": 2.9282, + "step": 1723000 + }, + { + "epoch": 14.12, + "learning_rate": 4.3661740288349306e-06, + "loss": 2.9343, + "step": 1723500 + }, + { + "epoch": 14.12, + "learning_rate": 4.364530076115011e-06, + "loss": 2.9299, + "step": 1724000 + }, + { + "epoch": 14.13, + "learning_rate": 4.362886123395092e-06, + "loss": 2.9317, + "step": 1724500 + }, + { + "epoch": 14.13, + "learning_rate": 4.361242170675172e-06, + "loss": 2.9287, + "step": 1725000 + }, + { + "epoch": 14.14, + "learning_rate": 4.359598217955252e-06, + "loss": 2.9372, + "step": 1725500 + }, + { + "epoch": 14.14, + "learning_rate": 4.357954265235332e-06, + "loss": 2.9415, + "step": 1726000 + }, + { + "epoch": 14.14, + "learning_rate": 4.356310312515412e-06, + "loss": 2.9302, + "step": 1726500 + }, + { + "epoch": 14.15, + "learning_rate": 4.354666359795492e-06, + "loss": 2.9365, + "step": 1727000 + }, + { + "epoch": 14.15, + "learning_rate": 4.3530224070755724e-06, + "loss": 2.9399, + "step": 1727500 + }, + { + "epoch": 14.16, + "learning_rate": 4.351378454355653e-06, + "loss": 2.934, + "step": 1728000 + }, + { + "epoch": 14.16, + "learning_rate": 4.3497345016357335e-06, + "loss": 2.9384, + "step": 1728500 + }, + { + "epoch": 14.17, + "learning_rate": 4.348090548915814e-06, + "loss": 2.9384, + "step": 1729000 + }, + { + "epoch": 14.17, + "learning_rate": 4.346446596195894e-06, + "loss": 2.9356, + "step": 1729500 + }, + { + "epoch": 14.17, + "learning_rate": 4.344802643475974e-06, + "loss": 2.9365, + "step": 1730000 + }, + { + "epoch": 14.18, + "learning_rate": 4.343158690756054e-06, + "loss": 2.941, + "step": 1730500 + }, + { + "epoch": 14.18, + "learning_rate": 4.341514738036134e-06, + "loss": 2.9355, + "step": 1731000 + }, + { + "epoch": 14.19, + "learning_rate": 4.339870785316215e-06, + "loss": 2.9348, + "step": 1731500 + }, + { + "epoch": 14.19, + "learning_rate": 4.338226832596295e-06, + "loss": 2.9331, + "step": 1732000 + }, + { + "epoch": 14.19, + "learning_rate": 4.336582879876375e-06, + "loss": 2.9319, + "step": 1732500 + }, + { + "epoch": 14.2, + "learning_rate": 4.3349389271564555e-06, + "loss": 2.9369, + "step": 1733000 + }, + { + "epoch": 14.2, + "learning_rate": 4.333294974436536e-06, + "loss": 2.9361, + "step": 1733500 + }, + { + "epoch": 14.21, + "learning_rate": 4.331651021716616e-06, + "loss": 2.9339, + "step": 1734000 + }, + { + "epoch": 14.21, + "learning_rate": 4.330007068996696e-06, + "loss": 2.9307, + "step": 1734500 + }, + { + "epoch": 14.21, + "learning_rate": 4.328363116276776e-06, + "loss": 2.9329, + "step": 1735000 + }, + { + "epoch": 14.22, + "learning_rate": 4.326719163556857e-06, + "loss": 2.9387, + "step": 1735500 + }, + { + "epoch": 14.22, + "learning_rate": 4.325075210836936e-06, + "loss": 2.9352, + "step": 1736000 + }, + { + "epoch": 14.23, + "learning_rate": 4.323431258117016e-06, + "loss": 2.9308, + "step": 1736500 + }, + { + "epoch": 14.23, + "learning_rate": 4.321787305397097e-06, + "loss": 2.9319, + "step": 1737000 + }, + { + "epoch": 14.24, + "learning_rate": 4.3201433526771775e-06, + "loss": 2.9321, + "step": 1737500 + }, + { + "epoch": 14.24, + "learning_rate": 4.318499399957258e-06, + "loss": 2.9276, + "step": 1738000 + }, + { + "epoch": 14.24, + "learning_rate": 4.316855447237338e-06, + "loss": 2.933, + "step": 1738500 + }, + { + "epoch": 14.25, + "learning_rate": 4.315211494517418e-06, + "loss": 2.936, + "step": 1739000 + }, + { + "epoch": 14.25, + "learning_rate": 4.313567541797498e-06, + "loss": 2.9336, + "step": 1739500 + }, + { + "epoch": 14.26, + "learning_rate": 4.311923589077578e-06, + "loss": 2.9341, + "step": 1740000 + }, + { + "epoch": 14.26, + "eval_accuracy": 0.4978347321882909, + "eval_loss": 2.8000051975250244, + "eval_runtime": 409.1394, + "eval_samples_per_second": 753.67, + "eval_steps_per_second": 15.704, + "step": 1740000 + }, + { + "epoch": 14.26, + "learning_rate": 4.310279636357659e-06, + "loss": 2.9279, + "step": 1740500 + }, + { + "epoch": 14.26, + "learning_rate": 4.308635683637739e-06, + "loss": 2.9396, + "step": 1741000 + }, + { + "epoch": 14.27, + "learning_rate": 4.306991730917819e-06, + "loss": 2.9339, + "step": 1741500 + }, + { + "epoch": 14.27, + "learning_rate": 4.3053477781978995e-06, + "loss": 2.9332, + "step": 1742000 + }, + { + "epoch": 14.28, + "learning_rate": 4.30370382547798e-06, + "loss": 2.9357, + "step": 1742500 + }, + { + "epoch": 14.28, + "learning_rate": 4.30205987275806e-06, + "loss": 2.9298, + "step": 1743000 + }, + { + "epoch": 14.28, + "learning_rate": 4.30041592003814e-06, + "loss": 2.936, + "step": 1743500 + }, + { + "epoch": 14.29, + "learning_rate": 4.298771967318221e-06, + "loss": 2.9274, + "step": 1744000 + }, + { + "epoch": 14.29, + "learning_rate": 4.297128014598301e-06, + "loss": 2.9341, + "step": 1744500 + }, + { + "epoch": 14.3, + "learning_rate": 4.29548406187838e-06, + "loss": 2.9329, + "step": 1745000 + }, + { + "epoch": 14.3, + "learning_rate": 4.293840109158461e-06, + "loss": 2.9282, + "step": 1745500 + }, + { + "epoch": 14.3, + "learning_rate": 4.292196156438541e-06, + "loss": 2.9359, + "step": 1746000 + }, + { + "epoch": 14.31, + "learning_rate": 4.2905522037186215e-06, + "loss": 2.9311, + "step": 1746500 + }, + { + "epoch": 14.31, + "learning_rate": 4.288908250998702e-06, + "loss": 2.9301, + "step": 1747000 + }, + { + "epoch": 14.32, + "learning_rate": 4.287264298278782e-06, + "loss": 2.9277, + "step": 1747500 + }, + { + "epoch": 14.32, + "learning_rate": 4.285620345558862e-06, + "loss": 2.9333, + "step": 1748000 + }, + { + "epoch": 14.33, + "learning_rate": 4.283976392838942e-06, + "loss": 2.9305, + "step": 1748500 + }, + { + "epoch": 14.33, + "learning_rate": 4.282332440119022e-06, + "loss": 2.9342, + "step": 1749000 + }, + { + "epoch": 14.33, + "learning_rate": 4.280688487399103e-06, + "loss": 2.9321, + "step": 1749500 + }, + { + "epoch": 14.34, + "learning_rate": 4.279044534679183e-06, + "loss": 2.9341, + "step": 1750000 + }, + { + "epoch": 14.34, + "learning_rate": 4.277400581959263e-06, + "loss": 2.9312, + "step": 1750500 + }, + { + "epoch": 14.35, + "learning_rate": 4.2757566292393435e-06, + "loss": 2.9252, + "step": 1751000 + }, + { + "epoch": 14.35, + "learning_rate": 4.274112676519424e-06, + "loss": 2.9335, + "step": 1751500 + }, + { + "epoch": 14.35, + "learning_rate": 4.272468723799504e-06, + "loss": 2.9248, + "step": 1752000 + }, + { + "epoch": 14.36, + "learning_rate": 4.270824771079584e-06, + "loss": 2.9322, + "step": 1752500 + }, + { + "epoch": 14.36, + "learning_rate": 4.269180818359665e-06, + "loss": 2.9343, + "step": 1753000 + }, + { + "epoch": 14.37, + "learning_rate": 4.267536865639745e-06, + "loss": 2.9346, + "step": 1753500 + }, + { + "epoch": 14.37, + "learning_rate": 4.265892912919824e-06, + "loss": 2.9312, + "step": 1754000 + }, + { + "epoch": 14.37, + "learning_rate": 4.264248960199905e-06, + "loss": 2.9274, + "step": 1754500 + }, + { + "epoch": 14.38, + "learning_rate": 4.262605007479985e-06, + "loss": 2.9308, + "step": 1755000 + }, + { + "epoch": 14.38, + "learning_rate": 4.2609610547600654e-06, + "loss": 2.9315, + "step": 1755500 + }, + { + "epoch": 14.39, + "learning_rate": 4.2593171020401456e-06, + "loss": 2.929, + "step": 1756000 + }, + { + "epoch": 14.39, + "learning_rate": 4.2576731493202265e-06, + "loss": 2.9362, + "step": 1756500 + }, + { + "epoch": 14.39, + "learning_rate": 4.256029196600306e-06, + "loss": 2.9238, + "step": 1757000 + }, + { + "epoch": 14.4, + "learning_rate": 4.254385243880386e-06, + "loss": 2.9318, + "step": 1757500 + }, + { + "epoch": 14.4, + "learning_rate": 4.252741291160467e-06, + "loss": 2.935, + "step": 1758000 + }, + { + "epoch": 14.41, + "learning_rate": 4.251097338440547e-06, + "loss": 2.9325, + "step": 1758500 + }, + { + "epoch": 14.41, + "learning_rate": 4.249453385720627e-06, + "loss": 2.9388, + "step": 1759000 + }, + { + "epoch": 14.42, + "learning_rate": 4.247809433000707e-06, + "loss": 2.9329, + "step": 1759500 + }, + { + "epoch": 14.42, + "learning_rate": 4.2461654802807874e-06, + "loss": 2.9317, + "step": 1760000 + }, + { + "epoch": 14.42, + "learning_rate": 4.2445215275608676e-06, + "loss": 2.9316, + "step": 1760500 + }, + { + "epoch": 14.43, + "learning_rate": 4.242877574840948e-06, + "loss": 2.9311, + "step": 1761000 + }, + { + "epoch": 14.43, + "learning_rate": 4.241233622121028e-06, + "loss": 2.932, + "step": 1761500 + }, + { + "epoch": 14.44, + "learning_rate": 4.239589669401109e-06, + "loss": 2.9319, + "step": 1762000 + }, + { + "epoch": 14.44, + "learning_rate": 4.237945716681189e-06, + "loss": 2.9314, + "step": 1762500 + }, + { + "epoch": 14.44, + "learning_rate": 4.236301763961268e-06, + "loss": 2.934, + "step": 1763000 + }, + { + "epoch": 14.45, + "learning_rate": 4.234657811241349e-06, + "loss": 2.9321, + "step": 1763500 + }, + { + "epoch": 14.45, + "learning_rate": 4.233013858521429e-06, + "loss": 2.936, + "step": 1764000 + }, + { + "epoch": 14.46, + "learning_rate": 4.231369905801509e-06, + "loss": 2.9388, + "step": 1764500 + }, + { + "epoch": 14.46, + "learning_rate": 4.2297259530815895e-06, + "loss": 2.925, + "step": 1765000 + }, + { + "epoch": 14.46, + "learning_rate": 4.2280820003616705e-06, + "loss": 2.9331, + "step": 1765500 + }, + { + "epoch": 14.47, + "learning_rate": 4.22643804764175e-06, + "loss": 2.9265, + "step": 1766000 + }, + { + "epoch": 14.47, + "learning_rate": 4.22479409492183e-06, + "loss": 2.9322, + "step": 1766500 + }, + { + "epoch": 14.48, + "learning_rate": 4.223150142201911e-06, + "loss": 2.9266, + "step": 1767000 + }, + { + "epoch": 14.48, + "learning_rate": 4.221506189481991e-06, + "loss": 2.9338, + "step": 1767500 + }, + { + "epoch": 14.48, + "learning_rate": 4.219862236762071e-06, + "loss": 2.9362, + "step": 1768000 + }, + { + "epoch": 14.49, + "learning_rate": 4.218218284042151e-06, + "loss": 2.9338, + "step": 1768500 + }, + { + "epoch": 14.49, + "learning_rate": 4.216574331322231e-06, + "loss": 2.9352, + "step": 1769000 + }, + { + "epoch": 14.5, + "learning_rate": 4.2149303786023115e-06, + "loss": 2.9298, + "step": 1769500 + }, + { + "epoch": 14.5, + "learning_rate": 4.213286425882392e-06, + "loss": 2.9275, + "step": 1770000 + }, + { + "epoch": 14.5, + "eval_accuracy": 0.49835039139690634, + "eval_loss": 2.7977969646453857, + "eval_runtime": 403.9508, + "eval_samples_per_second": 763.35, + "eval_steps_per_second": 15.905, + "step": 1770000 + }, + { + "epoch": 14.51, + "learning_rate": 4.211642473162473e-06, + "loss": 2.9349, + "step": 1770500 + }, + { + "epoch": 14.51, + "learning_rate": 4.209998520442553e-06, + "loss": 2.9281, + "step": 1771000 + }, + { + "epoch": 14.51, + "learning_rate": 4.208354567722633e-06, + "loss": 2.9367, + "step": 1771500 + }, + { + "epoch": 14.52, + "learning_rate": 4.206710615002713e-06, + "loss": 2.9272, + "step": 1772000 + }, + { + "epoch": 14.52, + "learning_rate": 4.205066662282793e-06, + "loss": 2.9336, + "step": 1772500 + }, + { + "epoch": 14.53, + "learning_rate": 4.203422709562873e-06, + "loss": 2.9265, + "step": 1773000 + }, + { + "epoch": 14.53, + "learning_rate": 4.201778756842953e-06, + "loss": 2.9294, + "step": 1773500 + }, + { + "epoch": 14.53, + "learning_rate": 4.2001348041230335e-06, + "loss": 2.9236, + "step": 1774000 + }, + { + "epoch": 14.54, + "learning_rate": 4.1984908514031145e-06, + "loss": 2.9314, + "step": 1774500 + }, + { + "epoch": 14.54, + "learning_rate": 4.196846898683194e-06, + "loss": 2.9274, + "step": 1775000 + }, + { + "epoch": 14.55, + "learning_rate": 4.195202945963274e-06, + "loss": 2.9199, + "step": 1775500 + }, + { + "epoch": 14.55, + "learning_rate": 4.193558993243355e-06, + "loss": 2.9313, + "step": 1776000 + }, + { + "epoch": 14.55, + "learning_rate": 4.191915040523435e-06, + "loss": 2.9253, + "step": 1776500 + }, + { + "epoch": 14.56, + "learning_rate": 4.190271087803515e-06, + "loss": 2.9304, + "step": 1777000 + }, + { + "epoch": 14.56, + "learning_rate": 4.188627135083595e-06, + "loss": 2.9333, + "step": 1777500 + }, + { + "epoch": 14.57, + "learning_rate": 4.186983182363675e-06, + "loss": 2.9317, + "step": 1778000 + }, + { + "epoch": 14.57, + "learning_rate": 4.1853392296437555e-06, + "loss": 2.9276, + "step": 1778500 + }, + { + "epoch": 14.58, + "learning_rate": 4.183695276923836e-06, + "loss": 2.9319, + "step": 1779000 + }, + { + "epoch": 14.58, + "learning_rate": 4.182051324203917e-06, + "loss": 2.933, + "step": 1779500 + }, + { + "epoch": 14.58, + "learning_rate": 4.180407371483997e-06, + "loss": 2.9379, + "step": 1780000 + }, + { + "epoch": 14.59, + "learning_rate": 4.178763418764077e-06, + "loss": 2.9306, + "step": 1780500 + }, + { + "epoch": 14.59, + "learning_rate": 4.177119466044157e-06, + "loss": 2.9328, + "step": 1781000 + }, + { + "epoch": 14.6, + "learning_rate": 4.175475513324237e-06, + "loss": 2.9255, + "step": 1781500 + }, + { + "epoch": 14.6, + "learning_rate": 4.173831560604317e-06, + "loss": 2.9313, + "step": 1782000 + }, + { + "epoch": 14.6, + "learning_rate": 4.172187607884397e-06, + "loss": 2.9252, + "step": 1782500 + }, + { + "epoch": 14.61, + "learning_rate": 4.1705436551644775e-06, + "loss": 2.9281, + "step": 1783000 + }, + { + "epoch": 14.61, + "learning_rate": 4.1688997024445585e-06, + "loss": 2.9268, + "step": 1783500 + }, + { + "epoch": 14.62, + "learning_rate": 4.167255749724639e-06, + "loss": 2.9269, + "step": 1784000 + }, + { + "epoch": 14.62, + "learning_rate": 4.165611797004719e-06, + "loss": 2.9347, + "step": 1784500 + }, + { + "epoch": 14.62, + "learning_rate": 4.163967844284799e-06, + "loss": 2.9234, + "step": 1785000 + }, + { + "epoch": 14.63, + "learning_rate": 4.162323891564879e-06, + "loss": 2.9271, + "step": 1785500 + }, + { + "epoch": 14.63, + "learning_rate": 4.160679938844959e-06, + "loss": 2.9335, + "step": 1786000 + }, + { + "epoch": 14.64, + "learning_rate": 4.159035986125039e-06, + "loss": 2.9314, + "step": 1786500 + }, + { + "epoch": 14.64, + "learning_rate": 4.15739203340512e-06, + "loss": 2.9327, + "step": 1787000 + }, + { + "epoch": 14.64, + "learning_rate": 4.1557480806851995e-06, + "loss": 2.9259, + "step": 1787500 + }, + { + "epoch": 14.65, + "learning_rate": 4.15410412796528e-06, + "loss": 2.9288, + "step": 1788000 + }, + { + "epoch": 14.65, + "learning_rate": 4.1524601752453606e-06, + "loss": 2.9282, + "step": 1788500 + }, + { + "epoch": 14.66, + "learning_rate": 4.150816222525441e-06, + "loss": 2.9276, + "step": 1789000 + }, + { + "epoch": 14.66, + "learning_rate": 4.149172269805521e-06, + "loss": 2.9337, + "step": 1789500 + }, + { + "epoch": 14.67, + "learning_rate": 4.147528317085601e-06, + "loss": 2.9284, + "step": 1790000 + }, + { + "epoch": 14.67, + "learning_rate": 4.145884364365681e-06, + "loss": 2.9208, + "step": 1790500 + }, + { + "epoch": 14.67, + "learning_rate": 4.144240411645761e-06, + "loss": 2.9408, + "step": 1791000 + }, + { + "epoch": 14.68, + "learning_rate": 4.142596458925841e-06, + "loss": 2.9293, + "step": 1791500 + }, + { + "epoch": 14.68, + "learning_rate": 4.140952506205922e-06, + "loss": 2.93, + "step": 1792000 + }, + { + "epoch": 14.69, + "learning_rate": 4.1393085534860024e-06, + "loss": 2.9313, + "step": 1792500 + }, + { + "epoch": 14.69, + "learning_rate": 4.1376646007660826e-06, + "loss": 2.9321, + "step": 1793000 + }, + { + "epoch": 14.69, + "learning_rate": 4.136020648046163e-06, + "loss": 2.9249, + "step": 1793500 + }, + { + "epoch": 14.7, + "learning_rate": 4.134376695326243e-06, + "loss": 2.9286, + "step": 1794000 + }, + { + "epoch": 14.7, + "learning_rate": 4.132732742606323e-06, + "loss": 2.9354, + "step": 1794500 + }, + { + "epoch": 14.71, + "learning_rate": 4.131088789886403e-06, + "loss": 2.9287, + "step": 1795000 + }, + { + "epoch": 14.71, + "learning_rate": 4.129444837166483e-06, + "loss": 2.9271, + "step": 1795500 + }, + { + "epoch": 14.71, + "learning_rate": 4.127800884446564e-06, + "loss": 2.9335, + "step": 1796000 + }, + { + "epoch": 14.72, + "learning_rate": 4.1261569317266434e-06, + "loss": 2.9235, + "step": 1796500 + }, + { + "epoch": 14.72, + "learning_rate": 4.1245129790067236e-06, + "loss": 2.9245, + "step": 1797000 + }, + { + "epoch": 14.73, + "learning_rate": 4.1228690262868045e-06, + "loss": 2.925, + "step": 1797500 + }, + { + "epoch": 14.73, + "learning_rate": 4.121225073566885e-06, + "loss": 2.9244, + "step": 1798000 + }, + { + "epoch": 14.73, + "learning_rate": 4.119581120846965e-06, + "loss": 2.9306, + "step": 1798500 + }, + { + "epoch": 14.74, + "learning_rate": 4.117937168127045e-06, + "loss": 2.9243, + "step": 1799000 + }, + { + "epoch": 14.74, + "learning_rate": 4.116293215407125e-06, + "loss": 2.9272, + "step": 1799500 + }, + { + "epoch": 14.75, + "learning_rate": 4.114649262687205e-06, + "loss": 2.9319, + "step": 1800000 + }, + { + "epoch": 14.75, + "eval_accuracy": 0.4989067207393835, + "eval_loss": 2.794658899307251, + "eval_runtime": 404.1884, + "eval_samples_per_second": 762.902, + "eval_steps_per_second": 15.896, + "step": 1800000 + }, + { + "epoch": 14.75, + "learning_rate": 4.113005309967285e-06, + "loss": 2.9225, + "step": 1800500 + }, + { + "epoch": 14.76, + "learning_rate": 4.111361357247366e-06, + "loss": 2.9304, + "step": 1801000 + }, + { + "epoch": 14.76, + "learning_rate": 4.109717404527446e-06, + "loss": 2.9258, + "step": 1801500 + }, + { + "epoch": 14.76, + "learning_rate": 4.1080734518075265e-06, + "loss": 2.9214, + "step": 1802000 + }, + { + "epoch": 14.77, + "learning_rate": 4.106429499087607e-06, + "loss": 2.9272, + "step": 1802500 + }, + { + "epoch": 14.77, + "learning_rate": 4.104785546367687e-06, + "loss": 2.9293, + "step": 1803000 + }, + { + "epoch": 14.78, + "learning_rate": 4.103141593647767e-06, + "loss": 2.9307, + "step": 1803500 + }, + { + "epoch": 14.78, + "learning_rate": 4.101497640927847e-06, + "loss": 2.9316, + "step": 1804000 + }, + { + "epoch": 14.78, + "learning_rate": 4.099853688207928e-06, + "loss": 2.9265, + "step": 1804500 + }, + { + "epoch": 14.79, + "learning_rate": 4.098209735488008e-06, + "loss": 2.9225, + "step": 1805000 + }, + { + "epoch": 14.79, + "learning_rate": 4.096565782768087e-06, + "loss": 2.9219, + "step": 1805500 + }, + { + "epoch": 14.8, + "learning_rate": 4.094921830048168e-06, + "loss": 2.9265, + "step": 1806000 + }, + { + "epoch": 14.8, + "learning_rate": 4.0932778773282485e-06, + "loss": 2.9258, + "step": 1806500 + }, + { + "epoch": 14.8, + "learning_rate": 4.091633924608329e-06, + "loss": 2.9324, + "step": 1807000 + }, + { + "epoch": 14.81, + "learning_rate": 4.089989971888409e-06, + "loss": 2.9335, + "step": 1807500 + }, + { + "epoch": 14.81, + "learning_rate": 4.088346019168489e-06, + "loss": 2.9219, + "step": 1808000 + }, + { + "epoch": 14.82, + "learning_rate": 4.086702066448569e-06, + "loss": 2.9355, + "step": 1808500 + }, + { + "epoch": 14.82, + "learning_rate": 4.085058113728649e-06, + "loss": 2.9275, + "step": 1809000 + }, + { + "epoch": 14.82, + "learning_rate": 4.083414161008729e-06, + "loss": 2.9191, + "step": 1809500 + }, + { + "epoch": 14.83, + "learning_rate": 4.08177020828881e-06, + "loss": 2.9248, + "step": 1810000 + }, + { + "epoch": 14.83, + "learning_rate": 4.08012625556889e-06, + "loss": 2.9263, + "step": 1810500 + }, + { + "epoch": 14.84, + "learning_rate": 4.0784823028489705e-06, + "loss": 2.9324, + "step": 1811000 + }, + { + "epoch": 14.84, + "learning_rate": 4.076838350129051e-06, + "loss": 2.9256, + "step": 1811500 + }, + { + "epoch": 14.85, + "learning_rate": 4.075194397409131e-06, + "loss": 2.9276, + "step": 1812000 + }, + { + "epoch": 14.85, + "learning_rate": 4.073550444689211e-06, + "loss": 2.9319, + "step": 1812500 + }, + { + "epoch": 14.85, + "learning_rate": 4.071906491969291e-06, + "loss": 2.9265, + "step": 1813000 + }, + { + "epoch": 14.86, + "learning_rate": 4.070262539249372e-06, + "loss": 2.9284, + "step": 1813500 + }, + { + "epoch": 14.86, + "learning_rate": 4.068618586529452e-06, + "loss": 2.9313, + "step": 1814000 + }, + { + "epoch": 14.87, + "learning_rate": 4.066974633809531e-06, + "loss": 2.9201, + "step": 1814500 + }, + { + "epoch": 14.87, + "learning_rate": 4.065330681089612e-06, + "loss": 2.9272, + "step": 1815000 + }, + { + "epoch": 14.87, + "learning_rate": 4.0636867283696925e-06, + "loss": 2.929, + "step": 1815500 + }, + { + "epoch": 14.88, + "learning_rate": 4.062042775649773e-06, + "loss": 2.9221, + "step": 1816000 + }, + { + "epoch": 14.88, + "learning_rate": 4.060398822929853e-06, + "loss": 2.9246, + "step": 1816500 + }, + { + "epoch": 14.89, + "learning_rate": 4.058754870209934e-06, + "loss": 2.9316, + "step": 1817000 + }, + { + "epoch": 14.89, + "learning_rate": 4.057110917490013e-06, + "loss": 2.9285, + "step": 1817500 + }, + { + "epoch": 14.89, + "learning_rate": 4.055466964770093e-06, + "loss": 2.9272, + "step": 1818000 + }, + { + "epoch": 14.9, + "learning_rate": 4.053823012050174e-06, + "loss": 2.928, + "step": 1818500 + }, + { + "epoch": 14.9, + "learning_rate": 4.052179059330254e-06, + "loss": 2.9296, + "step": 1819000 + }, + { + "epoch": 14.91, + "learning_rate": 4.050535106610334e-06, + "loss": 2.9252, + "step": 1819500 + }, + { + "epoch": 14.91, + "learning_rate": 4.0488911538904145e-06, + "loss": 2.9287, + "step": 1820000 + }, + { + "epoch": 14.92, + "learning_rate": 4.047247201170495e-06, + "loss": 2.9318, + "step": 1820500 + }, + { + "epoch": 14.92, + "learning_rate": 4.045603248450575e-06, + "loss": 2.927, + "step": 1821000 + }, + { + "epoch": 14.92, + "learning_rate": 4.043959295730655e-06, + "loss": 2.9241, + "step": 1821500 + }, + { + "epoch": 14.93, + "learning_rate": 4.042315343010735e-06, + "loss": 2.9234, + "step": 1822000 + }, + { + "epoch": 14.93, + "learning_rate": 4.040671390290816e-06, + "loss": 2.9218, + "step": 1822500 + }, + { + "epoch": 14.94, + "learning_rate": 4.039027437570896e-06, + "loss": 2.9306, + "step": 1823000 + }, + { + "epoch": 14.94, + "learning_rate": 4.037383484850975e-06, + "loss": 2.9271, + "step": 1823500 + }, + { + "epoch": 14.94, + "learning_rate": 4.035739532131056e-06, + "loss": 2.9231, + "step": 1824000 + }, + { + "epoch": 14.95, + "learning_rate": 4.0340955794111365e-06, + "loss": 2.9276, + "step": 1824500 + }, + { + "epoch": 14.95, + "learning_rate": 4.032451626691217e-06, + "loss": 2.9269, + "step": 1825000 + }, + { + "epoch": 14.96, + "learning_rate": 4.030807673971297e-06, + "loss": 2.9248, + "step": 1825500 + }, + { + "epoch": 14.96, + "learning_rate": 4.029163721251378e-06, + "loss": 2.927, + "step": 1826000 + }, + { + "epoch": 14.96, + "learning_rate": 4.027519768531457e-06, + "loss": 2.932, + "step": 1826500 + }, + { + "epoch": 14.97, + "learning_rate": 4.025875815811537e-06, + "loss": 2.9253, + "step": 1827000 + }, + { + "epoch": 14.97, + "learning_rate": 4.024231863091618e-06, + "loss": 2.9283, + "step": 1827500 + }, + { + "epoch": 14.98, + "learning_rate": 4.022587910371698e-06, + "loss": 2.9251, + "step": 1828000 + }, + { + "epoch": 14.98, + "learning_rate": 4.020943957651778e-06, + "loss": 2.9207, + "step": 1828500 + }, + { + "epoch": 14.98, + "learning_rate": 4.0193000049318584e-06, + "loss": 2.931, + "step": 1829000 + }, + { + "epoch": 14.99, + "learning_rate": 4.0176560522119386e-06, + "loss": 2.922, + "step": 1829500 + }, + { + "epoch": 14.99, + "learning_rate": 4.016012099492019e-06, + "loss": 2.9304, + "step": 1830000 + }, + { + "epoch": 14.99, + "eval_accuracy": 0.49915539316259305, + "eval_loss": 2.7920210361480713, + "eval_runtime": 408.9888, + "eval_samples_per_second": 753.947, + "eval_steps_per_second": 15.709, + "step": 1830000 + }, + { + "epoch": 15.0, + "learning_rate": 4.014368146772099e-06, + "loss": 2.924, + "step": 1830500 + }, + { + "epoch": 15.0, + "learning_rate": 4.01272419405218e-06, + "loss": 2.9216, + "step": 1831000 + }, + { + "epoch": 15.01, + "learning_rate": 4.01108024133226e-06, + "loss": 2.9301, + "step": 1831500 + }, + { + "epoch": 15.01, + "learning_rate": 4.00943628861234e-06, + "loss": 2.9275, + "step": 1832000 + }, + { + "epoch": 15.01, + "learning_rate": 4.00779233589242e-06, + "loss": 2.9231, + "step": 1832500 + }, + { + "epoch": 15.02, + "learning_rate": 4.0061483831725e-06, + "loss": 2.9206, + "step": 1833000 + }, + { + "epoch": 15.02, + "learning_rate": 4.0045044304525804e-06, + "loss": 2.9231, + "step": 1833500 + }, + { + "epoch": 15.03, + "learning_rate": 4.0028604777326606e-06, + "loss": 2.9246, + "step": 1834000 + }, + { + "epoch": 15.03, + "learning_rate": 4.001216525012741e-06, + "loss": 2.9253, + "step": 1834500 + }, + { + "epoch": 15.03, + "learning_rate": 3.999572572292822e-06, + "loss": 2.9233, + "step": 1835000 + }, + { + "epoch": 15.04, + "learning_rate": 3.997928619572902e-06, + "loss": 2.9254, + "step": 1835500 + }, + { + "epoch": 15.04, + "learning_rate": 3.996284666852981e-06, + "loss": 2.9231, + "step": 1836000 + }, + { + "epoch": 15.05, + "learning_rate": 3.994640714133062e-06, + "loss": 2.9289, + "step": 1836500 + }, + { + "epoch": 15.05, + "learning_rate": 3.992996761413142e-06, + "loss": 2.9245, + "step": 1837000 + }, + { + "epoch": 15.05, + "learning_rate": 3.991352808693222e-06, + "loss": 2.9221, + "step": 1837500 + }, + { + "epoch": 15.06, + "learning_rate": 3.989708855973302e-06, + "loss": 2.9205, + "step": 1838000 + }, + { + "epoch": 15.06, + "learning_rate": 3.988064903253383e-06, + "loss": 2.9306, + "step": 1838500 + }, + { + "epoch": 15.07, + "learning_rate": 3.986420950533463e-06, + "loss": 2.9248, + "step": 1839000 + }, + { + "epoch": 15.07, + "learning_rate": 3.984776997813543e-06, + "loss": 2.9229, + "step": 1839500 + }, + { + "epoch": 15.07, + "learning_rate": 3.983133045093624e-06, + "loss": 2.9208, + "step": 1840000 + }, + { + "epoch": 15.08, + "learning_rate": 3.981489092373704e-06, + "loss": 2.9246, + "step": 1840500 + }, + { + "epoch": 15.08, + "learning_rate": 3.979845139653784e-06, + "loss": 2.9267, + "step": 1841000 + }, + { + "epoch": 15.09, + "learning_rate": 3.978201186933864e-06, + "loss": 2.9254, + "step": 1841500 + }, + { + "epoch": 15.09, + "step": 1841687, + "total_flos": 9.934758834172068e+17, + "train_loss": 3.300056499910879, + "train_runtime": 215976.5267, + "train_samples_per_second": 678.171, + "train_steps_per_second": 14.129 + } + ], + "max_steps": 3051450, + "num_train_epochs": 25, + "total_flos": 9.934758834172068e+17, + "trial_name": null, + "trial_params": null +}