{ "best_metric": 2.7920210361480713, "best_model_checkpoint": "model_v1_complete_training_wt_init_48_mini/checkpoint-1830000", "epoch": 15.088621802749513, "global_step": 1841687, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 5.000000000000001e-07, "loss": 10.2892, "step": 500 }, { "epoch": 0.01, "learning_rate": 1.0000000000000002e-06, "loss": 8.8433, "step": 1000 }, { "epoch": 0.01, "learning_rate": 1.5e-06, "loss": 8.1625, "step": 1500 }, { "epoch": 0.02, "learning_rate": 2.0000000000000003e-06, "loss": 7.85, "step": 2000 }, { "epoch": 0.02, "learning_rate": 2.5e-06, "loss": 7.6776, "step": 2500 }, { "epoch": 0.02, "learning_rate": 3e-06, "loss": 7.5065, "step": 3000 }, { "epoch": 0.03, "learning_rate": 3.5e-06, "loss": 7.3363, "step": 3500 }, { "epoch": 0.03, "learning_rate": 4.000000000000001e-06, "loss": 7.2137, "step": 4000 }, { "epoch": 0.04, "learning_rate": 4.5e-06, "loss": 7.1039, "step": 4500 }, { "epoch": 0.04, "learning_rate": 5e-06, "loss": 7.0197, "step": 5000 }, { "epoch": 0.05, "learning_rate": 5.500000000000001e-06, "loss": 6.9294, "step": 5500 }, { "epoch": 0.05, "learning_rate": 6e-06, "loss": 6.854, "step": 6000 }, { "epoch": 0.05, "learning_rate": 6.5000000000000004e-06, "loss": 6.7942, "step": 6500 }, { "epoch": 0.06, "learning_rate": 7e-06, "loss": 6.7328, "step": 7000 }, { "epoch": 0.06, "learning_rate": 7.500000000000001e-06, "loss": 6.685, "step": 7500 }, { "epoch": 0.07, "learning_rate": 8.000000000000001e-06, "loss": 6.6331, "step": 8000 }, { "epoch": 0.07, "learning_rate": 8.5e-06, "loss": 6.5921, "step": 8500 }, { "epoch": 0.07, "learning_rate": 9e-06, "loss": 6.5616, "step": 9000 }, { "epoch": 0.08, "learning_rate": 9.5e-06, "loss": 6.5268, "step": 9500 }, { "epoch": 0.08, "learning_rate": 1e-05, "loss": 6.5012, "step": 10000 }, { "epoch": 0.09, "learning_rate": 9.998356047280082e-06, "loss": 6.4694, "step": 10500 }, { "epoch": 0.09, "learning_rate": 9.996712094560161e-06, "loss": 6.4482, "step": 11000 }, { "epoch": 0.09, "learning_rate": 9.995068141840242e-06, "loss": 6.4289, "step": 11500 }, { "epoch": 0.1, "learning_rate": 9.993424189120321e-06, "loss": 6.4084, "step": 12000 }, { "epoch": 0.1, "learning_rate": 9.991780236400402e-06, "loss": 6.3823, "step": 12500 }, { "epoch": 0.11, "learning_rate": 9.990136283680483e-06, "loss": 6.3696, "step": 13000 }, { "epoch": 0.11, "learning_rate": 9.988492330960563e-06, "loss": 6.359, "step": 13500 }, { "epoch": 0.11, "learning_rate": 9.986848378240642e-06, "loss": 6.3447, "step": 14000 }, { "epoch": 0.12, "learning_rate": 9.985204425520723e-06, "loss": 6.327, "step": 14500 }, { "epoch": 0.12, "learning_rate": 9.983560472800802e-06, "loss": 6.3197, "step": 15000 }, { "epoch": 0.13, "learning_rate": 9.981916520080883e-06, "loss": 6.3012, "step": 15500 }, { "epoch": 0.13, "learning_rate": 9.980272567360964e-06, "loss": 6.2978, "step": 16000 }, { "epoch": 0.14, "learning_rate": 9.978628614641043e-06, "loss": 6.2945, "step": 16500 }, { "epoch": 0.14, "learning_rate": 9.976984661921124e-06, "loss": 6.2789, "step": 17000 }, { "epoch": 0.14, "learning_rate": 9.975340709201205e-06, "loss": 6.2788, "step": 17500 }, { "epoch": 0.15, "learning_rate": 9.973696756481285e-06, "loss": 6.2633, "step": 18000 }, { "epoch": 0.15, "learning_rate": 9.972052803761366e-06, "loss": 6.2471, "step": 18500 }, { "epoch": 0.16, "learning_rate": 9.970408851041445e-06, "loss": 6.1923, "step": 19000 }, { "epoch": 0.16, "learning_rate": 9.968764898321524e-06, "loss": 6.1695, "step": 19500 }, { "epoch": 0.16, "learning_rate": 9.967120945601605e-06, "loss": 6.1645, "step": 20000 }, { "epoch": 0.17, "learning_rate": 9.965476992881686e-06, "loss": 6.1372, "step": 20500 }, { "epoch": 0.17, "learning_rate": 9.963833040161765e-06, "loss": 6.1184, "step": 21000 }, { "epoch": 0.18, "learning_rate": 9.962189087441846e-06, "loss": 6.1077, "step": 21500 }, { "epoch": 0.18, "learning_rate": 9.960545134721926e-06, "loss": 6.0858, "step": 22000 }, { "epoch": 0.18, "learning_rate": 9.958901182002007e-06, "loss": 6.0807, "step": 22500 }, { "epoch": 0.19, "learning_rate": 9.957257229282087e-06, "loss": 6.0578, "step": 23000 }, { "epoch": 0.19, "learning_rate": 9.955613276562167e-06, "loss": 6.0571, "step": 23500 }, { "epoch": 0.2, "learning_rate": 9.953969323842248e-06, "loss": 6.0397, "step": 24000 }, { "epoch": 0.2, "learning_rate": 9.952325371122327e-06, "loss": 6.0305, "step": 24500 }, { "epoch": 0.2, "learning_rate": 9.950681418402408e-06, "loss": 6.0192, "step": 25000 }, { "epoch": 0.21, "learning_rate": 9.949037465682487e-06, "loss": 6.003, "step": 25500 }, { "epoch": 0.21, "learning_rate": 9.947393512962568e-06, "loss": 5.9987, "step": 26000 }, { "epoch": 0.22, "learning_rate": 9.945749560242648e-06, "loss": 5.9946, "step": 26500 }, { "epoch": 0.22, "learning_rate": 9.944105607522729e-06, "loss": 5.9855, "step": 27000 }, { "epoch": 0.23, "learning_rate": 9.942461654802808e-06, "loss": 5.9723, "step": 27500 }, { "epoch": 0.23, "learning_rate": 9.940817702082889e-06, "loss": 5.9701, "step": 28000 }, { "epoch": 0.23, "learning_rate": 9.93917374936297e-06, "loss": 5.9636, "step": 28500 }, { "epoch": 0.24, "learning_rate": 9.937529796643049e-06, "loss": 5.9503, "step": 29000 }, { "epoch": 0.24, "learning_rate": 9.93588584392313e-06, "loss": 5.9404, "step": 29500 }, { "epoch": 0.25, "learning_rate": 9.934241891203211e-06, "loss": 5.9411, "step": 30000 }, { "epoch": 0.25, "eval_accuracy": 0.1518340698223809, "eval_loss": 5.883250713348389, "eval_runtime": 404.4584, "eval_samples_per_second": 762.392, "eval_steps_per_second": 15.885, "step": 30000 }, { "epoch": 0.25, "learning_rate": 9.93259793848329e-06, "loss": 5.9333, "step": 30500 }, { "epoch": 0.25, "learning_rate": 9.930953985763371e-06, "loss": 5.9235, "step": 31000 }, { "epoch": 0.26, "learning_rate": 9.92931003304345e-06, "loss": 5.9169, "step": 31500 }, { "epoch": 0.26, "learning_rate": 9.92766608032353e-06, "loss": 5.9136, "step": 32000 }, { "epoch": 0.27, "learning_rate": 9.92602212760361e-06, "loss": 5.9025, "step": 32500 }, { "epoch": 0.27, "learning_rate": 9.924378174883692e-06, "loss": 5.9008, "step": 33000 }, { "epoch": 0.27, "learning_rate": 9.922734222163771e-06, "loss": 5.8908, "step": 33500 }, { "epoch": 0.28, "learning_rate": 9.921090269443852e-06, "loss": 5.8871, "step": 34000 }, { "epoch": 0.28, "learning_rate": 9.919446316723931e-06, "loss": 5.8785, "step": 34500 }, { "epoch": 0.29, "learning_rate": 9.917802364004012e-06, "loss": 5.877, "step": 35000 }, { "epoch": 0.29, "learning_rate": 9.916158411284093e-06, "loss": 5.8796, "step": 35500 }, { "epoch": 0.29, "learning_rate": 9.914514458564172e-06, "loss": 5.866, "step": 36000 }, { "epoch": 0.3, "learning_rate": 9.912870505844253e-06, "loss": 5.8613, "step": 36500 }, { "epoch": 0.3, "learning_rate": 9.911226553124333e-06, "loss": 5.8509, "step": 37000 }, { "epoch": 0.31, "learning_rate": 9.909582600404412e-06, "loss": 5.8453, "step": 37500 }, { "epoch": 0.31, "learning_rate": 9.907938647684493e-06, "loss": 5.843, "step": 38000 }, { "epoch": 0.32, "learning_rate": 9.906294694964574e-06, "loss": 5.8326, "step": 38500 }, { "epoch": 0.32, "learning_rate": 9.904650742244653e-06, "loss": 5.8313, "step": 39000 }, { "epoch": 0.32, "learning_rate": 9.903006789524734e-06, "loss": 5.8303, "step": 39500 }, { "epoch": 0.33, "learning_rate": 9.901362836804813e-06, "loss": 5.825, "step": 40000 }, { "epoch": 0.33, "learning_rate": 9.899718884084894e-06, "loss": 5.809, "step": 40500 }, { "epoch": 0.34, "learning_rate": 9.898074931364975e-06, "loss": 5.8117, "step": 41000 }, { "epoch": 0.34, "learning_rate": 9.896430978645055e-06, "loss": 5.8035, "step": 41500 }, { "epoch": 0.34, "learning_rate": 9.894787025925136e-06, "loss": 5.7973, "step": 42000 }, { "epoch": 0.35, "learning_rate": 9.893143073205217e-06, "loss": 5.7966, "step": 42500 }, { "epoch": 0.35, "learning_rate": 9.891499120485296e-06, "loss": 5.789, "step": 43000 }, { "epoch": 0.36, "learning_rate": 9.889855167765375e-06, "loss": 5.7827, "step": 43500 }, { "epoch": 0.36, "learning_rate": 9.888211215045456e-06, "loss": 5.7801, "step": 44000 }, { "epoch": 0.36, "learning_rate": 9.886567262325535e-06, "loss": 5.7761, "step": 44500 }, { "epoch": 0.37, "learning_rate": 9.884923309605616e-06, "loss": 5.7702, "step": 45000 }, { "epoch": 0.37, "learning_rate": 9.883279356885697e-06, "loss": 5.7638, "step": 45500 }, { "epoch": 0.38, "learning_rate": 9.881635404165777e-06, "loss": 5.7549, "step": 46000 }, { "epoch": 0.38, "learning_rate": 9.879991451445858e-06, "loss": 5.7496, "step": 46500 }, { "epoch": 0.39, "learning_rate": 9.878347498725937e-06, "loss": 5.7451, "step": 47000 }, { "epoch": 0.39, "learning_rate": 9.876703546006018e-06, "loss": 5.7447, "step": 47500 }, { "epoch": 0.39, "learning_rate": 9.875059593286099e-06, "loss": 5.7451, "step": 48000 }, { "epoch": 0.4, "learning_rate": 9.873415640566178e-06, "loss": 5.7393, "step": 48500 }, { "epoch": 0.4, "learning_rate": 9.87177168784626e-06, "loss": 5.7319, "step": 49000 }, { "epoch": 0.41, "learning_rate": 9.870127735126338e-06, "loss": 5.73, "step": 49500 }, { "epoch": 0.41, "learning_rate": 9.868483782406418e-06, "loss": 5.7289, "step": 50000 }, { "epoch": 0.41, "learning_rate": 9.866839829686499e-06, "loss": 5.7158, "step": 50500 }, { "epoch": 0.42, "learning_rate": 9.86519587696658e-06, "loss": 5.713, "step": 51000 }, { "epoch": 0.42, "learning_rate": 9.863551924246659e-06, "loss": 5.7082, "step": 51500 }, { "epoch": 0.43, "learning_rate": 9.86190797152674e-06, "loss": 5.704, "step": 52000 }, { "epoch": 0.43, "learning_rate": 9.86026401880682e-06, "loss": 5.7021, "step": 52500 }, { "epoch": 0.43, "learning_rate": 9.8586200660869e-06, "loss": 5.7041, "step": 53000 }, { "epoch": 0.44, "learning_rate": 9.856976113366981e-06, "loss": 5.6927, "step": 53500 }, { "epoch": 0.44, "learning_rate": 9.85533216064706e-06, "loss": 5.6896, "step": 54000 }, { "epoch": 0.45, "learning_rate": 9.853688207927141e-06, "loss": 5.6803, "step": 54500 }, { "epoch": 0.45, "learning_rate": 9.85204425520722e-06, "loss": 5.6817, "step": 55000 }, { "epoch": 0.45, "learning_rate": 9.8504003024873e-06, "loss": 5.6833, "step": 55500 }, { "epoch": 0.46, "learning_rate": 9.848756349767381e-06, "loss": 5.6681, "step": 56000 }, { "epoch": 0.46, "learning_rate": 9.847112397047462e-06, "loss": 5.6702, "step": 56500 }, { "epoch": 0.47, "learning_rate": 9.845468444327541e-06, "loss": 5.6569, "step": 57000 }, { "epoch": 0.47, "learning_rate": 9.843824491607622e-06, "loss": 5.6653, "step": 57500 }, { "epoch": 0.48, "learning_rate": 9.842180538887703e-06, "loss": 5.6525, "step": 58000 }, { "epoch": 0.48, "learning_rate": 9.840536586167782e-06, "loss": 5.6518, "step": 58500 }, { "epoch": 0.48, "learning_rate": 9.838892633447863e-06, "loss": 5.6497, "step": 59000 }, { "epoch": 0.49, "learning_rate": 9.837248680727943e-06, "loss": 5.6454, "step": 59500 }, { "epoch": 0.49, "learning_rate": 9.835604728008024e-06, "loss": 5.6408, "step": 60000 }, { "epoch": 0.49, "eval_accuracy": 0.19079809901308453, "eval_loss": 5.526524543762207, "eval_runtime": 411.2166, "eval_samples_per_second": 749.863, "eval_steps_per_second": 15.624, "step": 60000 }, { "epoch": 0.5, "learning_rate": 9.833960775288105e-06, "loss": 5.6208, "step": 60500 }, { "epoch": 0.5, "learning_rate": 9.832316822568184e-06, "loss": 5.5933, "step": 61000 }, { "epoch": 0.5, "learning_rate": 9.830672869848265e-06, "loss": 5.5664, "step": 61500 }, { "epoch": 0.51, "learning_rate": 9.829028917128344e-06, "loss": 5.5416, "step": 62000 }, { "epoch": 0.51, "learning_rate": 9.827384964408423e-06, "loss": 5.5315, "step": 62500 }, { "epoch": 0.52, "learning_rate": 9.825741011688504e-06, "loss": 5.5227, "step": 63000 }, { "epoch": 0.52, "learning_rate": 9.824097058968585e-06, "loss": 5.4887, "step": 63500 }, { "epoch": 0.52, "learning_rate": 9.822453106248665e-06, "loss": 5.4846, "step": 64000 }, { "epoch": 0.53, "learning_rate": 9.820809153528746e-06, "loss": 5.4683, "step": 64500 }, { "epoch": 0.53, "learning_rate": 9.819165200808825e-06, "loss": 5.4631, "step": 65000 }, { "epoch": 0.54, "learning_rate": 9.817521248088906e-06, "loss": 5.4465, "step": 65500 }, { "epoch": 0.54, "learning_rate": 9.815877295368987e-06, "loss": 5.4323, "step": 66000 }, { "epoch": 0.54, "learning_rate": 9.814233342649066e-06, "loss": 5.415, "step": 66500 }, { "epoch": 0.55, "learning_rate": 9.812589389929147e-06, "loss": 5.4046, "step": 67000 }, { "epoch": 0.55, "learning_rate": 9.810945437209226e-06, "loss": 5.3932, "step": 67500 }, { "epoch": 0.56, "learning_rate": 9.809301484489306e-06, "loss": 5.3858, "step": 68000 }, { "epoch": 0.56, "learning_rate": 9.807657531769387e-06, "loss": 5.3667, "step": 68500 }, { "epoch": 0.57, "learning_rate": 9.806013579049468e-06, "loss": 5.3536, "step": 69000 }, { "epoch": 0.57, "learning_rate": 9.804369626329547e-06, "loss": 5.3413, "step": 69500 }, { "epoch": 0.57, "learning_rate": 9.802725673609628e-06, "loss": 5.327, "step": 70000 }, { "epoch": 0.58, "learning_rate": 9.801081720889709e-06, "loss": 5.3028, "step": 70500 }, { "epoch": 0.58, "learning_rate": 9.799437768169788e-06, "loss": 5.2809, "step": 71000 }, { "epoch": 0.59, "learning_rate": 9.797793815449869e-06, "loss": 5.2594, "step": 71500 }, { "epoch": 0.59, "learning_rate": 9.796149862729948e-06, "loss": 5.2433, "step": 72000 }, { "epoch": 0.59, "learning_rate": 9.79450591001003e-06, "loss": 5.2326, "step": 72500 }, { "epoch": 0.6, "learning_rate": 9.79286195729011e-06, "loss": 5.2018, "step": 73000 }, { "epoch": 0.6, "learning_rate": 9.79121800457019e-06, "loss": 5.1855, "step": 73500 }, { "epoch": 0.61, "learning_rate": 9.789574051850269e-06, "loss": 5.1629, "step": 74000 }, { "epoch": 0.61, "learning_rate": 9.78793009913035e-06, "loss": 5.1439, "step": 74500 }, { "epoch": 0.61, "learning_rate": 9.786286146410429e-06, "loss": 5.1354, "step": 75000 }, { "epoch": 0.62, "learning_rate": 9.78464219369051e-06, "loss": 5.1111, "step": 75500 }, { "epoch": 0.62, "learning_rate": 9.782998240970591e-06, "loss": 5.0945, "step": 76000 }, { "epoch": 0.63, "learning_rate": 9.78135428825067e-06, "loss": 5.0785, "step": 76500 }, { "epoch": 0.63, "learning_rate": 9.779710335530751e-06, "loss": 5.0583, "step": 77000 }, { "epoch": 0.63, "learning_rate": 9.77806638281083e-06, "loss": 5.0422, "step": 77500 }, { "epoch": 0.64, "learning_rate": 9.776422430090912e-06, "loss": 5.0259, "step": 78000 }, { "epoch": 0.64, "learning_rate": 9.774778477370993e-06, "loss": 5.0104, "step": 78500 }, { "epoch": 0.65, "learning_rate": 9.773134524651072e-06, "loss": 4.9894, "step": 79000 }, { "epoch": 0.65, "learning_rate": 9.771490571931153e-06, "loss": 4.9678, "step": 79500 }, { "epoch": 0.66, "learning_rate": 9.769846619211232e-06, "loss": 4.9547, "step": 80000 }, { "epoch": 0.66, "learning_rate": 9.768202666491311e-06, "loss": 4.9365, "step": 80500 }, { "epoch": 0.66, "learning_rate": 9.766558713771392e-06, "loss": 4.9222, "step": 81000 }, { "epoch": 0.67, "learning_rate": 9.764914761051473e-06, "loss": 4.9002, "step": 81500 }, { "epoch": 0.67, "learning_rate": 9.763270808331553e-06, "loss": 4.8928, "step": 82000 }, { "epoch": 0.68, "learning_rate": 9.761626855611634e-06, "loss": 4.866, "step": 82500 }, { "epoch": 0.68, "learning_rate": 9.759982902891713e-06, "loss": 4.8529, "step": 83000 }, { "epoch": 0.68, "learning_rate": 9.758338950171794e-06, "loss": 4.841, "step": 83500 }, { "epoch": 0.69, "learning_rate": 9.756694997451875e-06, "loss": 4.8105, "step": 84000 }, { "epoch": 0.69, "learning_rate": 9.755051044731954e-06, "loss": 4.7817, "step": 84500 }, { "epoch": 0.7, "learning_rate": 9.753407092012035e-06, "loss": 4.7561, "step": 85000 }, { "epoch": 0.7, "learning_rate": 9.751763139292116e-06, "loss": 4.7291, "step": 85500 }, { "epoch": 0.7, "learning_rate": 9.750119186572195e-06, "loss": 4.6946, "step": 86000 }, { "epoch": 0.71, "learning_rate": 9.748475233852275e-06, "loss": 4.6735, "step": 86500 }, { "epoch": 0.71, "learning_rate": 9.746831281132356e-06, "loss": 4.6509, "step": 87000 }, { "epoch": 0.72, "learning_rate": 9.745187328412435e-06, "loss": 4.6292, "step": 87500 }, { "epoch": 0.72, "learning_rate": 9.743543375692516e-06, "loss": 4.6109, "step": 88000 }, { "epoch": 0.73, "learning_rate": 9.741899422972597e-06, "loss": 4.5882, "step": 88500 }, { "epoch": 0.73, "learning_rate": 9.740255470252676e-06, "loss": 4.573, "step": 89000 }, { "epoch": 0.73, "learning_rate": 9.738611517532757e-06, "loss": 4.5576, "step": 89500 }, { "epoch": 0.74, "learning_rate": 9.736967564812836e-06, "loss": 4.5385, "step": 90000 }, { "epoch": 0.74, "eval_accuracy": 0.31378937363065973, "eval_loss": 4.313334941864014, "eval_runtime": 406.1908, "eval_samples_per_second": 759.141, "eval_steps_per_second": 15.818, "step": 90000 }, { "epoch": 0.74, "learning_rate": 9.735323612092917e-06, "loss": 4.5261, "step": 90500 }, { "epoch": 0.75, "learning_rate": 9.733679659372998e-06, "loss": 4.5152, "step": 91000 }, { "epoch": 0.75, "learning_rate": 9.732035706653078e-06, "loss": 4.5018, "step": 91500 }, { "epoch": 0.75, "learning_rate": 9.730391753933157e-06, "loss": 4.4877, "step": 92000 }, { "epoch": 0.76, "learning_rate": 9.728747801213238e-06, "loss": 4.4749, "step": 92500 }, { "epoch": 0.76, "learning_rate": 9.727103848493317e-06, "loss": 4.4638, "step": 93000 }, { "epoch": 0.77, "learning_rate": 9.725459895773398e-06, "loss": 4.4451, "step": 93500 }, { "epoch": 0.77, "learning_rate": 9.723815943053479e-06, "loss": 4.437, "step": 94000 }, { "epoch": 0.77, "learning_rate": 9.722171990333558e-06, "loss": 4.433, "step": 94500 }, { "epoch": 0.78, "learning_rate": 9.72052803761364e-06, "loss": 4.4104, "step": 95000 }, { "epoch": 0.78, "learning_rate": 9.718884084893719e-06, "loss": 4.4126, "step": 95500 }, { "epoch": 0.79, "learning_rate": 9.7172401321738e-06, "loss": 4.4033, "step": 96000 }, { "epoch": 0.79, "learning_rate": 9.71559617945388e-06, "loss": 4.3898, "step": 96500 }, { "epoch": 0.79, "learning_rate": 9.71395222673396e-06, "loss": 4.3815, "step": 97000 }, { "epoch": 0.8, "learning_rate": 9.71230827401404e-06, "loss": 4.3669, "step": 97500 }, { "epoch": 0.8, "learning_rate": 9.71066432129412e-06, "loss": 4.359, "step": 98000 }, { "epoch": 0.81, "learning_rate": 9.709020368574201e-06, "loss": 4.3495, "step": 98500 }, { "epoch": 0.81, "learning_rate": 9.70737641585428e-06, "loss": 4.3459, "step": 99000 }, { "epoch": 0.82, "learning_rate": 9.705732463134361e-06, "loss": 4.3349, "step": 99500 }, { "epoch": 0.82, "learning_rate": 9.70408851041444e-06, "loss": 4.3202, "step": 100000 }, { "epoch": 0.82, "learning_rate": 9.702444557694522e-06, "loss": 4.32, "step": 100500 }, { "epoch": 0.83, "learning_rate": 9.700800604974602e-06, "loss": 4.3064, "step": 101000 }, { "epoch": 0.83, "learning_rate": 9.699156652254682e-06, "loss": 4.3046, "step": 101500 }, { "epoch": 0.84, "learning_rate": 9.697512699534763e-06, "loss": 4.2962, "step": 102000 }, { "epoch": 0.84, "learning_rate": 9.695868746814842e-06, "loss": 4.2827, "step": 102500 }, { "epoch": 0.84, "learning_rate": 9.694224794094923e-06, "loss": 4.2798, "step": 103000 }, { "epoch": 0.85, "learning_rate": 9.692580841375004e-06, "loss": 4.2678, "step": 103500 }, { "epoch": 0.85, "learning_rate": 9.690936888655083e-06, "loss": 4.2663, "step": 104000 }, { "epoch": 0.86, "learning_rate": 9.689292935935163e-06, "loss": 4.2641, "step": 104500 }, { "epoch": 0.86, "learning_rate": 9.687648983215243e-06, "loss": 4.2555, "step": 105000 }, { "epoch": 0.86, "learning_rate": 9.686005030495323e-06, "loss": 4.2571, "step": 105500 }, { "epoch": 0.87, "learning_rate": 9.684361077775404e-06, "loss": 4.2354, "step": 106000 }, { "epoch": 0.87, "learning_rate": 9.682717125055485e-06, "loss": 4.2328, "step": 106500 }, { "epoch": 0.88, "learning_rate": 9.681073172335564e-06, "loss": 4.2232, "step": 107000 }, { "epoch": 0.88, "learning_rate": 9.679429219615645e-06, "loss": 4.2218, "step": 107500 }, { "epoch": 0.88, "learning_rate": 9.677785266895724e-06, "loss": 4.2115, "step": 108000 }, { "epoch": 0.89, "learning_rate": 9.676141314175805e-06, "loss": 4.2085, "step": 108500 }, { "epoch": 0.89, "learning_rate": 9.674497361455886e-06, "loss": 4.1947, "step": 109000 }, { "epoch": 0.9, "learning_rate": 9.672853408735965e-06, "loss": 4.1982, "step": 109500 }, { "epoch": 0.9, "learning_rate": 9.671209456016045e-06, "loss": 4.1942, "step": 110000 }, { "epoch": 0.91, "learning_rate": 9.669565503296126e-06, "loss": 4.1869, "step": 110500 }, { "epoch": 0.91, "learning_rate": 9.667921550576205e-06, "loss": 4.1827, "step": 111000 }, { "epoch": 0.91, "learning_rate": 9.666277597856286e-06, "loss": 4.174, "step": 111500 }, { "epoch": 0.92, "learning_rate": 9.664633645136367e-06, "loss": 4.1719, "step": 112000 }, { "epoch": 0.92, "learning_rate": 9.662989692416446e-06, "loss": 4.1692, "step": 112500 }, { "epoch": 0.93, "learning_rate": 9.661345739696527e-06, "loss": 4.1603, "step": 113000 }, { "epoch": 0.93, "learning_rate": 9.659701786976608e-06, "loss": 4.1583, "step": 113500 }, { "epoch": 0.93, "learning_rate": 9.658057834256687e-06, "loss": 4.1563, "step": 114000 }, { "epoch": 0.94, "learning_rate": 9.656413881536768e-06, "loss": 4.1527, "step": 114500 }, { "epoch": 0.94, "learning_rate": 9.654769928816848e-06, "loss": 4.1418, "step": 115000 }, { "epoch": 0.95, "learning_rate": 9.653125976096929e-06, "loss": 4.1333, "step": 115500 }, { "epoch": 0.95, "learning_rate": 9.65148202337701e-06, "loss": 4.1356, "step": 116000 }, { "epoch": 0.95, "learning_rate": 9.649838070657089e-06, "loss": 4.1236, "step": 116500 }, { "epoch": 0.96, "learning_rate": 9.648194117937168e-06, "loss": 4.126, "step": 117000 }, { "epoch": 0.96, "learning_rate": 9.64655016521725e-06, "loss": 4.1232, "step": 117500 }, { "epoch": 0.97, "learning_rate": 9.644906212497328e-06, "loss": 4.1148, "step": 118000 }, { "epoch": 0.97, "learning_rate": 9.64326225977741e-06, "loss": 4.1131, "step": 118500 }, { "epoch": 0.97, "learning_rate": 9.64161830705749e-06, "loss": 4.1212, "step": 119000 }, { "epoch": 0.98, "learning_rate": 9.63997435433757e-06, "loss": 4.1011, "step": 119500 }, { "epoch": 0.98, "learning_rate": 9.63833040161765e-06, "loss": 4.1015, "step": 120000 }, { "epoch": 0.98, "eval_accuracy": 0.35834025167851113, "eval_loss": 3.899629831314087, "eval_runtime": 406.9878, "eval_samples_per_second": 757.654, "eval_steps_per_second": 15.787, "step": 120000 }, { "epoch": 0.99, "learning_rate": 9.63668644889773e-06, "loss": 4.0954, "step": 120500 }, { "epoch": 0.99, "learning_rate": 9.635042496177811e-06, "loss": 4.0893, "step": 121000 }, { "epoch": 1.0, "learning_rate": 9.633398543457892e-06, "loss": 4.0861, "step": 121500 }, { "epoch": 1.0, "learning_rate": 9.631754590737971e-06, "loss": 4.0811, "step": 122000 }, { "epoch": 1.0, "learning_rate": 9.63011063801805e-06, "loss": 4.0771, "step": 122500 }, { "epoch": 1.01, "learning_rate": 9.628466685298131e-06, "loss": 4.0736, "step": 123000 }, { "epoch": 1.01, "learning_rate": 9.62682273257821e-06, "loss": 4.0716, "step": 123500 }, { "epoch": 1.02, "learning_rate": 9.625178779858292e-06, "loss": 4.064, "step": 124000 }, { "epoch": 1.02, "learning_rate": 9.623534827138373e-06, "loss": 4.0587, "step": 124500 }, { "epoch": 1.02, "learning_rate": 9.621890874418452e-06, "loss": 4.056, "step": 125000 }, { "epoch": 1.03, "learning_rate": 9.620246921698533e-06, "loss": 4.0621, "step": 125500 }, { "epoch": 1.03, "learning_rate": 9.618602968978614e-06, "loss": 4.0473, "step": 126000 }, { "epoch": 1.04, "learning_rate": 9.616959016258693e-06, "loss": 4.0481, "step": 126500 }, { "epoch": 1.04, "learning_rate": 9.615315063538774e-06, "loss": 4.0496, "step": 127000 }, { "epoch": 1.04, "learning_rate": 9.613671110818853e-06, "loss": 4.041, "step": 127500 }, { "epoch": 1.05, "learning_rate": 9.612027158098934e-06, "loss": 4.0395, "step": 128000 }, { "epoch": 1.05, "learning_rate": 9.610383205379014e-06, "loss": 4.0357, "step": 128500 }, { "epoch": 1.06, "learning_rate": 9.608739252659095e-06, "loss": 4.0339, "step": 129000 }, { "epoch": 1.06, "learning_rate": 9.607095299939174e-06, "loss": 4.0269, "step": 129500 }, { "epoch": 1.07, "learning_rate": 9.605451347219255e-06, "loss": 4.0204, "step": 130000 }, { "epoch": 1.07, "learning_rate": 9.603807394499334e-06, "loss": 4.0235, "step": 130500 }, { "epoch": 1.07, "learning_rate": 9.602163441779415e-06, "loss": 4.0276, "step": 131000 }, { "epoch": 1.08, "learning_rate": 9.600519489059496e-06, "loss": 4.0172, "step": 131500 }, { "epoch": 1.08, "learning_rate": 9.598875536339575e-06, "loss": 4.0097, "step": 132000 }, { "epoch": 1.09, "learning_rate": 9.597231583619656e-06, "loss": 4.0083, "step": 132500 }, { "epoch": 1.09, "learning_rate": 9.595587630899736e-06, "loss": 4.0063, "step": 133000 }, { "epoch": 1.09, "learning_rate": 9.593943678179817e-06, "loss": 4.0038, "step": 133500 }, { "epoch": 1.1, "learning_rate": 9.592299725459898e-06, "loss": 3.995, "step": 134000 }, { "epoch": 1.1, "learning_rate": 9.590655772739977e-06, "loss": 3.9927, "step": 134500 }, { "epoch": 1.11, "learning_rate": 9.589011820020056e-06, "loss": 3.9941, "step": 135000 }, { "epoch": 1.11, "learning_rate": 9.587367867300137e-06, "loss": 3.997, "step": 135500 }, { "epoch": 1.11, "learning_rate": 9.585723914580216e-06, "loss": 3.986, "step": 136000 }, { "epoch": 1.12, "learning_rate": 9.584079961860297e-06, "loss": 3.9807, "step": 136500 }, { "epoch": 1.12, "learning_rate": 9.582436009140378e-06, "loss": 3.9846, "step": 137000 }, { "epoch": 1.13, "learning_rate": 9.580792056420458e-06, "loss": 3.9712, "step": 137500 }, { "epoch": 1.13, "learning_rate": 9.579148103700539e-06, "loss": 3.9697, "step": 138000 }, { "epoch": 1.13, "learning_rate": 9.57750415098062e-06, "loss": 3.9665, "step": 138500 }, { "epoch": 1.14, "learning_rate": 9.575860198260699e-06, "loss": 3.9676, "step": 139000 }, { "epoch": 1.14, "learning_rate": 9.57421624554078e-06, "loss": 3.9659, "step": 139500 }, { "epoch": 1.15, "learning_rate": 9.572572292820859e-06, "loss": 3.9657, "step": 140000 }, { "epoch": 1.15, "learning_rate": 9.570928340100938e-06, "loss": 3.9616, "step": 140500 }, { "epoch": 1.16, "learning_rate": 9.56928438738102e-06, "loss": 3.95, "step": 141000 }, { "epoch": 1.16, "learning_rate": 9.5676404346611e-06, "loss": 3.9508, "step": 141500 }, { "epoch": 1.16, "learning_rate": 9.56599648194118e-06, "loss": 3.9479, "step": 142000 }, { "epoch": 1.17, "learning_rate": 9.56435252922126e-06, "loss": 3.9495, "step": 142500 }, { "epoch": 1.17, "learning_rate": 9.56270857650134e-06, "loss": 3.9443, "step": 143000 }, { "epoch": 1.18, "learning_rate": 9.561064623781421e-06, "loss": 3.9392, "step": 143500 }, { "epoch": 1.18, "learning_rate": 9.559420671061502e-06, "loss": 3.943, "step": 144000 }, { "epoch": 1.18, "learning_rate": 9.557776718341581e-06, "loss": 3.9397, "step": 144500 }, { "epoch": 1.19, "learning_rate": 9.556132765621662e-06, "loss": 3.9338, "step": 145000 }, { "epoch": 1.19, "learning_rate": 9.554488812901741e-06, "loss": 3.9284, "step": 145500 }, { "epoch": 1.2, "learning_rate": 9.552844860181822e-06, "loss": 3.9276, "step": 146000 }, { "epoch": 1.2, "learning_rate": 9.551200907461902e-06, "loss": 3.9332, "step": 146500 }, { "epoch": 1.2, "learning_rate": 9.549556954741983e-06, "loss": 3.9263, "step": 147000 }, { "epoch": 1.21, "learning_rate": 9.547913002022062e-06, "loss": 3.9198, "step": 147500 }, { "epoch": 1.21, "learning_rate": 9.546269049302143e-06, "loss": 3.9173, "step": 148000 }, { "epoch": 1.22, "learning_rate": 9.544625096582222e-06, "loss": 3.919, "step": 148500 }, { "epoch": 1.22, "learning_rate": 9.542981143862303e-06, "loss": 3.917, "step": 149000 }, { "epoch": 1.22, "learning_rate": 9.541337191142384e-06, "loss": 3.9189, "step": 149500 }, { "epoch": 1.23, "learning_rate": 9.539693238422463e-06, "loss": 3.9119, "step": 150000 }, { "epoch": 1.23, "eval_accuracy": 0.3782837597418556, "eval_loss": 3.7199175357818604, "eval_runtime": 410.4253, "eval_samples_per_second": 751.308, "eval_steps_per_second": 15.654, "step": 150000 }, { "epoch": 1.23, "learning_rate": 9.538049285702544e-06, "loss": 3.9076, "step": 150500 }, { "epoch": 1.24, "learning_rate": 9.536405332982625e-06, "loss": 3.8992, "step": 151000 }, { "epoch": 1.24, "learning_rate": 9.534761380262705e-06, "loss": 3.8987, "step": 151500 }, { "epoch": 1.25, "learning_rate": 9.533117427542786e-06, "loss": 3.8942, "step": 152000 }, { "epoch": 1.25, "learning_rate": 9.531473474822865e-06, "loss": 3.9044, "step": 152500 }, { "epoch": 1.25, "learning_rate": 9.529829522102944e-06, "loss": 3.8889, "step": 153000 }, { "epoch": 1.26, "learning_rate": 9.528185569383025e-06, "loss": 3.8895, "step": 153500 }, { "epoch": 1.26, "learning_rate": 9.526541616663106e-06, "loss": 3.8905, "step": 154000 }, { "epoch": 1.27, "learning_rate": 9.524897663943185e-06, "loss": 3.8842, "step": 154500 }, { "epoch": 1.27, "learning_rate": 9.523253711223266e-06, "loss": 3.8843, "step": 155000 }, { "epoch": 1.27, "learning_rate": 9.521609758503346e-06, "loss": 3.8713, "step": 155500 }, { "epoch": 1.28, "learning_rate": 9.519965805783427e-06, "loss": 3.8828, "step": 156000 }, { "epoch": 1.28, "learning_rate": 9.518321853063508e-06, "loss": 3.8808, "step": 156500 }, { "epoch": 1.29, "learning_rate": 9.516677900343587e-06, "loss": 3.8747, "step": 157000 }, { "epoch": 1.29, "learning_rate": 9.515033947623668e-06, "loss": 3.8737, "step": 157500 }, { "epoch": 1.29, "learning_rate": 9.513389994903747e-06, "loss": 3.8709, "step": 158000 }, { "epoch": 1.3, "learning_rate": 9.511746042183826e-06, "loss": 3.8574, "step": 158500 }, { "epoch": 1.3, "learning_rate": 9.510102089463907e-06, "loss": 3.8695, "step": 159000 }, { "epoch": 1.31, "learning_rate": 9.508458136743988e-06, "loss": 3.8644, "step": 159500 }, { "epoch": 1.31, "learning_rate": 9.506814184024068e-06, "loss": 3.8627, "step": 160000 }, { "epoch": 1.31, "learning_rate": 9.505170231304149e-06, "loss": 3.8545, "step": 160500 }, { "epoch": 1.32, "learning_rate": 9.503526278584228e-06, "loss": 3.852, "step": 161000 }, { "epoch": 1.32, "learning_rate": 9.501882325864309e-06, "loss": 3.8451, "step": 161500 }, { "epoch": 1.33, "learning_rate": 9.50023837314439e-06, "loss": 3.8514, "step": 162000 }, { "epoch": 1.33, "learning_rate": 9.498594420424469e-06, "loss": 3.8516, "step": 162500 }, { "epoch": 1.34, "learning_rate": 9.49695046770455e-06, "loss": 3.8459, "step": 163000 }, { "epoch": 1.34, "learning_rate": 9.495306514984631e-06, "loss": 3.8456, "step": 163500 }, { "epoch": 1.34, "learning_rate": 9.49366256226471e-06, "loss": 3.8393, "step": 164000 }, { "epoch": 1.35, "learning_rate": 9.492018609544791e-06, "loss": 3.8407, "step": 164500 }, { "epoch": 1.35, "learning_rate": 9.49037465682487e-06, "loss": 3.8337, "step": 165000 }, { "epoch": 1.36, "learning_rate": 9.48873070410495e-06, "loss": 3.8352, "step": 165500 }, { "epoch": 1.36, "learning_rate": 9.48708675138503e-06, "loss": 3.8305, "step": 166000 }, { "epoch": 1.36, "learning_rate": 9.485442798665112e-06, "loss": 3.8327, "step": 166500 }, { "epoch": 1.37, "learning_rate": 9.483798845945191e-06, "loss": 3.8242, "step": 167000 }, { "epoch": 1.37, "learning_rate": 9.482154893225272e-06, "loss": 3.8248, "step": 167500 }, { "epoch": 1.38, "learning_rate": 9.480510940505351e-06, "loss": 3.8208, "step": 168000 }, { "epoch": 1.38, "learning_rate": 9.478866987785432e-06, "loss": 3.8195, "step": 168500 }, { "epoch": 1.38, "learning_rate": 9.477223035065513e-06, "loss": 3.8262, "step": 169000 }, { "epoch": 1.39, "learning_rate": 9.475579082345593e-06, "loss": 3.8205, "step": 169500 }, { "epoch": 1.39, "learning_rate": 9.473935129625673e-06, "loss": 3.823, "step": 170000 }, { "epoch": 1.4, "learning_rate": 9.472291176905753e-06, "loss": 3.8127, "step": 170500 }, { "epoch": 1.4, "learning_rate": 9.470647224185832e-06, "loss": 3.813, "step": 171000 }, { "epoch": 1.41, "learning_rate": 9.469003271465913e-06, "loss": 3.8126, "step": 171500 }, { "epoch": 1.41, "learning_rate": 9.467359318745994e-06, "loss": 3.808, "step": 172000 }, { "epoch": 1.41, "learning_rate": 9.465715366026073e-06, "loss": 3.8051, "step": 172500 }, { "epoch": 1.42, "learning_rate": 9.464071413306154e-06, "loss": 3.8038, "step": 173000 }, { "epoch": 1.42, "learning_rate": 9.462427460586234e-06, "loss": 3.7961, "step": 173500 }, { "epoch": 1.43, "learning_rate": 9.460783507866314e-06, "loss": 3.7979, "step": 174000 }, { "epoch": 1.43, "learning_rate": 9.459139555146395e-06, "loss": 3.795, "step": 174500 }, { "epoch": 1.43, "learning_rate": 9.457495602426475e-06, "loss": 3.8004, "step": 175000 }, { "epoch": 1.44, "learning_rate": 9.455851649706556e-06, "loss": 3.7958, "step": 175500 }, { "epoch": 1.44, "learning_rate": 9.454207696986637e-06, "loss": 3.7909, "step": 176000 }, { "epoch": 1.45, "learning_rate": 9.452563744266716e-06, "loss": 3.8, "step": 176500 }, { "epoch": 1.45, "learning_rate": 9.450919791546795e-06, "loss": 3.7826, "step": 177000 }, { "epoch": 1.45, "learning_rate": 9.449275838826876e-06, "loss": 3.7857, "step": 177500 }, { "epoch": 1.46, "learning_rate": 9.447631886106956e-06, "loss": 3.7867, "step": 178000 }, { "epoch": 1.46, "learning_rate": 9.445987933387036e-06, "loss": 3.7935, "step": 178500 }, { "epoch": 1.47, "learning_rate": 9.444343980667117e-06, "loss": 3.7803, "step": 179000 }, { "epoch": 1.47, "learning_rate": 9.442700027947197e-06, "loss": 3.782, "step": 179500 }, { "epoch": 1.47, "learning_rate": 9.441056075227278e-06, "loss": 3.7832, "step": 180000 }, { "epoch": 1.47, "eval_accuracy": 0.39196367058721415, "eval_loss": 3.6038873195648193, "eval_runtime": 407.692, "eval_samples_per_second": 756.345, "eval_steps_per_second": 15.759, "step": 180000 }, { "epoch": 1.48, "learning_rate": 9.439412122507357e-06, "loss": 3.775, "step": 180500 }, { "epoch": 1.48, "learning_rate": 9.437768169787438e-06, "loss": 3.7777, "step": 181000 }, { "epoch": 1.49, "learning_rate": 9.436124217067519e-06, "loss": 3.769, "step": 181500 }, { "epoch": 1.49, "learning_rate": 9.434480264347598e-06, "loss": 3.7712, "step": 182000 }, { "epoch": 1.5, "learning_rate": 9.43283631162768e-06, "loss": 3.7638, "step": 182500 }, { "epoch": 1.5, "learning_rate": 9.431192358907758e-06, "loss": 3.7686, "step": 183000 }, { "epoch": 1.5, "learning_rate": 9.429548406187838e-06, "loss": 3.7652, "step": 183500 }, { "epoch": 1.51, "learning_rate": 9.427904453467919e-06, "loss": 3.7649, "step": 184000 }, { "epoch": 1.51, "learning_rate": 9.426260500748e-06, "loss": 3.7608, "step": 184500 }, { "epoch": 1.52, "learning_rate": 9.424616548028079e-06, "loss": 3.762, "step": 185000 }, { "epoch": 1.52, "learning_rate": 9.42297259530816e-06, "loss": 3.7551, "step": 185500 }, { "epoch": 1.52, "learning_rate": 9.42132864258824e-06, "loss": 3.7545, "step": 186000 }, { "epoch": 1.53, "learning_rate": 9.41968468986832e-06, "loss": 3.7556, "step": 186500 }, { "epoch": 1.53, "learning_rate": 9.418040737148401e-06, "loss": 3.7507, "step": 187000 }, { "epoch": 1.54, "learning_rate": 9.41639678442848e-06, "loss": 3.7467, "step": 187500 }, { "epoch": 1.54, "learning_rate": 9.414752831708561e-06, "loss": 3.7507, "step": 188000 }, { "epoch": 1.54, "learning_rate": 9.413108878988642e-06, "loss": 3.7384, "step": 188500 }, { "epoch": 1.55, "learning_rate": 9.41146492626872e-06, "loss": 3.7473, "step": 189000 }, { "epoch": 1.55, "learning_rate": 9.409820973548801e-06, "loss": 3.7429, "step": 189500 }, { "epoch": 1.56, "learning_rate": 9.408177020828882e-06, "loss": 3.7427, "step": 190000 }, { "epoch": 1.56, "learning_rate": 9.406533068108961e-06, "loss": 3.74, "step": 190500 }, { "epoch": 1.56, "learning_rate": 9.404889115389042e-06, "loss": 3.7361, "step": 191000 }, { "epoch": 1.57, "learning_rate": 9.403245162669123e-06, "loss": 3.7427, "step": 191500 }, { "epoch": 1.57, "learning_rate": 9.401601209949202e-06, "loss": 3.7388, "step": 192000 }, { "epoch": 1.58, "learning_rate": 9.399957257229283e-06, "loss": 3.7349, "step": 192500 }, { "epoch": 1.58, "learning_rate": 9.398313304509363e-06, "loss": 3.7302, "step": 193000 }, { "epoch": 1.59, "learning_rate": 9.396669351789444e-06, "loss": 3.7408, "step": 193500 }, { "epoch": 1.59, "learning_rate": 9.395025399069525e-06, "loss": 3.7239, "step": 194000 }, { "epoch": 1.59, "learning_rate": 9.393381446349604e-06, "loss": 3.7258, "step": 194500 }, { "epoch": 1.6, "learning_rate": 9.391737493629683e-06, "loss": 3.7149, "step": 195000 }, { "epoch": 1.6, "learning_rate": 9.390093540909764e-06, "loss": 3.7213, "step": 195500 }, { "epoch": 1.61, "learning_rate": 9.388449588189843e-06, "loss": 3.7218, "step": 196000 }, { "epoch": 1.61, "learning_rate": 9.386805635469924e-06, "loss": 3.7175, "step": 196500 }, { "epoch": 1.61, "learning_rate": 9.385161682750005e-06, "loss": 3.7088, "step": 197000 }, { "epoch": 1.62, "learning_rate": 9.383517730030085e-06, "loss": 3.7147, "step": 197500 }, { "epoch": 1.62, "learning_rate": 9.381873777310166e-06, "loss": 3.7194, "step": 198000 }, { "epoch": 1.63, "learning_rate": 9.380229824590245e-06, "loss": 3.7048, "step": 198500 }, { "epoch": 1.63, "learning_rate": 9.378585871870326e-06, "loss": 3.7022, "step": 199000 }, { "epoch": 1.63, "learning_rate": 9.376941919150407e-06, "loss": 3.6975, "step": 199500 }, { "epoch": 1.64, "learning_rate": 9.375297966430486e-06, "loss": 3.6994, "step": 200000 }, { "epoch": 1.64, "learning_rate": 9.373654013710567e-06, "loss": 3.6963, "step": 200500 }, { "epoch": 1.65, "learning_rate": 9.372010060990646e-06, "loss": 3.6973, "step": 201000 }, { "epoch": 1.65, "learning_rate": 9.370366108270726e-06, "loss": 3.6949, "step": 201500 }, { "epoch": 1.65, "learning_rate": 9.368722155550807e-06, "loss": 3.6867, "step": 202000 }, { "epoch": 1.66, "learning_rate": 9.367078202830888e-06, "loss": 3.6865, "step": 202500 }, { "epoch": 1.66, "learning_rate": 9.365434250110967e-06, "loss": 3.6862, "step": 203000 }, { "epoch": 1.67, "learning_rate": 9.363790297391048e-06, "loss": 3.682, "step": 203500 }, { "epoch": 1.67, "learning_rate": 9.362146344671129e-06, "loss": 3.6823, "step": 204000 }, { "epoch": 1.68, "learning_rate": 9.360502391951208e-06, "loss": 3.6839, "step": 204500 }, { "epoch": 1.68, "learning_rate": 9.358858439231289e-06, "loss": 3.6768, "step": 205000 }, { "epoch": 1.68, "learning_rate": 9.357214486511368e-06, "loss": 3.6763, "step": 205500 }, { "epoch": 1.69, "learning_rate": 9.35557053379145e-06, "loss": 3.6763, "step": 206000 }, { "epoch": 1.69, "learning_rate": 9.35392658107153e-06, "loss": 3.6788, "step": 206500 }, { "epoch": 1.7, "learning_rate": 9.35228262835161e-06, "loss": 3.6731, "step": 207000 }, { "epoch": 1.7, "learning_rate": 9.350638675631689e-06, "loss": 3.6703, "step": 207500 }, { "epoch": 1.7, "learning_rate": 9.34899472291177e-06, "loss": 3.6735, "step": 208000 }, { "epoch": 1.71, "learning_rate": 9.347350770191849e-06, "loss": 3.6659, "step": 208500 }, { "epoch": 1.71, "learning_rate": 9.34570681747193e-06, "loss": 3.6618, "step": 209000 }, { "epoch": 1.72, "learning_rate": 9.344062864752011e-06, "loss": 3.66, "step": 209500 }, { "epoch": 1.72, "learning_rate": 9.34241891203209e-06, "loss": 3.6686, "step": 210000 }, { "epoch": 1.72, "eval_accuracy": 0.4032999985682802, "eval_loss": 3.505682945251465, "eval_runtime": 407.8472, "eval_samples_per_second": 756.058, "eval_steps_per_second": 15.753, "step": 210000 }, { "epoch": 1.72, "learning_rate": 9.340774959312171e-06, "loss": 3.6608, "step": 210500 }, { "epoch": 1.73, "learning_rate": 9.33913100659225e-06, "loss": 3.6528, "step": 211000 }, { "epoch": 1.73, "learning_rate": 9.337487053872332e-06, "loss": 3.6607, "step": 211500 }, { "epoch": 1.74, "learning_rate": 9.335843101152413e-06, "loss": 3.6561, "step": 212000 }, { "epoch": 1.74, "learning_rate": 9.334199148432492e-06, "loss": 3.6495, "step": 212500 }, { "epoch": 1.75, "learning_rate": 9.332555195712573e-06, "loss": 3.6438, "step": 213000 }, { "epoch": 1.75, "learning_rate": 9.330911242992652e-06, "loss": 3.6483, "step": 213500 }, { "epoch": 1.75, "learning_rate": 9.329267290272731e-06, "loss": 3.6504, "step": 214000 }, { "epoch": 1.76, "learning_rate": 9.327623337552812e-06, "loss": 3.6394, "step": 214500 }, { "epoch": 1.76, "learning_rate": 9.325979384832893e-06, "loss": 3.6413, "step": 215000 }, { "epoch": 1.77, "learning_rate": 9.324335432112973e-06, "loss": 3.6425, "step": 215500 }, { "epoch": 1.77, "learning_rate": 9.322691479393054e-06, "loss": 3.6369, "step": 216000 }, { "epoch": 1.77, "learning_rate": 9.321047526673135e-06, "loss": 3.6366, "step": 216500 }, { "epoch": 1.78, "learning_rate": 9.319403573953214e-06, "loss": 3.637, "step": 217000 }, { "epoch": 1.78, "learning_rate": 9.317759621233295e-06, "loss": 3.638, "step": 217500 }, { "epoch": 1.79, "learning_rate": 9.316115668513374e-06, "loss": 3.6389, "step": 218000 }, { "epoch": 1.79, "learning_rate": 9.314471715793455e-06, "loss": 3.6397, "step": 218500 }, { "epoch": 1.79, "learning_rate": 9.312827763073536e-06, "loss": 3.6262, "step": 219000 }, { "epoch": 1.8, "learning_rate": 9.311183810353615e-06, "loss": 3.6296, "step": 219500 }, { "epoch": 1.8, "learning_rate": 9.309539857633695e-06, "loss": 3.6228, "step": 220000 }, { "epoch": 1.81, "learning_rate": 9.307895904913776e-06, "loss": 3.6257, "step": 220500 }, { "epoch": 1.81, "learning_rate": 9.306251952193855e-06, "loss": 3.6242, "step": 221000 }, { "epoch": 1.81, "learning_rate": 9.304607999473936e-06, "loss": 3.6263, "step": 221500 }, { "epoch": 1.82, "learning_rate": 9.302964046754017e-06, "loss": 3.6204, "step": 222000 }, { "epoch": 1.82, "learning_rate": 9.301320094034096e-06, "loss": 3.6092, "step": 222500 }, { "epoch": 1.83, "learning_rate": 9.299676141314177e-06, "loss": 3.6146, "step": 223000 }, { "epoch": 1.83, "learning_rate": 9.298032188594256e-06, "loss": 3.6233, "step": 223500 }, { "epoch": 1.84, "learning_rate": 9.296388235874337e-06, "loss": 3.62, "step": 224000 }, { "epoch": 1.84, "learning_rate": 9.294744283154418e-06, "loss": 3.6191, "step": 224500 }, { "epoch": 1.84, "learning_rate": 9.293100330434498e-06, "loss": 3.616, "step": 225000 }, { "epoch": 1.85, "learning_rate": 9.291456377714577e-06, "loss": 3.6147, "step": 225500 }, { "epoch": 1.85, "learning_rate": 9.289812424994658e-06, "loss": 3.6147, "step": 226000 }, { "epoch": 1.86, "learning_rate": 9.288168472274737e-06, "loss": 3.6144, "step": 226500 }, { "epoch": 1.86, "learning_rate": 9.286524519554818e-06, "loss": 3.6047, "step": 227000 }, { "epoch": 1.86, "learning_rate": 9.284880566834899e-06, "loss": 3.6106, "step": 227500 }, { "epoch": 1.87, "learning_rate": 9.283236614114978e-06, "loss": 3.6059, "step": 228000 }, { "epoch": 1.87, "learning_rate": 9.28159266139506e-06, "loss": 3.6122, "step": 228500 }, { "epoch": 1.88, "learning_rate": 9.279948708675139e-06, "loss": 3.6052, "step": 229000 }, { "epoch": 1.88, "learning_rate": 9.27830475595522e-06, "loss": 3.6047, "step": 229500 }, { "epoch": 1.88, "learning_rate": 9.2766608032353e-06, "loss": 3.6021, "step": 230000 }, { "epoch": 1.89, "learning_rate": 9.27501685051538e-06, "loss": 3.6023, "step": 230500 }, { "epoch": 1.89, "learning_rate": 9.27337289779546e-06, "loss": 3.6019, "step": 231000 }, { "epoch": 1.9, "learning_rate": 9.27172894507554e-06, "loss": 3.595, "step": 231500 }, { "epoch": 1.9, "learning_rate": 9.270084992355621e-06, "loss": 3.5902, "step": 232000 }, { "epoch": 1.9, "learning_rate": 9.2684410396357e-06, "loss": 3.5964, "step": 232500 }, { "epoch": 1.91, "learning_rate": 9.266797086915781e-06, "loss": 3.5953, "step": 233000 }, { "epoch": 1.91, "learning_rate": 9.26515313419586e-06, "loss": 3.5934, "step": 233500 }, { "epoch": 1.92, "learning_rate": 9.263509181475942e-06, "loss": 3.5851, "step": 234000 }, { "epoch": 1.92, "learning_rate": 9.261865228756023e-06, "loss": 3.5861, "step": 234500 }, { "epoch": 1.93, "learning_rate": 9.260221276036102e-06, "loss": 3.5849, "step": 235000 }, { "epoch": 1.93, "learning_rate": 9.258577323316183e-06, "loss": 3.5883, "step": 235500 }, { "epoch": 1.93, "learning_rate": 9.256933370596262e-06, "loss": 3.5822, "step": 236000 }, { "epoch": 1.94, "learning_rate": 9.255289417876343e-06, "loss": 3.5742, "step": 236500 }, { "epoch": 1.94, "learning_rate": 9.253645465156424e-06, "loss": 3.5809, "step": 237000 }, { "epoch": 1.95, "learning_rate": 9.252001512436503e-06, "loss": 3.5805, "step": 237500 }, { "epoch": 1.95, "learning_rate": 9.250357559716583e-06, "loss": 3.5813, "step": 238000 }, { "epoch": 1.95, "learning_rate": 9.248713606996664e-06, "loss": 3.5793, "step": 238500 }, { "epoch": 1.96, "learning_rate": 9.247069654276743e-06, "loss": 3.5819, "step": 239000 }, { "epoch": 1.96, "learning_rate": 9.245425701556824e-06, "loss": 3.5771, "step": 239500 }, { "epoch": 1.97, "learning_rate": 9.243781748836905e-06, "loss": 3.5793, "step": 240000 }, { "epoch": 1.97, "eval_accuracy": 0.4137328604632164, "eval_loss": 3.4226527214050293, "eval_runtime": 405.6879, "eval_samples_per_second": 760.082, "eval_steps_per_second": 15.837, "step": 240000 }, { "epoch": 1.97, "learning_rate": 9.242137796116984e-06, "loss": 3.5746, "step": 240500 }, { "epoch": 1.97, "learning_rate": 9.240493843397065e-06, "loss": 3.571, "step": 241000 }, { "epoch": 1.98, "learning_rate": 9.238849890677144e-06, "loss": 3.5759, "step": 241500 }, { "epoch": 1.98, "learning_rate": 9.237205937957225e-06, "loss": 3.5681, "step": 242000 }, { "epoch": 1.99, "learning_rate": 9.235561985237306e-06, "loss": 3.5718, "step": 242500 }, { "epoch": 1.99, "learning_rate": 9.233918032517386e-06, "loss": 3.5656, "step": 243000 }, { "epoch": 1.99, "learning_rate": 9.232274079797465e-06, "loss": 3.5635, "step": 243500 }, { "epoch": 2.0, "learning_rate": 9.230630127077546e-06, "loss": 3.5619, "step": 244000 }, { "epoch": 2.0, "learning_rate": 9.228986174357627e-06, "loss": 3.5596, "step": 244500 }, { "epoch": 2.01, "learning_rate": 9.227342221637706e-06, "loss": 3.5599, "step": 245000 }, { "epoch": 2.01, "learning_rate": 9.225698268917787e-06, "loss": 3.57, "step": 245500 }, { "epoch": 2.02, "learning_rate": 9.224054316197866e-06, "loss": 3.5608, "step": 246000 }, { "epoch": 2.02, "learning_rate": 9.222410363477947e-06, "loss": 3.5601, "step": 246500 }, { "epoch": 2.02, "learning_rate": 9.220766410758028e-06, "loss": 3.562, "step": 247000 }, { "epoch": 2.03, "learning_rate": 9.219122458038107e-06, "loss": 3.5569, "step": 247500 }, { "epoch": 2.03, "learning_rate": 9.217478505318188e-06, "loss": 3.5563, "step": 248000 }, { "epoch": 2.04, "learning_rate": 9.215834552598268e-06, "loss": 3.5634, "step": 248500 }, { "epoch": 2.04, "learning_rate": 9.214190599878349e-06, "loss": 3.55, "step": 249000 }, { "epoch": 2.04, "learning_rate": 9.212546647158428e-06, "loss": 3.5459, "step": 249500 }, { "epoch": 2.05, "learning_rate": 9.210902694438509e-06, "loss": 3.5478, "step": 250000 }, { "epoch": 2.05, "learning_rate": 9.209258741718588e-06, "loss": 3.5526, "step": 250500 }, { "epoch": 2.06, "learning_rate": 9.20761478899867e-06, "loss": 3.5476, "step": 251000 }, { "epoch": 2.06, "learning_rate": 9.205970836278748e-06, "loss": 3.5567, "step": 251500 }, { "epoch": 2.06, "learning_rate": 9.20432688355883e-06, "loss": 3.5423, "step": 252000 }, { "epoch": 2.07, "learning_rate": 9.20268293083891e-06, "loss": 3.5487, "step": 252500 }, { "epoch": 2.07, "learning_rate": 9.20103897811899e-06, "loss": 3.5401, "step": 253000 }, { "epoch": 2.08, "learning_rate": 9.19939502539907e-06, "loss": 3.5503, "step": 253500 }, { "epoch": 2.08, "learning_rate": 9.19775107267915e-06, "loss": 3.5503, "step": 254000 }, { "epoch": 2.09, "learning_rate": 9.196107119959231e-06, "loss": 3.5316, "step": 254500 }, { "epoch": 2.09, "learning_rate": 9.194463167239312e-06, "loss": 3.541, "step": 255000 }, { "epoch": 2.09, "learning_rate": 9.192819214519391e-06, "loss": 3.5368, "step": 255500 }, { "epoch": 2.1, "learning_rate": 9.19117526179947e-06, "loss": 3.5351, "step": 256000 }, { "epoch": 2.1, "learning_rate": 9.189531309079551e-06, "loss": 3.537, "step": 256500 }, { "epoch": 2.11, "learning_rate": 9.18788735635963e-06, "loss": 3.536, "step": 257000 }, { "epoch": 2.11, "learning_rate": 9.186243403639712e-06, "loss": 3.5331, "step": 257500 }, { "epoch": 2.11, "learning_rate": 9.184599450919793e-06, "loss": 3.5408, "step": 258000 }, { "epoch": 2.12, "learning_rate": 9.182955498199872e-06, "loss": 3.5391, "step": 258500 }, { "epoch": 2.12, "learning_rate": 9.181311545479953e-06, "loss": 3.5339, "step": 259000 }, { "epoch": 2.13, "learning_rate": 9.179667592760034e-06, "loss": 3.535, "step": 259500 }, { "epoch": 2.13, "learning_rate": 9.178023640040113e-06, "loss": 3.5261, "step": 260000 }, { "epoch": 2.13, "learning_rate": 9.176379687320194e-06, "loss": 3.5266, "step": 260500 }, { "epoch": 2.14, "learning_rate": 9.174735734600273e-06, "loss": 3.5283, "step": 261000 }, { "epoch": 2.14, "learning_rate": 9.173091781880353e-06, "loss": 3.5348, "step": 261500 }, { "epoch": 2.15, "learning_rate": 9.171447829160434e-06, "loss": 3.5231, "step": 262000 }, { "epoch": 2.15, "learning_rate": 9.169803876440515e-06, "loss": 3.5138, "step": 262500 }, { "epoch": 2.15, "learning_rate": 9.168159923720594e-06, "loss": 3.5306, "step": 263000 }, { "epoch": 2.16, "learning_rate": 9.166515971000675e-06, "loss": 3.5224, "step": 263500 }, { "epoch": 2.16, "learning_rate": 9.164872018280754e-06, "loss": 3.5279, "step": 264000 }, { "epoch": 2.17, "learning_rate": 9.163228065560835e-06, "loss": 3.5207, "step": 264500 }, { "epoch": 2.17, "learning_rate": 9.161584112840916e-06, "loss": 3.5213, "step": 265000 }, { "epoch": 2.18, "learning_rate": 9.159940160120995e-06, "loss": 3.5152, "step": 265500 }, { "epoch": 2.18, "learning_rate": 9.158296207401076e-06, "loss": 3.5148, "step": 266000 }, { "epoch": 2.18, "learning_rate": 9.156652254681156e-06, "loss": 3.5185, "step": 266500 }, { "epoch": 2.19, "learning_rate": 9.155008301961237e-06, "loss": 3.5155, "step": 267000 }, { "epoch": 2.19, "learning_rate": 9.153364349241318e-06, "loss": 3.5192, "step": 267500 }, { "epoch": 2.2, "learning_rate": 9.151720396521397e-06, "loss": 3.5185, "step": 268000 }, { "epoch": 2.2, "learning_rate": 9.150076443801476e-06, "loss": 3.518, "step": 268500 }, { "epoch": 2.2, "learning_rate": 9.148432491081557e-06, "loss": 3.5159, "step": 269000 }, { "epoch": 2.21, "learning_rate": 9.146788538361636e-06, "loss": 3.5211, "step": 269500 }, { "epoch": 2.21, "learning_rate": 9.145144585641717e-06, "loss": 3.5128, "step": 270000 }, { "epoch": 2.21, "eval_accuracy": 0.42085813479275763, "eval_loss": 3.364504814147949, "eval_runtime": 411.9164, "eval_samples_per_second": 748.589, "eval_steps_per_second": 15.598, "step": 270000 }, { "epoch": 2.22, "learning_rate": 9.143500632921798e-06, "loss": 3.5162, "step": 270500 }, { "epoch": 2.22, "learning_rate": 9.141856680201878e-06, "loss": 3.5144, "step": 271000 }, { "epoch": 2.22, "learning_rate": 9.140212727481959e-06, "loss": 3.5095, "step": 271500 }, { "epoch": 2.23, "learning_rate": 9.13856877476204e-06, "loss": 3.502, "step": 272000 }, { "epoch": 2.23, "learning_rate": 9.136924822042119e-06, "loss": 3.503, "step": 272500 }, { "epoch": 2.24, "learning_rate": 9.1352808693222e-06, "loss": 3.5011, "step": 273000 }, { "epoch": 2.24, "learning_rate": 9.133636916602279e-06, "loss": 3.505, "step": 273500 }, { "epoch": 2.24, "learning_rate": 9.131992963882358e-06, "loss": 3.5071, "step": 274000 }, { "epoch": 2.25, "learning_rate": 9.13034901116244e-06, "loss": 3.5018, "step": 274500 }, { "epoch": 2.25, "learning_rate": 9.12870505844252e-06, "loss": 3.5058, "step": 275000 }, { "epoch": 2.26, "learning_rate": 9.1270611057226e-06, "loss": 3.5056, "step": 275500 }, { "epoch": 2.26, "learning_rate": 9.12541715300268e-06, "loss": 3.5085, "step": 276000 }, { "epoch": 2.27, "learning_rate": 9.12377320028276e-06, "loss": 3.5111, "step": 276500 }, { "epoch": 2.27, "learning_rate": 9.122129247562841e-06, "loss": 3.4963, "step": 277000 }, { "epoch": 2.27, "learning_rate": 9.120485294842922e-06, "loss": 3.5001, "step": 277500 }, { "epoch": 2.28, "learning_rate": 9.118841342123001e-06, "loss": 3.5004, "step": 278000 }, { "epoch": 2.28, "learning_rate": 9.117197389403082e-06, "loss": 3.5027, "step": 278500 }, { "epoch": 2.29, "learning_rate": 9.115553436683161e-06, "loss": 3.4984, "step": 279000 }, { "epoch": 2.29, "learning_rate": 9.113909483963242e-06, "loss": 3.5006, "step": 279500 }, { "epoch": 2.29, "learning_rate": 9.112265531243322e-06, "loss": 3.4952, "step": 280000 }, { "epoch": 2.3, "learning_rate": 9.110621578523403e-06, "loss": 3.4953, "step": 280500 }, { "epoch": 2.3, "learning_rate": 9.108977625803482e-06, "loss": 3.4951, "step": 281000 }, { "epoch": 2.31, "learning_rate": 9.107333673083563e-06, "loss": 3.4984, "step": 281500 }, { "epoch": 2.31, "learning_rate": 9.105689720363642e-06, "loss": 3.4849, "step": 282000 }, { "epoch": 2.31, "learning_rate": 9.104045767643723e-06, "loss": 3.4901, "step": 282500 }, { "epoch": 2.32, "learning_rate": 9.102401814923804e-06, "loss": 3.4884, "step": 283000 }, { "epoch": 2.32, "learning_rate": 9.100757862203883e-06, "loss": 3.4937, "step": 283500 }, { "epoch": 2.33, "learning_rate": 9.099113909483964e-06, "loss": 3.4915, "step": 284000 }, { "epoch": 2.33, "learning_rate": 9.097469956764045e-06, "loss": 3.4922, "step": 284500 }, { "epoch": 2.33, "learning_rate": 9.095826004044125e-06, "loss": 3.4892, "step": 285000 }, { "epoch": 2.34, "learning_rate": 9.094182051324206e-06, "loss": 3.4837, "step": 285500 }, { "epoch": 2.34, "learning_rate": 9.092538098604285e-06, "loss": 3.4821, "step": 286000 }, { "epoch": 2.35, "learning_rate": 9.090894145884364e-06, "loss": 3.4836, "step": 286500 }, { "epoch": 2.35, "learning_rate": 9.089250193164445e-06, "loss": 3.4826, "step": 287000 }, { "epoch": 2.36, "learning_rate": 9.087606240444526e-06, "loss": 3.477, "step": 287500 }, { "epoch": 2.36, "learning_rate": 9.085962287724605e-06, "loss": 3.485, "step": 288000 }, { "epoch": 2.36, "learning_rate": 9.084318335004686e-06, "loss": 3.4778, "step": 288500 }, { "epoch": 2.37, "learning_rate": 9.082674382284766e-06, "loss": 3.49, "step": 289000 }, { "epoch": 2.37, "learning_rate": 9.081030429564847e-06, "loss": 3.4814, "step": 289500 }, { "epoch": 2.38, "learning_rate": 9.079386476844928e-06, "loss": 3.4752, "step": 290000 }, { "epoch": 2.38, "learning_rate": 9.077742524125007e-06, "loss": 3.4825, "step": 290500 }, { "epoch": 2.38, "learning_rate": 9.076098571405088e-06, "loss": 3.4784, "step": 291000 }, { "epoch": 2.39, "learning_rate": 9.074454618685167e-06, "loss": 3.4804, "step": 291500 }, { "epoch": 2.39, "learning_rate": 9.072810665965246e-06, "loss": 3.4815, "step": 292000 }, { "epoch": 2.4, "learning_rate": 9.071166713245327e-06, "loss": 3.4706, "step": 292500 }, { "epoch": 2.4, "learning_rate": 9.069522760525408e-06, "loss": 3.4753, "step": 293000 }, { "epoch": 2.4, "learning_rate": 9.067878807805488e-06, "loss": 3.4782, "step": 293500 }, { "epoch": 2.41, "learning_rate": 9.066234855085569e-06, "loss": 3.4705, "step": 294000 }, { "epoch": 2.41, "learning_rate": 9.064590902365648e-06, "loss": 3.4739, "step": 294500 }, { "epoch": 2.42, "learning_rate": 9.062946949645729e-06, "loss": 3.4786, "step": 295000 }, { "epoch": 2.42, "learning_rate": 9.06130299692581e-06, "loss": 3.4724, "step": 295500 }, { "epoch": 2.43, "learning_rate": 9.059659044205889e-06, "loss": 3.4684, "step": 296000 }, { "epoch": 2.43, "learning_rate": 9.05801509148597e-06, "loss": 3.4717, "step": 296500 }, { "epoch": 2.43, "learning_rate": 9.056371138766051e-06, "loss": 3.4704, "step": 297000 }, { "epoch": 2.44, "learning_rate": 9.05472718604613e-06, "loss": 3.4656, "step": 297500 }, { "epoch": 2.44, "learning_rate": 9.05308323332621e-06, "loss": 3.4666, "step": 298000 }, { "epoch": 2.45, "learning_rate": 9.05143928060629e-06, "loss": 3.4695, "step": 298500 }, { "epoch": 2.45, "learning_rate": 9.04979532788637e-06, "loss": 3.4686, "step": 299000 }, { "epoch": 2.45, "learning_rate": 9.04815137516645e-06, "loss": 3.4652, "step": 299500 }, { "epoch": 2.46, "learning_rate": 9.046507422446532e-06, "loss": 3.4597, "step": 300000 }, { "epoch": 2.46, "eval_accuracy": 0.42611833634349283, "eval_loss": 3.321903944015503, "eval_runtime": 409.6321, "eval_samples_per_second": 752.763, "eval_steps_per_second": 15.685, "step": 300000 }, { "epoch": 2.46, "learning_rate": 9.044863469726611e-06, "loss": 3.4601, "step": 300500 }, { "epoch": 2.47, "learning_rate": 9.043219517006692e-06, "loss": 3.4717, "step": 301000 }, { "epoch": 2.47, "learning_rate": 9.041575564286771e-06, "loss": 3.463, "step": 301500 }, { "epoch": 2.47, "learning_rate": 9.039931611566852e-06, "loss": 3.463, "step": 302000 }, { "epoch": 2.48, "learning_rate": 9.038287658846933e-06, "loss": 3.4575, "step": 302500 }, { "epoch": 2.48, "learning_rate": 9.036643706127013e-06, "loss": 3.4648, "step": 303000 }, { "epoch": 2.49, "learning_rate": 9.034999753407094e-06, "loss": 3.4625, "step": 303500 }, { "epoch": 2.49, "learning_rate": 9.033355800687173e-06, "loss": 3.4562, "step": 304000 }, { "epoch": 2.49, "learning_rate": 9.031711847967252e-06, "loss": 3.4587, "step": 304500 }, { "epoch": 2.5, "learning_rate": 9.030067895247333e-06, "loss": 3.4486, "step": 305000 }, { "epoch": 2.5, "learning_rate": 9.028423942527414e-06, "loss": 3.4609, "step": 305500 }, { "epoch": 2.51, "learning_rate": 9.026779989807493e-06, "loss": 3.4502, "step": 306000 }, { "epoch": 2.51, "learning_rate": 9.025136037087574e-06, "loss": 3.4515, "step": 306500 }, { "epoch": 2.52, "learning_rate": 9.023492084367654e-06, "loss": 3.4536, "step": 307000 }, { "epoch": 2.52, "learning_rate": 9.021848131647735e-06, "loss": 3.4527, "step": 307500 }, { "epoch": 2.52, "learning_rate": 9.020204178927815e-06, "loss": 3.4506, "step": 308000 }, { "epoch": 2.53, "learning_rate": 9.018560226207895e-06, "loss": 3.4553, "step": 308500 }, { "epoch": 2.53, "learning_rate": 9.016916273487976e-06, "loss": 3.457, "step": 309000 }, { "epoch": 2.54, "learning_rate": 9.015272320768057e-06, "loss": 3.4517, "step": 309500 }, { "epoch": 2.54, "learning_rate": 9.013628368048134e-06, "loss": 3.4471, "step": 310000 }, { "epoch": 2.54, "learning_rate": 9.011984415328215e-06, "loss": 3.4469, "step": 310500 }, { "epoch": 2.55, "learning_rate": 9.010340462608296e-06, "loss": 3.4488, "step": 311000 }, { "epoch": 2.55, "learning_rate": 9.008696509888376e-06, "loss": 3.4416, "step": 311500 }, { "epoch": 2.56, "learning_rate": 9.007052557168457e-06, "loss": 3.4526, "step": 312000 }, { "epoch": 2.56, "learning_rate": 9.005408604448537e-06, "loss": 3.4508, "step": 312500 }, { "epoch": 2.56, "learning_rate": 9.003764651728617e-06, "loss": 3.4583, "step": 313000 }, { "epoch": 2.57, "learning_rate": 9.002120699008698e-06, "loss": 3.4571, "step": 313500 }, { "epoch": 2.57, "learning_rate": 9.000476746288777e-06, "loss": 3.454, "step": 314000 }, { "epoch": 2.58, "learning_rate": 8.998832793568858e-06, "loss": 3.4457, "step": 314500 }, { "epoch": 2.58, "learning_rate": 8.997188840848939e-06, "loss": 3.4385, "step": 315000 }, { "epoch": 2.58, "learning_rate": 8.995544888129018e-06, "loss": 3.4465, "step": 315500 }, { "epoch": 2.59, "learning_rate": 8.9939009354091e-06, "loss": 3.4435, "step": 316000 }, { "epoch": 2.59, "learning_rate": 8.992256982689178e-06, "loss": 3.4489, "step": 316500 }, { "epoch": 2.6, "learning_rate": 8.990613029969258e-06, "loss": 3.438, "step": 317000 }, { "epoch": 2.6, "learning_rate": 8.988969077249339e-06, "loss": 3.4412, "step": 317500 }, { "epoch": 2.61, "learning_rate": 8.98732512452942e-06, "loss": 3.4394, "step": 318000 }, { "epoch": 2.61, "learning_rate": 8.985681171809499e-06, "loss": 3.4455, "step": 318500 }, { "epoch": 2.61, "learning_rate": 8.98403721908958e-06, "loss": 3.4398, "step": 319000 }, { "epoch": 2.62, "learning_rate": 8.98239326636966e-06, "loss": 3.4448, "step": 319500 }, { "epoch": 2.62, "learning_rate": 8.98074931364974e-06, "loss": 3.4345, "step": 320000 }, { "epoch": 2.63, "learning_rate": 8.979105360929821e-06, "loss": 3.4359, "step": 320500 }, { "epoch": 2.63, "learning_rate": 8.9774614082099e-06, "loss": 3.4498, "step": 321000 }, { "epoch": 2.63, "learning_rate": 8.975817455489981e-06, "loss": 3.4363, "step": 321500 }, { "epoch": 2.64, "learning_rate": 8.974173502770062e-06, "loss": 3.4381, "step": 322000 }, { "epoch": 2.64, "learning_rate": 8.97252955005014e-06, "loss": 3.4334, "step": 322500 }, { "epoch": 2.65, "learning_rate": 8.970885597330221e-06, "loss": 3.4354, "step": 323000 }, { "epoch": 2.65, "learning_rate": 8.969241644610302e-06, "loss": 3.4338, "step": 323500 }, { "epoch": 2.65, "learning_rate": 8.967597691890381e-06, "loss": 3.4305, "step": 324000 }, { "epoch": 2.66, "learning_rate": 8.965953739170462e-06, "loss": 3.4322, "step": 324500 }, { "epoch": 2.66, "learning_rate": 8.964309786450543e-06, "loss": 3.4363, "step": 325000 }, { "epoch": 2.67, "learning_rate": 8.962665833730622e-06, "loss": 3.4398, "step": 325500 }, { "epoch": 2.67, "learning_rate": 8.961021881010703e-06, "loss": 3.4376, "step": 326000 }, { "epoch": 2.67, "learning_rate": 8.959377928290783e-06, "loss": 3.4268, "step": 326500 }, { "epoch": 2.68, "learning_rate": 8.957733975570864e-06, "loss": 3.4415, "step": 327000 }, { "epoch": 2.68, "learning_rate": 8.956090022850945e-06, "loss": 3.4253, "step": 327500 }, { "epoch": 2.69, "learning_rate": 8.954446070131024e-06, "loss": 3.4335, "step": 328000 }, { "epoch": 2.69, "learning_rate": 8.952802117411103e-06, "loss": 3.4273, "step": 328500 }, { "epoch": 2.7, "learning_rate": 8.951158164691184e-06, "loss": 3.4334, "step": 329000 }, { "epoch": 2.7, "learning_rate": 8.949514211971263e-06, "loss": 3.4288, "step": 329500 }, { "epoch": 2.7, "learning_rate": 8.947870259251344e-06, "loss": 3.4263, "step": 330000 }, { "epoch": 2.7, "eval_accuracy": 0.43119620464233216, "eval_loss": 3.284120559692383, "eval_runtime": 409.6426, "eval_samples_per_second": 752.744, "eval_steps_per_second": 15.684, "step": 330000 }, { "epoch": 2.71, "learning_rate": 8.946226306531425e-06, "loss": 3.4205, "step": 330500 }, { "epoch": 2.71, "learning_rate": 8.944582353811505e-06, "loss": 3.4233, "step": 331000 }, { "epoch": 2.72, "learning_rate": 8.942938401091586e-06, "loss": 3.4275, "step": 331500 }, { "epoch": 2.72, "learning_rate": 8.941294448371665e-06, "loss": 3.4317, "step": 332000 }, { "epoch": 2.72, "learning_rate": 8.939650495651746e-06, "loss": 3.4239, "step": 332500 }, { "epoch": 2.73, "learning_rate": 8.938006542931827e-06, "loss": 3.4237, "step": 333000 }, { "epoch": 2.73, "learning_rate": 8.936362590211906e-06, "loss": 3.4192, "step": 333500 }, { "epoch": 2.74, "learning_rate": 8.934718637491987e-06, "loss": 3.4193, "step": 334000 }, { "epoch": 2.74, "learning_rate": 8.933074684772066e-06, "loss": 3.4232, "step": 334500 }, { "epoch": 2.74, "learning_rate": 8.931430732052146e-06, "loss": 3.422, "step": 335000 }, { "epoch": 2.75, "learning_rate": 8.929786779332227e-06, "loss": 3.4168, "step": 335500 }, { "epoch": 2.75, "learning_rate": 8.928142826612308e-06, "loss": 3.4203, "step": 336000 }, { "epoch": 2.76, "learning_rate": 8.926498873892387e-06, "loss": 3.4188, "step": 336500 }, { "epoch": 2.76, "learning_rate": 8.924854921172468e-06, "loss": 3.4202, "step": 337000 }, { "epoch": 2.77, "learning_rate": 8.923210968452549e-06, "loss": 3.4217, "step": 337500 }, { "epoch": 2.77, "learning_rate": 8.921567015732628e-06, "loss": 3.4185, "step": 338000 }, { "epoch": 2.77, "learning_rate": 8.919923063012709e-06, "loss": 3.4202, "step": 338500 }, { "epoch": 2.78, "learning_rate": 8.918279110292788e-06, "loss": 3.4172, "step": 339000 }, { "epoch": 2.78, "learning_rate": 8.91663515757287e-06, "loss": 3.4221, "step": 339500 }, { "epoch": 2.79, "learning_rate": 8.91499120485295e-06, "loss": 3.4136, "step": 340000 }, { "epoch": 2.79, "learning_rate": 8.91334725213303e-06, "loss": 3.4209, "step": 340500 }, { "epoch": 2.79, "learning_rate": 8.911703299413109e-06, "loss": 3.4182, "step": 341000 }, { "epoch": 2.8, "learning_rate": 8.91005934669319e-06, "loss": 3.4152, "step": 341500 }, { "epoch": 2.8, "learning_rate": 8.90841539397327e-06, "loss": 3.4124, "step": 342000 }, { "epoch": 2.81, "learning_rate": 8.90677144125335e-06, "loss": 3.408, "step": 342500 }, { "epoch": 2.81, "learning_rate": 8.905127488533431e-06, "loss": 3.4112, "step": 343000 }, { "epoch": 2.81, "learning_rate": 8.90348353581351e-06, "loss": 3.4144, "step": 343500 }, { "epoch": 2.82, "learning_rate": 8.901839583093591e-06, "loss": 3.4173, "step": 344000 }, { "epoch": 2.82, "learning_rate": 8.90019563037367e-06, "loss": 3.4107, "step": 344500 }, { "epoch": 2.83, "learning_rate": 8.898551677653752e-06, "loss": 3.4094, "step": 345000 }, { "epoch": 2.83, "learning_rate": 8.896907724933833e-06, "loss": 3.4165, "step": 345500 }, { "epoch": 2.83, "learning_rate": 8.895263772213912e-06, "loss": 3.4163, "step": 346000 }, { "epoch": 2.84, "learning_rate": 8.893619819493991e-06, "loss": 3.4158, "step": 346500 }, { "epoch": 2.84, "learning_rate": 8.891975866774072e-06, "loss": 3.4166, "step": 347000 }, { "epoch": 2.85, "learning_rate": 8.890331914054151e-06, "loss": 3.4059, "step": 347500 }, { "epoch": 2.85, "learning_rate": 8.888687961334232e-06, "loss": 3.4079, "step": 348000 }, { "epoch": 2.86, "learning_rate": 8.887044008614313e-06, "loss": 3.4116, "step": 348500 }, { "epoch": 2.86, "learning_rate": 8.885400055894393e-06, "loss": 3.4078, "step": 349000 }, { "epoch": 2.86, "learning_rate": 8.883756103174474e-06, "loss": 3.4109, "step": 349500 }, { "epoch": 2.87, "learning_rate": 8.882112150454555e-06, "loss": 3.4022, "step": 350000 }, { "epoch": 2.87, "learning_rate": 8.880468197734634e-06, "loss": 3.4004, "step": 350500 }, { "epoch": 2.88, "learning_rate": 8.878824245014715e-06, "loss": 3.4024, "step": 351000 }, { "epoch": 2.88, "learning_rate": 8.877180292294794e-06, "loss": 3.4056, "step": 351500 }, { "epoch": 2.88, "learning_rate": 8.875536339574875e-06, "loss": 3.3985, "step": 352000 }, { "epoch": 2.89, "learning_rate": 8.873892386854954e-06, "loss": 3.3945, "step": 352500 }, { "epoch": 2.89, "learning_rate": 8.872248434135035e-06, "loss": 3.3978, "step": 353000 }, { "epoch": 2.9, "learning_rate": 8.870604481415115e-06, "loss": 3.4052, "step": 353500 }, { "epoch": 2.9, "learning_rate": 8.868960528695196e-06, "loss": 3.4048, "step": 354000 }, { "epoch": 2.9, "learning_rate": 8.867316575975275e-06, "loss": 3.4055, "step": 354500 }, { "epoch": 2.91, "learning_rate": 8.865672623255356e-06, "loss": 3.4025, "step": 355000 }, { "epoch": 2.91, "learning_rate": 8.864028670535437e-06, "loss": 3.3976, "step": 355500 }, { "epoch": 2.92, "learning_rate": 8.862384717815516e-06, "loss": 3.3962, "step": 356000 }, { "epoch": 2.92, "learning_rate": 8.860740765095597e-06, "loss": 3.3927, "step": 356500 }, { "epoch": 2.92, "learning_rate": 8.859096812375676e-06, "loss": 3.4024, "step": 357000 }, { "epoch": 2.93, "learning_rate": 8.857452859655757e-06, "loss": 3.3929, "step": 357500 }, { "epoch": 2.93, "learning_rate": 8.855808906935838e-06, "loss": 3.3911, "step": 358000 }, { "epoch": 2.94, "learning_rate": 8.854164954215918e-06, "loss": 3.3976, "step": 358500 }, { "epoch": 2.94, "learning_rate": 8.852521001495997e-06, "loss": 3.3944, "step": 359000 }, { "epoch": 2.95, "learning_rate": 8.850877048776078e-06, "loss": 3.3953, "step": 359500 }, { "epoch": 2.95, "learning_rate": 8.849233096056157e-06, "loss": 3.3909, "step": 360000 }, { "epoch": 2.95, "eval_accuracy": 0.43477661532857337, "eval_loss": 3.254718542098999, "eval_runtime": 406.1401, "eval_samples_per_second": 759.236, "eval_steps_per_second": 15.82, "step": 360000 }, { "epoch": 2.95, "learning_rate": 8.847589143336238e-06, "loss": 3.403, "step": 360500 }, { "epoch": 2.96, "learning_rate": 8.845945190616319e-06, "loss": 3.3966, "step": 361000 }, { "epoch": 2.96, "learning_rate": 8.844301237896398e-06, "loss": 3.3835, "step": 361500 }, { "epoch": 2.97, "learning_rate": 8.84265728517648e-06, "loss": 3.3896, "step": 362000 }, { "epoch": 2.97, "learning_rate": 8.84101333245656e-06, "loss": 3.3855, "step": 362500 }, { "epoch": 2.97, "learning_rate": 8.83936937973664e-06, "loss": 3.3851, "step": 363000 }, { "epoch": 2.98, "learning_rate": 8.83772542701672e-06, "loss": 3.3927, "step": 363500 }, { "epoch": 2.98, "learning_rate": 8.8360814742968e-06, "loss": 3.389, "step": 364000 }, { "epoch": 2.99, "learning_rate": 8.83443752157688e-06, "loss": 3.3861, "step": 364500 }, { "epoch": 2.99, "learning_rate": 8.83279356885696e-06, "loss": 3.3955, "step": 365000 }, { "epoch": 2.99, "learning_rate": 8.831149616137041e-06, "loss": 3.395, "step": 365500 }, { "epoch": 3.0, "learning_rate": 8.82950566341712e-06, "loss": 3.3881, "step": 366000 }, { "epoch": 3.0, "learning_rate": 8.827861710697201e-06, "loss": 3.3844, "step": 366500 }, { "epoch": 3.01, "learning_rate": 8.82621775797728e-06, "loss": 3.3886, "step": 367000 }, { "epoch": 3.01, "learning_rate": 8.824573805257362e-06, "loss": 3.3897, "step": 367500 }, { "epoch": 3.01, "learning_rate": 8.822929852537443e-06, "loss": 3.3882, "step": 368000 }, { "epoch": 3.02, "learning_rate": 8.821285899817522e-06, "loss": 3.3919, "step": 368500 }, { "epoch": 3.02, "learning_rate": 8.819641947097603e-06, "loss": 3.3815, "step": 369000 }, { "epoch": 3.03, "learning_rate": 8.817997994377682e-06, "loss": 3.3802, "step": 369500 }, { "epoch": 3.03, "learning_rate": 8.816354041657763e-06, "loss": 3.3817, "step": 370000 }, { "epoch": 3.04, "learning_rate": 8.814710088937844e-06, "loss": 3.3874, "step": 370500 }, { "epoch": 3.04, "learning_rate": 8.813066136217923e-06, "loss": 3.3875, "step": 371000 }, { "epoch": 3.04, "learning_rate": 8.811422183498003e-06, "loss": 3.3885, "step": 371500 }, { "epoch": 3.05, "learning_rate": 8.809778230778084e-06, "loss": 3.3884, "step": 372000 }, { "epoch": 3.05, "learning_rate": 8.808134278058163e-06, "loss": 3.3905, "step": 372500 }, { "epoch": 3.06, "learning_rate": 8.806490325338244e-06, "loss": 3.3829, "step": 373000 }, { "epoch": 3.06, "learning_rate": 8.804846372618325e-06, "loss": 3.3858, "step": 373500 }, { "epoch": 3.06, "learning_rate": 8.803202419898404e-06, "loss": 3.3852, "step": 374000 }, { "epoch": 3.07, "learning_rate": 8.801558467178485e-06, "loss": 3.3814, "step": 374500 }, { "epoch": 3.07, "learning_rate": 8.799914514458564e-06, "loss": 3.3849, "step": 375000 }, { "epoch": 3.08, "learning_rate": 8.798270561738645e-06, "loss": 3.3812, "step": 375500 }, { "epoch": 3.08, "learning_rate": 8.796626609018726e-06, "loss": 3.3815, "step": 376000 }, { "epoch": 3.08, "learning_rate": 8.794982656298806e-06, "loss": 3.3738, "step": 376500 }, { "epoch": 3.09, "learning_rate": 8.793338703578885e-06, "loss": 3.381, "step": 377000 }, { "epoch": 3.09, "learning_rate": 8.791694750858966e-06, "loss": 3.3762, "step": 377500 }, { "epoch": 3.1, "learning_rate": 8.790050798139047e-06, "loss": 3.3709, "step": 378000 }, { "epoch": 3.1, "learning_rate": 8.788406845419126e-06, "loss": 3.3779, "step": 378500 }, { "epoch": 3.11, "learning_rate": 8.786762892699207e-06, "loss": 3.3774, "step": 379000 }, { "epoch": 3.11, "learning_rate": 8.785118939979286e-06, "loss": 3.3692, "step": 379500 }, { "epoch": 3.11, "learning_rate": 8.783474987259367e-06, "loss": 3.3737, "step": 380000 }, { "epoch": 3.12, "learning_rate": 8.781831034539448e-06, "loss": 3.3717, "step": 380500 }, { "epoch": 3.12, "learning_rate": 8.780187081819528e-06, "loss": 3.3763, "step": 381000 }, { "epoch": 3.13, "learning_rate": 8.778543129099608e-06, "loss": 3.3748, "step": 381500 }, { "epoch": 3.13, "learning_rate": 8.776899176379688e-06, "loss": 3.3824, "step": 382000 }, { "epoch": 3.13, "learning_rate": 8.775255223659769e-06, "loss": 3.3739, "step": 382500 }, { "epoch": 3.14, "learning_rate": 8.773611270939848e-06, "loss": 3.3745, "step": 383000 }, { "epoch": 3.14, "learning_rate": 8.771967318219929e-06, "loss": 3.3775, "step": 383500 }, { "epoch": 3.15, "learning_rate": 8.770323365500008e-06, "loss": 3.3675, "step": 384000 }, { "epoch": 3.15, "learning_rate": 8.76867941278009e-06, "loss": 3.3676, "step": 384500 }, { "epoch": 3.15, "learning_rate": 8.767035460060169e-06, "loss": 3.3667, "step": 385000 }, { "epoch": 3.16, "learning_rate": 8.76539150734025e-06, "loss": 3.3638, "step": 385500 }, { "epoch": 3.16, "learning_rate": 8.76374755462033e-06, "loss": 3.3682, "step": 386000 }, { "epoch": 3.17, "learning_rate": 8.76210360190041e-06, "loss": 3.3698, "step": 386500 }, { "epoch": 3.17, "learning_rate": 8.76045964918049e-06, "loss": 3.371, "step": 387000 }, { "epoch": 3.17, "learning_rate": 8.75881569646057e-06, "loss": 3.3797, "step": 387500 }, { "epoch": 3.18, "learning_rate": 8.757171743740651e-06, "loss": 3.3701, "step": 388000 }, { "epoch": 3.18, "learning_rate": 8.755527791020732e-06, "loss": 3.3725, "step": 388500 }, { "epoch": 3.19, "learning_rate": 8.753883838300811e-06, "loss": 3.3784, "step": 389000 }, { "epoch": 3.19, "learning_rate": 8.75223988558089e-06, "loss": 3.3677, "step": 389500 }, { "epoch": 3.2, "learning_rate": 8.750595932860971e-06, "loss": 3.3635, "step": 390000 }, { "epoch": 3.2, "eval_accuracy": 0.43791422082834985, "eval_loss": 3.228388547897339, "eval_runtime": 413.1728, "eval_samples_per_second": 746.312, "eval_steps_per_second": 15.55, "step": 390000 }, { "epoch": 3.2, "learning_rate": 8.748951980141052e-06, "loss": 3.364, "step": 390500 }, { "epoch": 3.2, "learning_rate": 8.747308027421132e-06, "loss": 3.3708, "step": 391000 }, { "epoch": 3.21, "learning_rate": 8.745664074701213e-06, "loss": 3.3714, "step": 391500 }, { "epoch": 3.21, "learning_rate": 8.744020121981292e-06, "loss": 3.3658, "step": 392000 }, { "epoch": 3.22, "learning_rate": 8.742376169261373e-06, "loss": 3.3653, "step": 392500 }, { "epoch": 3.22, "learning_rate": 8.740732216541454e-06, "loss": 3.3617, "step": 393000 }, { "epoch": 3.22, "learning_rate": 8.739088263821533e-06, "loss": 3.367, "step": 393500 }, { "epoch": 3.23, "learning_rate": 8.737444311101614e-06, "loss": 3.3608, "step": 394000 }, { "epoch": 3.23, "learning_rate": 8.735800358381693e-06, "loss": 3.3643, "step": 394500 }, { "epoch": 3.24, "learning_rate": 8.734156405661773e-06, "loss": 3.3607, "step": 395000 }, { "epoch": 3.24, "learning_rate": 8.732512452941854e-06, "loss": 3.3638, "step": 395500 }, { "epoch": 3.24, "learning_rate": 8.730868500221935e-06, "loss": 3.3687, "step": 396000 }, { "epoch": 3.25, "learning_rate": 8.729224547502014e-06, "loss": 3.3616, "step": 396500 }, { "epoch": 3.25, "learning_rate": 8.727580594782095e-06, "loss": 3.3678, "step": 397000 }, { "epoch": 3.26, "learning_rate": 8.725936642062174e-06, "loss": 3.3616, "step": 397500 }, { "epoch": 3.26, "learning_rate": 8.724292689342255e-06, "loss": 3.3573, "step": 398000 }, { "epoch": 3.26, "learning_rate": 8.722648736622336e-06, "loss": 3.3622, "step": 398500 }, { "epoch": 3.27, "learning_rate": 8.721004783902415e-06, "loss": 3.3588, "step": 399000 }, { "epoch": 3.27, "learning_rate": 8.719360831182496e-06, "loss": 3.3604, "step": 399500 }, { "epoch": 3.28, "learning_rate": 8.717716878462576e-06, "loss": 3.361, "step": 400000 }, { "epoch": 3.28, "learning_rate": 8.716072925742657e-06, "loss": 3.3546, "step": 400500 }, { "epoch": 3.29, "learning_rate": 8.714428973022736e-06, "loss": 3.3613, "step": 401000 }, { "epoch": 3.29, "learning_rate": 8.712785020302817e-06, "loss": 3.3619, "step": 401500 }, { "epoch": 3.29, "learning_rate": 8.711141067582896e-06, "loss": 3.3623, "step": 402000 }, { "epoch": 3.3, "learning_rate": 8.709497114862977e-06, "loss": 3.3552, "step": 402500 }, { "epoch": 3.3, "learning_rate": 8.707853162143056e-06, "loss": 3.3554, "step": 403000 }, { "epoch": 3.31, "learning_rate": 8.706209209423137e-06, "loss": 3.3587, "step": 403500 }, { "epoch": 3.31, "learning_rate": 8.704565256703218e-06, "loss": 3.3558, "step": 404000 }, { "epoch": 3.31, "learning_rate": 8.702921303983298e-06, "loss": 3.3582, "step": 404500 }, { "epoch": 3.32, "learning_rate": 8.701277351263379e-06, "loss": 3.3627, "step": 405000 }, { "epoch": 3.32, "learning_rate": 8.69963339854346e-06, "loss": 3.3572, "step": 405500 }, { "epoch": 3.33, "learning_rate": 8.697989445823539e-06, "loss": 3.3658, "step": 406000 }, { "epoch": 3.33, "learning_rate": 8.69634549310362e-06, "loss": 3.358, "step": 406500 }, { "epoch": 3.33, "learning_rate": 8.6947015403837e-06, "loss": 3.349, "step": 407000 }, { "epoch": 3.34, "learning_rate": 8.693057587663778e-06, "loss": 3.3534, "step": 407500 }, { "epoch": 3.34, "learning_rate": 8.69141363494386e-06, "loss": 3.3595, "step": 408000 }, { "epoch": 3.35, "learning_rate": 8.68976968222394e-06, "loss": 3.3551, "step": 408500 }, { "epoch": 3.35, "learning_rate": 8.68812572950402e-06, "loss": 3.3574, "step": 409000 }, { "epoch": 3.35, "learning_rate": 8.6864817767841e-06, "loss": 3.3534, "step": 409500 }, { "epoch": 3.36, "learning_rate": 8.68483782406418e-06, "loss": 3.3541, "step": 410000 }, { "epoch": 3.36, "learning_rate": 8.683193871344261e-06, "loss": 3.3545, "step": 410500 }, { "epoch": 3.37, "learning_rate": 8.681549918624342e-06, "loss": 3.3506, "step": 411000 }, { "epoch": 3.37, "learning_rate": 8.679905965904421e-06, "loss": 3.3454, "step": 411500 }, { "epoch": 3.38, "learning_rate": 8.678262013184502e-06, "loss": 3.3557, "step": 412000 }, { "epoch": 3.38, "learning_rate": 8.676618060464581e-06, "loss": 3.3449, "step": 412500 }, { "epoch": 3.38, "learning_rate": 8.67497410774466e-06, "loss": 3.3565, "step": 413000 }, { "epoch": 3.39, "learning_rate": 8.673330155024742e-06, "loss": 3.3423, "step": 413500 }, { "epoch": 3.39, "learning_rate": 8.671686202304823e-06, "loss": 3.3515, "step": 414000 }, { "epoch": 3.4, "learning_rate": 8.670042249584902e-06, "loss": 3.3462, "step": 414500 }, { "epoch": 3.4, "learning_rate": 8.668398296864983e-06, "loss": 3.3515, "step": 415000 }, { "epoch": 3.4, "learning_rate": 8.666754344145062e-06, "loss": 3.3514, "step": 415500 }, { "epoch": 3.41, "learning_rate": 8.665110391425143e-06, "loss": 3.3498, "step": 416000 }, { "epoch": 3.41, "learning_rate": 8.663466438705224e-06, "loss": 3.3514, "step": 416500 }, { "epoch": 3.42, "learning_rate": 8.661822485985303e-06, "loss": 3.3517, "step": 417000 }, { "epoch": 3.42, "learning_rate": 8.660178533265384e-06, "loss": 3.3476, "step": 417500 }, { "epoch": 3.42, "learning_rate": 8.658534580545465e-06, "loss": 3.3381, "step": 418000 }, { "epoch": 3.43, "learning_rate": 8.656890627825545e-06, "loss": 3.3461, "step": 418500 }, { "epoch": 3.43, "learning_rate": 8.655246675105626e-06, "loss": 3.3492, "step": 419000 }, { "epoch": 3.44, "learning_rate": 8.653602722385705e-06, "loss": 3.3478, "step": 419500 }, { "epoch": 3.44, "learning_rate": 8.651958769665784e-06, "loss": 3.3488, "step": 420000 }, { "epoch": 3.44, "eval_accuracy": 0.4409229499038142, "eval_loss": 3.20596981048584, "eval_runtime": 409.4361, "eval_samples_per_second": 753.124, "eval_steps_per_second": 15.692, "step": 420000 }, { "epoch": 3.45, "learning_rate": 8.650314816945865e-06, "loss": 3.3408, "step": 420500 }, { "epoch": 3.45, "learning_rate": 8.648670864225946e-06, "loss": 3.3418, "step": 421000 }, { "epoch": 3.45, "learning_rate": 8.647026911506025e-06, "loss": 3.3481, "step": 421500 }, { "epoch": 3.46, "learning_rate": 8.645382958786106e-06, "loss": 3.3451, "step": 422000 }, { "epoch": 3.46, "learning_rate": 8.643739006066186e-06, "loss": 3.3391, "step": 422500 }, { "epoch": 3.47, "learning_rate": 8.642095053346267e-06, "loss": 3.3423, "step": 423000 }, { "epoch": 3.47, "learning_rate": 8.640451100626348e-06, "loss": 3.3375, "step": 423500 }, { "epoch": 3.47, "learning_rate": 8.638807147906427e-06, "loss": 3.3379, "step": 424000 }, { "epoch": 3.48, "learning_rate": 8.637163195186508e-06, "loss": 3.3446, "step": 424500 }, { "epoch": 3.48, "learning_rate": 8.635519242466587e-06, "loss": 3.3365, "step": 425000 }, { "epoch": 3.49, "learning_rate": 8.633875289746666e-06, "loss": 3.3405, "step": 425500 }, { "epoch": 3.49, "learning_rate": 8.632231337026747e-06, "loss": 3.3409, "step": 426000 }, { "epoch": 3.49, "learning_rate": 8.630587384306828e-06, "loss": 3.3369, "step": 426500 }, { "epoch": 3.5, "learning_rate": 8.628943431586908e-06, "loss": 3.3391, "step": 427000 }, { "epoch": 3.5, "learning_rate": 8.627299478866989e-06, "loss": 3.3426, "step": 427500 }, { "epoch": 3.51, "learning_rate": 8.625655526147068e-06, "loss": 3.3418, "step": 428000 }, { "epoch": 3.51, "learning_rate": 8.624011573427149e-06, "loss": 3.3402, "step": 428500 }, { "epoch": 3.51, "learning_rate": 8.62236762070723e-06, "loss": 3.3424, "step": 429000 }, { "epoch": 3.52, "learning_rate": 8.620723667987309e-06, "loss": 3.338, "step": 429500 }, { "epoch": 3.52, "learning_rate": 8.61907971526739e-06, "loss": 3.3335, "step": 430000 }, { "epoch": 3.53, "learning_rate": 8.617435762547471e-06, "loss": 3.3276, "step": 430500 }, { "epoch": 3.53, "learning_rate": 8.61579180982755e-06, "loss": 3.3285, "step": 431000 }, { "epoch": 3.54, "learning_rate": 8.61414785710763e-06, "loss": 3.3357, "step": 431500 }, { "epoch": 3.54, "learning_rate": 8.61250390438771e-06, "loss": 3.337, "step": 432000 }, { "epoch": 3.54, "learning_rate": 8.61085995166779e-06, "loss": 3.3378, "step": 432500 }, { "epoch": 3.55, "learning_rate": 8.60921599894787e-06, "loss": 3.3323, "step": 433000 }, { "epoch": 3.55, "learning_rate": 8.607572046227952e-06, "loss": 3.3337, "step": 433500 }, { "epoch": 3.56, "learning_rate": 8.605928093508031e-06, "loss": 3.3325, "step": 434000 }, { "epoch": 3.56, "learning_rate": 8.604284140788112e-06, "loss": 3.3287, "step": 434500 }, { "epoch": 3.56, "learning_rate": 8.602640188068191e-06, "loss": 3.3334, "step": 435000 }, { "epoch": 3.57, "learning_rate": 8.600996235348272e-06, "loss": 3.3308, "step": 435500 }, { "epoch": 3.57, "learning_rate": 8.599352282628353e-06, "loss": 3.3421, "step": 436000 }, { "epoch": 3.58, "learning_rate": 8.597708329908433e-06, "loss": 3.3361, "step": 436500 }, { "epoch": 3.58, "learning_rate": 8.596064377188514e-06, "loss": 3.3349, "step": 437000 }, { "epoch": 3.58, "learning_rate": 8.594420424468593e-06, "loss": 3.3278, "step": 437500 }, { "epoch": 3.59, "learning_rate": 8.592776471748672e-06, "loss": 3.3309, "step": 438000 }, { "epoch": 3.59, "learning_rate": 8.591132519028753e-06, "loss": 3.3285, "step": 438500 }, { "epoch": 3.6, "learning_rate": 8.589488566308834e-06, "loss": 3.322, "step": 439000 }, { "epoch": 3.6, "learning_rate": 8.587844613588913e-06, "loss": 3.3353, "step": 439500 }, { "epoch": 3.6, "learning_rate": 8.586200660868994e-06, "loss": 3.3251, "step": 440000 }, { "epoch": 3.61, "learning_rate": 8.584556708149074e-06, "loss": 3.3303, "step": 440500 }, { "epoch": 3.61, "learning_rate": 8.582912755429155e-06, "loss": 3.3266, "step": 441000 }, { "epoch": 3.62, "learning_rate": 8.581268802709236e-06, "loss": 3.3359, "step": 441500 }, { "epoch": 3.62, "learning_rate": 8.579624849989315e-06, "loss": 3.3264, "step": 442000 }, { "epoch": 3.63, "learning_rate": 8.577980897269396e-06, "loss": 3.3244, "step": 442500 }, { "epoch": 3.63, "learning_rate": 8.576336944549477e-06, "loss": 3.3266, "step": 443000 }, { "epoch": 3.63, "learning_rate": 8.574692991829554e-06, "loss": 3.3246, "step": 443500 }, { "epoch": 3.64, "learning_rate": 8.573049039109635e-06, "loss": 3.3264, "step": 444000 }, { "epoch": 3.64, "learning_rate": 8.571405086389716e-06, "loss": 3.3309, "step": 444500 }, { "epoch": 3.65, "learning_rate": 8.569761133669796e-06, "loss": 3.329, "step": 445000 }, { "epoch": 3.65, "learning_rate": 8.568117180949877e-06, "loss": 3.3208, "step": 445500 }, { "epoch": 3.65, "learning_rate": 8.566473228229958e-06, "loss": 3.3259, "step": 446000 }, { "epoch": 3.66, "learning_rate": 8.564829275510037e-06, "loss": 3.3272, "step": 446500 }, { "epoch": 3.66, "learning_rate": 8.563185322790118e-06, "loss": 3.3228, "step": 447000 }, { "epoch": 3.67, "learning_rate": 8.561541370070197e-06, "loss": 3.3259, "step": 447500 }, { "epoch": 3.67, "learning_rate": 8.559897417350278e-06, "loss": 3.3244, "step": 448000 }, { "epoch": 3.67, "learning_rate": 8.558253464630359e-06, "loss": 3.3245, "step": 448500 }, { "epoch": 3.68, "learning_rate": 8.556609511910438e-06, "loss": 3.3324, "step": 449000 }, { "epoch": 3.68, "learning_rate": 8.554965559190518e-06, "loss": 3.3236, "step": 449500 }, { "epoch": 3.69, "learning_rate": 8.553321606470599e-06, "loss": 3.3239, "step": 450000 }, { "epoch": 3.69, "eval_accuracy": 0.443556698280814, "eval_loss": 3.187194585800171, "eval_runtime": 414.6673, "eval_samples_per_second": 743.623, "eval_steps_per_second": 15.494, "step": 450000 }, { "epoch": 3.69, "learning_rate": 8.551677653750678e-06, "loss": 3.3201, "step": 450500 }, { "epoch": 3.69, "learning_rate": 8.550033701030759e-06, "loss": 3.3214, "step": 451000 }, { "epoch": 3.7, "learning_rate": 8.54838974831084e-06, "loss": 3.3252, "step": 451500 }, { "epoch": 3.7, "learning_rate": 8.546745795590919e-06, "loss": 3.3185, "step": 452000 }, { "epoch": 3.71, "learning_rate": 8.545101842871e-06, "loss": 3.324, "step": 452500 }, { "epoch": 3.71, "learning_rate": 8.54345789015108e-06, "loss": 3.3222, "step": 453000 }, { "epoch": 3.72, "learning_rate": 8.54181393743116e-06, "loss": 3.3129, "step": 453500 }, { "epoch": 3.72, "learning_rate": 8.540169984711241e-06, "loss": 3.3245, "step": 454000 }, { "epoch": 3.72, "learning_rate": 8.53852603199132e-06, "loss": 3.3226, "step": 454500 }, { "epoch": 3.73, "learning_rate": 8.536882079271401e-06, "loss": 3.3162, "step": 455000 }, { "epoch": 3.73, "learning_rate": 8.53523812655148e-06, "loss": 3.3124, "step": 455500 }, { "epoch": 3.74, "learning_rate": 8.53359417383156e-06, "loss": 3.3155, "step": 456000 }, { "epoch": 3.74, "learning_rate": 8.531950221111641e-06, "loss": 3.3179, "step": 456500 }, { "epoch": 3.74, "learning_rate": 8.530306268391722e-06, "loss": 3.3139, "step": 457000 }, { "epoch": 3.75, "learning_rate": 8.528662315671801e-06, "loss": 3.3116, "step": 457500 }, { "epoch": 3.75, "learning_rate": 8.527018362951882e-06, "loss": 3.3204, "step": 458000 }, { "epoch": 3.76, "learning_rate": 8.525374410231963e-06, "loss": 3.3166, "step": 458500 }, { "epoch": 3.76, "learning_rate": 8.523730457512042e-06, "loss": 3.3175, "step": 459000 }, { "epoch": 3.76, "learning_rate": 8.522086504792123e-06, "loss": 3.3107, "step": 459500 }, { "epoch": 3.77, "learning_rate": 8.520442552072203e-06, "loss": 3.3189, "step": 460000 }, { "epoch": 3.77, "learning_rate": 8.518798599352284e-06, "loss": 3.3178, "step": 460500 }, { "epoch": 3.78, "learning_rate": 8.517154646632365e-06, "loss": 3.3001, "step": 461000 }, { "epoch": 3.78, "learning_rate": 8.515510693912444e-06, "loss": 3.3178, "step": 461500 }, { "epoch": 3.79, "learning_rate": 8.513866741192523e-06, "loss": 3.3168, "step": 462000 }, { "epoch": 3.79, "learning_rate": 8.512222788472604e-06, "loss": 3.3178, "step": 462500 }, { "epoch": 3.79, "learning_rate": 8.510578835752684e-06, "loss": 3.3187, "step": 463000 }, { "epoch": 3.8, "learning_rate": 8.508934883032764e-06, "loss": 3.3136, "step": 463500 }, { "epoch": 3.8, "learning_rate": 8.507290930312845e-06, "loss": 3.3138, "step": 464000 }, { "epoch": 3.81, "learning_rate": 8.505646977592925e-06, "loss": 3.3117, "step": 464500 }, { "epoch": 3.81, "learning_rate": 8.504003024873006e-06, "loss": 3.3167, "step": 465000 }, { "epoch": 3.81, "learning_rate": 8.502359072153085e-06, "loss": 3.3172, "step": 465500 }, { "epoch": 3.82, "learning_rate": 8.500715119433166e-06, "loss": 3.3083, "step": 466000 }, { "epoch": 3.82, "learning_rate": 8.499071166713247e-06, "loss": 3.31, "step": 466500 }, { "epoch": 3.83, "learning_rate": 8.497427213993326e-06, "loss": 3.3198, "step": 467000 }, { "epoch": 3.83, "learning_rate": 8.495783261273407e-06, "loss": 3.3153, "step": 467500 }, { "epoch": 3.83, "learning_rate": 8.494139308553486e-06, "loss": 3.3139, "step": 468000 }, { "epoch": 3.84, "learning_rate": 8.492495355833566e-06, "loss": 3.3044, "step": 468500 }, { "epoch": 3.84, "learning_rate": 8.490851403113647e-06, "loss": 3.306, "step": 469000 }, { "epoch": 3.85, "learning_rate": 8.489207450393728e-06, "loss": 3.3104, "step": 469500 }, { "epoch": 3.85, "learning_rate": 8.487563497673807e-06, "loss": 3.3154, "step": 470000 }, { "epoch": 3.85, "learning_rate": 8.485919544953888e-06, "loss": 3.3111, "step": 470500 }, { "epoch": 3.86, "learning_rate": 8.484275592233969e-06, "loss": 3.3065, "step": 471000 }, { "epoch": 3.86, "learning_rate": 8.482631639514048e-06, "loss": 3.3094, "step": 471500 }, { "epoch": 3.87, "learning_rate": 8.48098768679413e-06, "loss": 3.307, "step": 472000 }, { "epoch": 3.87, "learning_rate": 8.479343734074208e-06, "loss": 3.3099, "step": 472500 }, { "epoch": 3.88, "learning_rate": 8.47769978135429e-06, "loss": 3.3043, "step": 473000 }, { "epoch": 3.88, "learning_rate": 8.47605582863437e-06, "loss": 3.3068, "step": 473500 }, { "epoch": 3.88, "learning_rate": 8.47441187591445e-06, "loss": 3.3114, "step": 474000 }, { "epoch": 3.89, "learning_rate": 8.472767923194529e-06, "loss": 3.3066, "step": 474500 }, { "epoch": 3.89, "learning_rate": 8.47112397047461e-06, "loss": 3.3085, "step": 475000 }, { "epoch": 3.9, "learning_rate": 8.46948001775469e-06, "loss": 3.3108, "step": 475500 }, { "epoch": 3.9, "learning_rate": 8.46783606503477e-06, "loss": 3.3058, "step": 476000 }, { "epoch": 3.9, "learning_rate": 8.466192112314851e-06, "loss": 3.3152, "step": 476500 }, { "epoch": 3.91, "learning_rate": 8.46454815959493e-06, "loss": 3.2999, "step": 477000 }, { "epoch": 3.91, "learning_rate": 8.462904206875011e-06, "loss": 3.3049, "step": 477500 }, { "epoch": 3.92, "learning_rate": 8.46126025415509e-06, "loss": 3.3029, "step": 478000 }, { "epoch": 3.92, "learning_rate": 8.459616301435172e-06, "loss": 3.3006, "step": 478500 }, { "epoch": 3.92, "learning_rate": 8.457972348715253e-06, "loss": 3.3031, "step": 479000 }, { "epoch": 3.93, "learning_rate": 8.456328395995332e-06, "loss": 3.3033, "step": 479500 }, { "epoch": 3.93, "learning_rate": 8.454684443275411e-06, "loss": 3.3062, "step": 480000 }, { "epoch": 3.93, "eval_accuracy": 0.4462195104271718, "eval_loss": 3.166045904159546, "eval_runtime": 409.7348, "eval_samples_per_second": 752.575, "eval_steps_per_second": 15.681, "step": 480000 }, { "epoch": 3.94, "learning_rate": 8.453040490555492e-06, "loss": 3.3111, "step": 480500 }, { "epoch": 3.94, "learning_rate": 8.451396537835571e-06, "loss": 3.2931, "step": 481000 }, { "epoch": 3.94, "learning_rate": 8.449752585115652e-06, "loss": 3.2991, "step": 481500 }, { "epoch": 3.95, "learning_rate": 8.448108632395733e-06, "loss": 3.3012, "step": 482000 }, { "epoch": 3.95, "learning_rate": 8.446464679675813e-06, "loss": 3.3012, "step": 482500 }, { "epoch": 3.96, "learning_rate": 8.444820726955894e-06, "loss": 3.3084, "step": 483000 }, { "epoch": 3.96, "learning_rate": 8.443176774235975e-06, "loss": 3.3038, "step": 483500 }, { "epoch": 3.97, "learning_rate": 8.441532821516054e-06, "loss": 3.2989, "step": 484000 }, { "epoch": 3.97, "learning_rate": 8.439888868796135e-06, "loss": 3.3014, "step": 484500 }, { "epoch": 3.97, "learning_rate": 8.438244916076214e-06, "loss": 3.3043, "step": 485000 }, { "epoch": 3.98, "learning_rate": 8.436600963356295e-06, "loss": 3.305, "step": 485500 }, { "epoch": 3.98, "learning_rate": 8.434957010636374e-06, "loss": 3.3027, "step": 486000 }, { "epoch": 3.99, "learning_rate": 8.433313057916455e-06, "loss": 3.3018, "step": 486500 }, { "epoch": 3.99, "learning_rate": 8.431669105196535e-06, "loss": 3.3057, "step": 487000 }, { "epoch": 3.99, "learning_rate": 8.430025152476616e-06, "loss": 3.2995, "step": 487500 }, { "epoch": 4.0, "learning_rate": 8.428381199756695e-06, "loss": 3.3044, "step": 488000 }, { "epoch": 4.0, "learning_rate": 8.426737247036776e-06, "loss": 3.2979, "step": 488500 }, { "epoch": 4.01, "learning_rate": 8.425093294316857e-06, "loss": 3.2995, "step": 489000 }, { "epoch": 4.01, "learning_rate": 8.423449341596936e-06, "loss": 3.2958, "step": 489500 }, { "epoch": 4.01, "learning_rate": 8.421805388877017e-06, "loss": 3.3006, "step": 490000 }, { "epoch": 4.02, "learning_rate": 8.420161436157096e-06, "loss": 3.3015, "step": 490500 }, { "epoch": 4.02, "learning_rate": 8.418517483437177e-06, "loss": 3.2939, "step": 491000 }, { "epoch": 4.03, "learning_rate": 8.416873530717258e-06, "loss": 3.2984, "step": 491500 }, { "epoch": 4.03, "learning_rate": 8.415229577997338e-06, "loss": 3.294, "step": 492000 }, { "epoch": 4.03, "learning_rate": 8.413585625277417e-06, "loss": 3.2979, "step": 492500 }, { "epoch": 4.04, "learning_rate": 8.411941672557498e-06, "loss": 3.2938, "step": 493000 }, { "epoch": 4.04, "learning_rate": 8.410297719837577e-06, "loss": 3.2961, "step": 493500 }, { "epoch": 4.05, "learning_rate": 8.408653767117658e-06, "loss": 3.286, "step": 494000 }, { "epoch": 4.05, "learning_rate": 8.407009814397739e-06, "loss": 3.2893, "step": 494500 }, { "epoch": 4.06, "learning_rate": 8.405365861677818e-06, "loss": 3.289, "step": 495000 }, { "epoch": 4.06, "learning_rate": 8.4037219089579e-06, "loss": 3.2949, "step": 495500 }, { "epoch": 4.06, "learning_rate": 8.40207795623798e-06, "loss": 3.2887, "step": 496000 }, { "epoch": 4.07, "learning_rate": 8.40043400351806e-06, "loss": 3.2963, "step": 496500 }, { "epoch": 4.07, "learning_rate": 8.39879005079814e-06, "loss": 3.2874, "step": 497000 }, { "epoch": 4.08, "learning_rate": 8.39714609807822e-06, "loss": 3.2941, "step": 497500 }, { "epoch": 4.08, "learning_rate": 8.395502145358299e-06, "loss": 3.2907, "step": 498000 }, { "epoch": 4.08, "learning_rate": 8.39385819263838e-06, "loss": 3.2968, "step": 498500 }, { "epoch": 4.09, "learning_rate": 8.392214239918461e-06, "loss": 3.2853, "step": 499000 }, { "epoch": 4.09, "learning_rate": 8.39057028719854e-06, "loss": 3.2883, "step": 499500 }, { "epoch": 4.1, "learning_rate": 8.388926334478621e-06, "loss": 3.2939, "step": 500000 }, { "epoch": 4.1, "learning_rate": 8.3872823817587e-06, "loss": 3.2897, "step": 500500 }, { "epoch": 4.1, "learning_rate": 8.385638429038782e-06, "loss": 3.299, "step": 501000 }, { "epoch": 4.11, "learning_rate": 8.383994476318863e-06, "loss": 3.2911, "step": 501500 }, { "epoch": 4.11, "learning_rate": 8.382350523598942e-06, "loss": 3.2926, "step": 502000 }, { "epoch": 4.12, "learning_rate": 8.380706570879023e-06, "loss": 3.2913, "step": 502500 }, { "epoch": 4.12, "learning_rate": 8.379062618159102e-06, "loss": 3.3006, "step": 503000 }, { "epoch": 4.13, "learning_rate": 8.377418665439183e-06, "loss": 3.2884, "step": 503500 }, { "epoch": 4.13, "learning_rate": 8.375774712719262e-06, "loss": 3.2926, "step": 504000 }, { "epoch": 4.13, "learning_rate": 8.374130759999343e-06, "loss": 3.2898, "step": 504500 }, { "epoch": 4.14, "learning_rate": 8.372486807279423e-06, "loss": 3.2941, "step": 505000 }, { "epoch": 4.14, "learning_rate": 8.370842854559504e-06, "loss": 3.2886, "step": 505500 }, { "epoch": 4.15, "learning_rate": 8.369198901839583e-06, "loss": 3.2888, "step": 506000 }, { "epoch": 4.15, "learning_rate": 8.367554949119664e-06, "loss": 3.2876, "step": 506500 }, { "epoch": 4.15, "learning_rate": 8.365910996399745e-06, "loss": 3.2868, "step": 507000 }, { "epoch": 4.16, "learning_rate": 8.364267043679824e-06, "loss": 3.2854, "step": 507500 }, { "epoch": 4.16, "learning_rate": 8.362623090959905e-06, "loss": 3.2951, "step": 508000 }, { "epoch": 4.17, "learning_rate": 8.360979138239986e-06, "loss": 3.2928, "step": 508500 }, { "epoch": 4.17, "learning_rate": 8.359335185520065e-06, "loss": 3.286, "step": 509000 }, { "epoch": 4.17, "learning_rate": 8.357691232800146e-06, "loss": 3.2894, "step": 509500 }, { "epoch": 4.18, "learning_rate": 8.356047280080226e-06, "loss": 3.2841, "step": 510000 }, { "epoch": 4.18, "eval_accuracy": 0.4485220748826658, "eval_loss": 3.1493306159973145, "eval_runtime": 411.9758, "eval_samples_per_second": 748.481, "eval_steps_per_second": 15.596, "step": 510000 }, { "epoch": 4.18, "learning_rate": 8.354403327360305e-06, "loss": 3.2809, "step": 510500 }, { "epoch": 4.19, "learning_rate": 8.352759374640386e-06, "loss": 3.2861, "step": 511000 }, { "epoch": 4.19, "learning_rate": 8.351115421920467e-06, "loss": 3.2792, "step": 511500 }, { "epoch": 4.19, "learning_rate": 8.349471469200546e-06, "loss": 3.2864, "step": 512000 }, { "epoch": 4.2, "learning_rate": 8.347827516480627e-06, "loss": 3.2761, "step": 512500 }, { "epoch": 4.2, "learning_rate": 8.346183563760706e-06, "loss": 3.2821, "step": 513000 }, { "epoch": 4.21, "learning_rate": 8.344539611040787e-06, "loss": 3.2797, "step": 513500 }, { "epoch": 4.21, "learning_rate": 8.342895658320868e-06, "loss": 3.2854, "step": 514000 }, { "epoch": 4.22, "learning_rate": 8.341251705600948e-06, "loss": 3.2826, "step": 514500 }, { "epoch": 4.22, "learning_rate": 8.339607752881029e-06, "loss": 3.2923, "step": 515000 }, { "epoch": 4.22, "learning_rate": 8.337963800161108e-06, "loss": 3.2826, "step": 515500 }, { "epoch": 4.23, "learning_rate": 8.336319847441187e-06, "loss": 3.2886, "step": 516000 }, { "epoch": 4.23, "learning_rate": 8.334675894721268e-06, "loss": 3.2822, "step": 516500 }, { "epoch": 4.24, "learning_rate": 8.333031942001349e-06, "loss": 3.2828, "step": 517000 }, { "epoch": 4.24, "learning_rate": 8.331387989281428e-06, "loss": 3.2848, "step": 517500 }, { "epoch": 4.24, "learning_rate": 8.32974403656151e-06, "loss": 3.2878, "step": 518000 }, { "epoch": 4.25, "learning_rate": 8.328100083841589e-06, "loss": 3.2879, "step": 518500 }, { "epoch": 4.25, "learning_rate": 8.32645613112167e-06, "loss": 3.2825, "step": 519000 }, { "epoch": 4.26, "learning_rate": 8.32481217840175e-06, "loss": 3.2823, "step": 519500 }, { "epoch": 4.26, "learning_rate": 8.32316822568183e-06, "loss": 3.2841, "step": 520000 }, { "epoch": 4.26, "learning_rate": 8.32152427296191e-06, "loss": 3.2854, "step": 520500 }, { "epoch": 4.27, "learning_rate": 8.31988032024199e-06, "loss": 3.2817, "step": 521000 }, { "epoch": 4.27, "learning_rate": 8.318236367522071e-06, "loss": 3.277, "step": 521500 }, { "epoch": 4.28, "learning_rate": 8.316592414802152e-06, "loss": 3.2717, "step": 522000 }, { "epoch": 4.28, "learning_rate": 8.314948462082231e-06, "loss": 3.2838, "step": 522500 }, { "epoch": 4.28, "learning_rate": 8.31330450936231e-06, "loss": 3.2764, "step": 523000 }, { "epoch": 4.29, "learning_rate": 8.311660556642392e-06, "loss": 3.2824, "step": 523500 }, { "epoch": 4.29, "learning_rate": 8.310016603922472e-06, "loss": 3.2748, "step": 524000 }, { "epoch": 4.3, "learning_rate": 8.308372651202552e-06, "loss": 3.275, "step": 524500 }, { "epoch": 4.3, "learning_rate": 8.306728698482633e-06, "loss": 3.2802, "step": 525000 }, { "epoch": 4.31, "learning_rate": 8.305084745762712e-06, "loss": 3.2764, "step": 525500 }, { "epoch": 4.31, "learning_rate": 8.303440793042793e-06, "loss": 3.2746, "step": 526000 }, { "epoch": 4.31, "learning_rate": 8.301796840322874e-06, "loss": 3.2793, "step": 526500 }, { "epoch": 4.32, "learning_rate": 8.300152887602953e-06, "loss": 3.2819, "step": 527000 }, { "epoch": 4.32, "learning_rate": 8.298508934883034e-06, "loss": 3.2755, "step": 527500 }, { "epoch": 4.33, "learning_rate": 8.296864982163114e-06, "loss": 3.2831, "step": 528000 }, { "epoch": 4.33, "learning_rate": 8.295221029443193e-06, "loss": 3.2733, "step": 528500 }, { "epoch": 4.33, "learning_rate": 8.293577076723274e-06, "loss": 3.277, "step": 529000 }, { "epoch": 4.34, "learning_rate": 8.291933124003355e-06, "loss": 3.2783, "step": 529500 }, { "epoch": 4.34, "learning_rate": 8.290289171283434e-06, "loss": 3.2877, "step": 530000 }, { "epoch": 4.35, "learning_rate": 8.288645218563515e-06, "loss": 3.2762, "step": 530500 }, { "epoch": 4.35, "learning_rate": 8.287001265843594e-06, "loss": 3.2744, "step": 531000 }, { "epoch": 4.35, "learning_rate": 8.285357313123675e-06, "loss": 3.275, "step": 531500 }, { "epoch": 4.36, "learning_rate": 8.283713360403756e-06, "loss": 3.2724, "step": 532000 }, { "epoch": 4.36, "learning_rate": 8.282069407683835e-06, "loss": 3.2723, "step": 532500 }, { "epoch": 4.37, "learning_rate": 8.280425454963916e-06, "loss": 3.2662, "step": 533000 }, { "epoch": 4.37, "learning_rate": 8.278781502243996e-06, "loss": 3.2702, "step": 533500 }, { "epoch": 4.37, "learning_rate": 8.277137549524077e-06, "loss": 3.2736, "step": 534000 }, { "epoch": 4.38, "learning_rate": 8.275493596804156e-06, "loss": 3.2694, "step": 534500 }, { "epoch": 4.38, "learning_rate": 8.273849644084237e-06, "loss": 3.2722, "step": 535000 }, { "epoch": 4.39, "learning_rate": 8.272205691364316e-06, "loss": 3.2708, "step": 535500 }, { "epoch": 4.39, "learning_rate": 8.270561738644397e-06, "loss": 3.274, "step": 536000 }, { "epoch": 4.4, "learning_rate": 8.268917785924478e-06, "loss": 3.2798, "step": 536500 }, { "epoch": 4.4, "learning_rate": 8.267273833204557e-06, "loss": 3.2723, "step": 537000 }, { "epoch": 4.4, "learning_rate": 8.265629880484638e-06, "loss": 3.2728, "step": 537500 }, { "epoch": 4.41, "learning_rate": 8.263985927764718e-06, "loss": 3.2721, "step": 538000 }, { "epoch": 4.41, "learning_rate": 8.262341975044799e-06, "loss": 3.2774, "step": 538500 }, { "epoch": 4.42, "learning_rate": 8.26069802232488e-06, "loss": 3.2741, "step": 539000 }, { "epoch": 4.42, "learning_rate": 8.259054069604959e-06, "loss": 3.2655, "step": 539500 }, { "epoch": 4.42, "learning_rate": 8.25741011688504e-06, "loss": 3.2663, "step": 540000 }, { "epoch": 4.42, "eval_accuracy": 0.4502872659892971, "eval_loss": 3.1354901790618896, "eval_runtime": 409.1172, "eval_samples_per_second": 753.711, "eval_steps_per_second": 15.705, "step": 540000 }, { "epoch": 4.43, "learning_rate": 8.25576616416512e-06, "loss": 3.2669, "step": 540500 }, { "epoch": 4.43, "learning_rate": 8.254122211445198e-06, "loss": 3.2673, "step": 541000 }, { "epoch": 4.44, "learning_rate": 8.25247825872528e-06, "loss": 3.2689, "step": 541500 }, { "epoch": 4.44, "learning_rate": 8.25083430600536e-06, "loss": 3.2659, "step": 542000 }, { "epoch": 4.44, "learning_rate": 8.24919035328544e-06, "loss": 3.2657, "step": 542500 }, { "epoch": 4.45, "learning_rate": 8.24754640056552e-06, "loss": 3.2651, "step": 543000 }, { "epoch": 4.45, "learning_rate": 8.2459024478456e-06, "loss": 3.2673, "step": 543500 }, { "epoch": 4.46, "learning_rate": 8.244258495125681e-06, "loss": 3.2719, "step": 544000 }, { "epoch": 4.46, "learning_rate": 8.242614542405762e-06, "loss": 3.2714, "step": 544500 }, { "epoch": 4.47, "learning_rate": 8.240970589685841e-06, "loss": 3.2702, "step": 545000 }, { "epoch": 4.47, "learning_rate": 8.239326636965922e-06, "loss": 3.2649, "step": 545500 }, { "epoch": 4.47, "learning_rate": 8.237682684246001e-06, "loss": 3.2791, "step": 546000 }, { "epoch": 4.48, "learning_rate": 8.23603873152608e-06, "loss": 3.2627, "step": 546500 }, { "epoch": 4.48, "learning_rate": 8.234394778806162e-06, "loss": 3.2707, "step": 547000 }, { "epoch": 4.49, "learning_rate": 8.232750826086243e-06, "loss": 3.2704, "step": 547500 }, { "epoch": 4.49, "learning_rate": 8.231106873366322e-06, "loss": 3.2624, "step": 548000 }, { "epoch": 4.49, "learning_rate": 8.229462920646403e-06, "loss": 3.2661, "step": 548500 }, { "epoch": 4.5, "learning_rate": 8.227818967926482e-06, "loss": 3.2724, "step": 549000 }, { "epoch": 4.5, "learning_rate": 8.226175015206563e-06, "loss": 3.2653, "step": 549500 }, { "epoch": 4.51, "learning_rate": 8.224531062486644e-06, "loss": 3.2676, "step": 550000 }, { "epoch": 4.51, "learning_rate": 8.222887109766723e-06, "loss": 3.2621, "step": 550500 }, { "epoch": 4.51, "learning_rate": 8.221243157046804e-06, "loss": 3.2705, "step": 551000 }, { "epoch": 4.52, "learning_rate": 8.219599204326885e-06, "loss": 3.2677, "step": 551500 }, { "epoch": 4.52, "learning_rate": 8.217955251606965e-06, "loss": 3.2643, "step": 552000 }, { "epoch": 4.53, "learning_rate": 8.216311298887044e-06, "loss": 3.2621, "step": 552500 }, { "epoch": 4.53, "learning_rate": 8.214667346167125e-06, "loss": 3.262, "step": 553000 }, { "epoch": 4.53, "learning_rate": 8.213023393447204e-06, "loss": 3.2641, "step": 553500 }, { "epoch": 4.54, "learning_rate": 8.211379440727285e-06, "loss": 3.2677, "step": 554000 }, { "epoch": 4.54, "learning_rate": 8.209735488007366e-06, "loss": 3.2587, "step": 554500 }, { "epoch": 4.55, "learning_rate": 8.208091535287445e-06, "loss": 3.2668, "step": 555000 }, { "epoch": 4.55, "learning_rate": 8.206447582567526e-06, "loss": 3.2543, "step": 555500 }, { "epoch": 4.56, "learning_rate": 8.204803629847606e-06, "loss": 3.2612, "step": 556000 }, { "epoch": 4.56, "learning_rate": 8.203159677127687e-06, "loss": 3.2631, "step": 556500 }, { "epoch": 4.56, "learning_rate": 8.201515724407768e-06, "loss": 3.2678, "step": 557000 }, { "epoch": 4.57, "learning_rate": 8.199871771687847e-06, "loss": 3.2658, "step": 557500 }, { "epoch": 4.57, "learning_rate": 8.198227818967928e-06, "loss": 3.2576, "step": 558000 }, { "epoch": 4.58, "learning_rate": 8.196583866248007e-06, "loss": 3.2639, "step": 558500 }, { "epoch": 4.58, "learning_rate": 8.194939913528086e-06, "loss": 3.2549, "step": 559000 }, { "epoch": 4.58, "learning_rate": 8.193295960808167e-06, "loss": 3.2605, "step": 559500 }, { "epoch": 4.59, "learning_rate": 8.191652008088248e-06, "loss": 3.2623, "step": 560000 }, { "epoch": 4.59, "learning_rate": 8.190008055368328e-06, "loss": 3.2635, "step": 560500 }, { "epoch": 4.6, "learning_rate": 8.188364102648409e-06, "loss": 3.2587, "step": 561000 }, { "epoch": 4.6, "learning_rate": 8.186720149928488e-06, "loss": 3.266, "step": 561500 }, { "epoch": 4.6, "learning_rate": 8.185076197208569e-06, "loss": 3.2584, "step": 562000 }, { "epoch": 4.61, "learning_rate": 8.18343224448865e-06, "loss": 3.2625, "step": 562500 }, { "epoch": 4.61, "learning_rate": 8.181788291768729e-06, "loss": 3.2598, "step": 563000 }, { "epoch": 4.62, "learning_rate": 8.18014433904881e-06, "loss": 3.2545, "step": 563500 }, { "epoch": 4.62, "learning_rate": 8.178500386328891e-06, "loss": 3.2591, "step": 564000 }, { "epoch": 4.62, "learning_rate": 8.17685643360897e-06, "loss": 3.2561, "step": 564500 }, { "epoch": 4.63, "learning_rate": 8.17521248088905e-06, "loss": 3.2604, "step": 565000 }, { "epoch": 4.63, "learning_rate": 8.17356852816913e-06, "loss": 3.2529, "step": 565500 }, { "epoch": 4.64, "learning_rate": 8.17192457544921e-06, "loss": 3.2568, "step": 566000 }, { "epoch": 4.64, "learning_rate": 8.170280622729291e-06, "loss": 3.2554, "step": 566500 }, { "epoch": 4.65, "learning_rate": 8.168636670009372e-06, "loss": 3.2528, "step": 567000 }, { "epoch": 4.65, "learning_rate": 8.166992717289451e-06, "loss": 3.2633, "step": 567500 }, { "epoch": 4.65, "learning_rate": 8.165348764569532e-06, "loss": 3.2545, "step": 568000 }, { "epoch": 4.66, "learning_rate": 8.163704811849611e-06, "loss": 3.257, "step": 568500 }, { "epoch": 4.66, "learning_rate": 8.162060859129692e-06, "loss": 3.2531, "step": 569000 }, { "epoch": 4.67, "learning_rate": 8.160416906409773e-06, "loss": 3.2573, "step": 569500 }, { "epoch": 4.67, "learning_rate": 8.158772953689853e-06, "loss": 3.259, "step": 570000 }, { "epoch": 4.67, "eval_accuracy": 0.45185906617575416, "eval_loss": 3.122938632965088, "eval_runtime": 406.5956, "eval_samples_per_second": 758.385, "eval_steps_per_second": 15.802, "step": 570000 }, { "epoch": 4.67, "learning_rate": 8.157129000969934e-06, "loss": 3.2607, "step": 570500 }, { "epoch": 4.68, "learning_rate": 8.155485048250013e-06, "loss": 3.2596, "step": 571000 }, { "epoch": 4.68, "learning_rate": 8.153841095530092e-06, "loss": 3.2551, "step": 571500 }, { "epoch": 4.69, "learning_rate": 8.152197142810173e-06, "loss": 3.26, "step": 572000 }, { "epoch": 4.69, "learning_rate": 8.150553190090254e-06, "loss": 3.2566, "step": 572500 }, { "epoch": 4.69, "learning_rate": 8.148909237370333e-06, "loss": 3.2509, "step": 573000 }, { "epoch": 4.7, "learning_rate": 8.147265284650414e-06, "loss": 3.2506, "step": 573500 }, { "epoch": 4.7, "learning_rate": 8.145621331930494e-06, "loss": 3.2583, "step": 574000 }, { "epoch": 4.71, "learning_rate": 8.143977379210575e-06, "loss": 3.2572, "step": 574500 }, { "epoch": 4.71, "learning_rate": 8.142333426490656e-06, "loss": 3.2544, "step": 575000 }, { "epoch": 4.71, "learning_rate": 8.140689473770735e-06, "loss": 3.2544, "step": 575500 }, { "epoch": 4.72, "learning_rate": 8.139045521050816e-06, "loss": 3.2497, "step": 576000 }, { "epoch": 4.72, "learning_rate": 8.137401568330897e-06, "loss": 3.2496, "step": 576500 }, { "epoch": 4.73, "learning_rate": 8.135757615610974e-06, "loss": 3.2468, "step": 577000 }, { "epoch": 4.73, "learning_rate": 8.134113662891055e-06, "loss": 3.2497, "step": 577500 }, { "epoch": 4.74, "learning_rate": 8.132469710171136e-06, "loss": 3.249, "step": 578000 }, { "epoch": 4.74, "learning_rate": 8.130825757451216e-06, "loss": 3.2553, "step": 578500 }, { "epoch": 4.74, "learning_rate": 8.129181804731297e-06, "loss": 3.2604, "step": 579000 }, { "epoch": 4.75, "learning_rate": 8.127537852011378e-06, "loss": 3.2497, "step": 579500 }, { "epoch": 4.75, "learning_rate": 8.125893899291457e-06, "loss": 3.2503, "step": 580000 }, { "epoch": 4.76, "learning_rate": 8.124249946571538e-06, "loss": 3.2484, "step": 580500 }, { "epoch": 4.76, "learning_rate": 8.122605993851617e-06, "loss": 3.2596, "step": 581000 }, { "epoch": 4.76, "learning_rate": 8.120962041131698e-06, "loss": 3.2464, "step": 581500 }, { "epoch": 4.77, "learning_rate": 8.119318088411779e-06, "loss": 3.2508, "step": 582000 }, { "epoch": 4.77, "learning_rate": 8.117674135691858e-06, "loss": 3.249, "step": 582500 }, { "epoch": 4.78, "learning_rate": 8.116030182971938e-06, "loss": 3.2492, "step": 583000 }, { "epoch": 4.78, "learning_rate": 8.114386230252019e-06, "loss": 3.2457, "step": 583500 }, { "epoch": 4.78, "learning_rate": 8.112742277532098e-06, "loss": 3.2488, "step": 584000 }, { "epoch": 4.79, "learning_rate": 8.111098324812179e-06, "loss": 3.2475, "step": 584500 }, { "epoch": 4.79, "learning_rate": 8.10945437209226e-06, "loss": 3.2466, "step": 585000 }, { "epoch": 4.8, "learning_rate": 8.107810419372339e-06, "loss": 3.2452, "step": 585500 }, { "epoch": 4.8, "learning_rate": 8.10616646665242e-06, "loss": 3.2506, "step": 586000 }, { "epoch": 4.81, "learning_rate": 8.1045225139325e-06, "loss": 3.243, "step": 586500 }, { "epoch": 4.81, "learning_rate": 8.10287856121258e-06, "loss": 3.2476, "step": 587000 }, { "epoch": 4.81, "learning_rate": 8.101234608492661e-06, "loss": 3.2427, "step": 587500 }, { "epoch": 4.82, "learning_rate": 8.09959065577274e-06, "loss": 3.2548, "step": 588000 }, { "epoch": 4.82, "learning_rate": 8.097946703052822e-06, "loss": 3.2503, "step": 588500 }, { "epoch": 4.83, "learning_rate": 8.0963027503329e-06, "loss": 3.2469, "step": 589000 }, { "epoch": 4.83, "learning_rate": 8.09465879761298e-06, "loss": 3.239, "step": 589500 }, { "epoch": 4.83, "learning_rate": 8.093014844893061e-06, "loss": 3.2494, "step": 590000 }, { "epoch": 4.84, "learning_rate": 8.091370892173142e-06, "loss": 3.2423, "step": 590500 }, { "epoch": 4.84, "learning_rate": 8.089726939453221e-06, "loss": 3.2455, "step": 591000 }, { "epoch": 4.85, "learning_rate": 8.088082986733302e-06, "loss": 3.2395, "step": 591500 }, { "epoch": 4.85, "learning_rate": 8.086439034013383e-06, "loss": 3.244, "step": 592000 }, { "epoch": 4.85, "learning_rate": 8.084795081293463e-06, "loss": 3.2469, "step": 592500 }, { "epoch": 4.86, "learning_rate": 8.083151128573544e-06, "loss": 3.244, "step": 593000 }, { "epoch": 4.86, "learning_rate": 8.081507175853623e-06, "loss": 3.2457, "step": 593500 }, { "epoch": 4.87, "learning_rate": 8.079863223133704e-06, "loss": 3.2449, "step": 594000 }, { "epoch": 4.87, "learning_rate": 8.078219270413785e-06, "loss": 3.2407, "step": 594500 }, { "epoch": 4.87, "learning_rate": 8.076575317693864e-06, "loss": 3.2462, "step": 595000 }, { "epoch": 4.88, "learning_rate": 8.074931364973943e-06, "loss": 3.2402, "step": 595500 }, { "epoch": 4.88, "learning_rate": 8.073287412254024e-06, "loss": 3.2431, "step": 596000 }, { "epoch": 4.89, "learning_rate": 8.071643459534104e-06, "loss": 3.2429, "step": 596500 }, { "epoch": 4.89, "learning_rate": 8.069999506814185e-06, "loss": 3.2418, "step": 597000 }, { "epoch": 4.9, "learning_rate": 8.068355554094265e-06, "loss": 3.2403, "step": 597500 }, { "epoch": 4.9, "learning_rate": 8.066711601374345e-06, "loss": 3.2438, "step": 598000 }, { "epoch": 4.9, "learning_rate": 8.065067648654426e-06, "loss": 3.2456, "step": 598500 }, { "epoch": 4.91, "learning_rate": 8.063423695934505e-06, "loss": 3.2345, "step": 599000 }, { "epoch": 4.91, "learning_rate": 8.061779743214586e-06, "loss": 3.2393, "step": 599500 }, { "epoch": 4.92, "learning_rate": 8.060135790494667e-06, "loss": 3.2429, "step": 600000 }, { "epoch": 4.92, "eval_accuracy": 0.45349973762719414, "eval_loss": 3.1096389293670654, "eval_runtime": 406.7173, "eval_samples_per_second": 758.158, "eval_steps_per_second": 15.797, "step": 600000 }, { "epoch": 4.92, "learning_rate": 8.058491837774746e-06, "loss": 3.2356, "step": 600500 }, { "epoch": 4.92, "learning_rate": 8.056847885054826e-06, "loss": 3.2468, "step": 601000 }, { "epoch": 4.93, "learning_rate": 8.055203932334907e-06, "loss": 3.2398, "step": 601500 }, { "epoch": 4.93, "learning_rate": 8.053559979614986e-06, "loss": 3.2352, "step": 602000 }, { "epoch": 4.94, "learning_rate": 8.051916026895067e-06, "loss": 3.2442, "step": 602500 }, { "epoch": 4.94, "learning_rate": 8.050272074175148e-06, "loss": 3.2401, "step": 603000 }, { "epoch": 4.94, "learning_rate": 8.048628121455227e-06, "loss": 3.2406, "step": 603500 }, { "epoch": 4.95, "learning_rate": 8.046984168735308e-06, "loss": 3.2348, "step": 604000 }, { "epoch": 4.95, "learning_rate": 8.045340216015389e-06, "loss": 3.2435, "step": 604500 }, { "epoch": 4.96, "learning_rate": 8.043696263295468e-06, "loss": 3.2372, "step": 605000 }, { "epoch": 4.96, "learning_rate": 8.04205231057555e-06, "loss": 3.2415, "step": 605500 }, { "epoch": 4.96, "learning_rate": 8.040408357855628e-06, "loss": 3.234, "step": 606000 }, { "epoch": 4.97, "learning_rate": 8.03876440513571e-06, "loss": 3.2412, "step": 606500 }, { "epoch": 4.97, "learning_rate": 8.037120452415789e-06, "loss": 3.2354, "step": 607000 }, { "epoch": 4.98, "learning_rate": 8.03547649969587e-06, "loss": 3.2394, "step": 607500 }, { "epoch": 4.98, "learning_rate": 8.033832546975949e-06, "loss": 3.2317, "step": 608000 }, { "epoch": 4.99, "learning_rate": 8.03218859425603e-06, "loss": 3.2447, "step": 608500 }, { "epoch": 4.99, "learning_rate": 8.03054464153611e-06, "loss": 3.2438, "step": 609000 }, { "epoch": 4.99, "learning_rate": 8.02890068881619e-06, "loss": 3.2373, "step": 609500 }, { "epoch": 5.0, "learning_rate": 8.027256736096271e-06, "loss": 3.2411, "step": 610000 }, { "epoch": 5.0, "learning_rate": 8.02561278337635e-06, "loss": 3.2358, "step": 610500 }, { "epoch": 5.01, "learning_rate": 8.023968830656431e-06, "loss": 3.2351, "step": 611000 }, { "epoch": 5.01, "learning_rate": 8.02232487793651e-06, "loss": 3.2341, "step": 611500 }, { "epoch": 5.01, "learning_rate": 8.020680925216592e-06, "loss": 3.2321, "step": 612000 }, { "epoch": 5.02, "learning_rate": 8.019036972496673e-06, "loss": 3.2408, "step": 612500 }, { "epoch": 5.02, "learning_rate": 8.017393019776752e-06, "loss": 3.2406, "step": 613000 }, { "epoch": 5.03, "learning_rate": 8.015749067056831e-06, "loss": 3.2375, "step": 613500 }, { "epoch": 5.03, "learning_rate": 8.014105114336912e-06, "loss": 3.2416, "step": 614000 }, { "epoch": 5.03, "learning_rate": 8.012461161616991e-06, "loss": 3.2331, "step": 614500 }, { "epoch": 5.04, "learning_rate": 8.010817208897072e-06, "loss": 3.2473, "step": 615000 }, { "epoch": 5.04, "learning_rate": 8.009173256177153e-06, "loss": 3.2345, "step": 615500 }, { "epoch": 5.05, "learning_rate": 8.007529303457233e-06, "loss": 3.2292, "step": 616000 }, { "epoch": 5.05, "learning_rate": 8.005885350737314e-06, "loss": 3.2327, "step": 616500 }, { "epoch": 5.05, "learning_rate": 8.004241398017395e-06, "loss": 3.2251, "step": 617000 }, { "epoch": 5.06, "learning_rate": 8.002597445297474e-06, "loss": 3.2326, "step": 617500 }, { "epoch": 5.06, "learning_rate": 8.000953492577555e-06, "loss": 3.2348, "step": 618000 }, { "epoch": 5.07, "learning_rate": 7.999309539857634e-06, "loss": 3.231, "step": 618500 }, { "epoch": 5.07, "learning_rate": 7.997665587137715e-06, "loss": 3.236, "step": 619000 }, { "epoch": 5.08, "learning_rate": 7.996021634417794e-06, "loss": 3.2324, "step": 619500 }, { "epoch": 5.08, "learning_rate": 7.994377681697875e-06, "loss": 3.2321, "step": 620000 }, { "epoch": 5.08, "learning_rate": 7.992733728977955e-06, "loss": 3.2326, "step": 620500 }, { "epoch": 5.09, "learning_rate": 7.991089776258036e-06, "loss": 3.2318, "step": 621000 }, { "epoch": 5.09, "learning_rate": 7.989445823538115e-06, "loss": 3.2291, "step": 621500 }, { "epoch": 5.1, "learning_rate": 7.987801870818196e-06, "loss": 3.2291, "step": 622000 }, { "epoch": 5.1, "learning_rate": 7.986157918098277e-06, "loss": 3.2264, "step": 622500 }, { "epoch": 5.1, "learning_rate": 7.984513965378356e-06, "loss": 3.2319, "step": 623000 }, { "epoch": 5.11, "learning_rate": 7.982870012658437e-06, "loss": 3.231, "step": 623500 }, { "epoch": 5.11, "learning_rate": 7.981226059938516e-06, "loss": 3.2294, "step": 624000 }, { "epoch": 5.12, "learning_rate": 7.979582107218597e-06, "loss": 3.2212, "step": 624500 }, { "epoch": 5.12, "learning_rate": 7.977938154498678e-06, "loss": 3.2315, "step": 625000 }, { "epoch": 5.12, "learning_rate": 7.976294201778758e-06, "loss": 3.2388, "step": 625500 }, { "epoch": 5.13, "learning_rate": 7.974650249058837e-06, "loss": 3.2373, "step": 626000 }, { "epoch": 5.13, "learning_rate": 7.973006296338918e-06, "loss": 3.2268, "step": 626500 }, { "epoch": 5.14, "learning_rate": 7.971362343618997e-06, "loss": 3.2271, "step": 627000 }, { "epoch": 5.14, "learning_rate": 7.969718390899078e-06, "loss": 3.23, "step": 627500 }, { "epoch": 5.15, "learning_rate": 7.968074438179159e-06, "loss": 3.2272, "step": 628000 }, { "epoch": 5.15, "learning_rate": 7.966430485459238e-06, "loss": 3.2258, "step": 628500 }, { "epoch": 5.15, "learning_rate": 7.96478653273932e-06, "loss": 3.2267, "step": 629000 }, { "epoch": 5.16, "learning_rate": 7.9631425800194e-06, "loss": 3.2273, "step": 629500 }, { "epoch": 5.16, "learning_rate": 7.96149862729948e-06, "loss": 3.2234, "step": 630000 }, { "epoch": 5.16, "eval_accuracy": 0.4554354208134474, "eval_loss": 3.0947325229644775, "eval_runtime": 407.3576, "eval_samples_per_second": 756.966, "eval_steps_per_second": 15.772, "step": 630000 }, { "epoch": 5.17, "learning_rate": 7.95985467457956e-06, "loss": 3.2299, "step": 630500 }, { "epoch": 5.17, "learning_rate": 7.95821072185964e-06, "loss": 3.2245, "step": 631000 }, { "epoch": 5.17, "learning_rate": 7.95656676913972e-06, "loss": 3.2264, "step": 631500 }, { "epoch": 5.18, "learning_rate": 7.9549228164198e-06, "loss": 3.2198, "step": 632000 }, { "epoch": 5.18, "learning_rate": 7.953278863699881e-06, "loss": 3.2207, "step": 632500 }, { "epoch": 5.19, "learning_rate": 7.95163491097996e-06, "loss": 3.225, "step": 633000 }, { "epoch": 5.19, "learning_rate": 7.949990958260041e-06, "loss": 3.2346, "step": 633500 }, { "epoch": 5.19, "learning_rate": 7.94834700554012e-06, "loss": 3.228, "step": 634000 }, { "epoch": 5.2, "learning_rate": 7.946703052820202e-06, "loss": 3.2245, "step": 634500 }, { "epoch": 5.2, "learning_rate": 7.945059100100283e-06, "loss": 3.2281, "step": 635000 }, { "epoch": 5.21, "learning_rate": 7.943415147380362e-06, "loss": 3.2263, "step": 635500 }, { "epoch": 5.21, "learning_rate": 7.941771194660443e-06, "loss": 3.2242, "step": 636000 }, { "epoch": 5.21, "learning_rate": 7.940127241940522e-06, "loss": 3.2237, "step": 636500 }, { "epoch": 5.22, "learning_rate": 7.938483289220603e-06, "loss": 3.2275, "step": 637000 }, { "epoch": 5.22, "learning_rate": 7.936839336500682e-06, "loss": 3.223, "step": 637500 }, { "epoch": 5.23, "learning_rate": 7.935195383780763e-06, "loss": 3.2262, "step": 638000 }, { "epoch": 5.23, "learning_rate": 7.933551431060843e-06, "loss": 3.2343, "step": 638500 }, { "epoch": 5.24, "learning_rate": 7.931907478340924e-06, "loss": 3.225, "step": 639000 }, { "epoch": 5.24, "learning_rate": 7.930263525621003e-06, "loss": 3.2203, "step": 639500 }, { "epoch": 5.24, "learning_rate": 7.928619572901084e-06, "loss": 3.2276, "step": 640000 }, { "epoch": 5.25, "learning_rate": 7.926975620181165e-06, "loss": 3.2216, "step": 640500 }, { "epoch": 5.25, "learning_rate": 7.925331667461244e-06, "loss": 3.2199, "step": 641000 }, { "epoch": 5.26, "learning_rate": 7.923687714741325e-06, "loss": 3.226, "step": 641500 }, { "epoch": 5.26, "learning_rate": 7.922043762021406e-06, "loss": 3.2282, "step": 642000 }, { "epoch": 5.26, "learning_rate": 7.920399809301485e-06, "loss": 3.2232, "step": 642500 }, { "epoch": 5.27, "learning_rate": 7.918755856581566e-06, "loss": 3.2266, "step": 643000 }, { "epoch": 5.27, "learning_rate": 7.917111903861646e-06, "loss": 3.2136, "step": 643500 }, { "epoch": 5.28, "learning_rate": 7.915467951141725e-06, "loss": 3.2274, "step": 644000 }, { "epoch": 5.28, "learning_rate": 7.913823998421806e-06, "loss": 3.2235, "step": 644500 }, { "epoch": 5.28, "learning_rate": 7.912180045701887e-06, "loss": 3.2288, "step": 645000 }, { "epoch": 5.29, "learning_rate": 7.910536092981966e-06, "loss": 3.2166, "step": 645500 }, { "epoch": 5.29, "learning_rate": 7.908892140262047e-06, "loss": 3.2206, "step": 646000 }, { "epoch": 5.3, "learning_rate": 7.907248187542126e-06, "loss": 3.2212, "step": 646500 }, { "epoch": 5.3, "learning_rate": 7.905604234822207e-06, "loss": 3.2169, "step": 647000 }, { "epoch": 5.3, "learning_rate": 7.903960282102288e-06, "loss": 3.2286, "step": 647500 }, { "epoch": 5.31, "learning_rate": 7.902316329382368e-06, "loss": 3.2228, "step": 648000 }, { "epoch": 5.31, "learning_rate": 7.900672376662449e-06, "loss": 3.2177, "step": 648500 }, { "epoch": 5.32, "learning_rate": 7.899028423942528e-06, "loss": 3.2192, "step": 649000 }, { "epoch": 5.32, "learning_rate": 7.897384471222607e-06, "loss": 3.2179, "step": 649500 }, { "epoch": 5.33, "learning_rate": 7.895740518502688e-06, "loss": 3.227, "step": 650000 }, { "epoch": 5.33, "learning_rate": 7.894096565782769e-06, "loss": 3.2206, "step": 650500 }, { "epoch": 5.33, "learning_rate": 7.892452613062848e-06, "loss": 3.2183, "step": 651000 }, { "epoch": 5.34, "learning_rate": 7.89080866034293e-06, "loss": 3.2277, "step": 651500 }, { "epoch": 5.34, "learning_rate": 7.889164707623009e-06, "loss": 3.2174, "step": 652000 }, { "epoch": 5.35, "learning_rate": 7.88752075490309e-06, "loss": 3.2233, "step": 652500 }, { "epoch": 5.35, "learning_rate": 7.88587680218317e-06, "loss": 3.2165, "step": 653000 }, { "epoch": 5.35, "learning_rate": 7.88423284946325e-06, "loss": 3.2164, "step": 653500 }, { "epoch": 5.36, "learning_rate": 7.88258889674333e-06, "loss": 3.2157, "step": 654000 }, { "epoch": 5.36, "learning_rate": 7.880944944023412e-06, "loss": 3.2215, "step": 654500 }, { "epoch": 5.37, "learning_rate": 7.879300991303491e-06, "loss": 3.2217, "step": 655000 }, { "epoch": 5.37, "learning_rate": 7.87765703858357e-06, "loss": 3.2213, "step": 655500 }, { "epoch": 5.37, "learning_rate": 7.876013085863651e-06, "loss": 3.2122, "step": 656000 }, { "epoch": 5.38, "learning_rate": 7.87436913314373e-06, "loss": 3.2176, "step": 656500 }, { "epoch": 5.38, "learning_rate": 7.872725180423812e-06, "loss": 3.2147, "step": 657000 }, { "epoch": 5.39, "learning_rate": 7.871081227703893e-06, "loss": 3.217, "step": 657500 }, { "epoch": 5.39, "learning_rate": 7.869437274983972e-06, "loss": 3.219, "step": 658000 }, { "epoch": 5.39, "learning_rate": 7.867793322264053e-06, "loss": 3.2219, "step": 658500 }, { "epoch": 5.4, "learning_rate": 7.866149369544132e-06, "loss": 3.215, "step": 659000 }, { "epoch": 5.4, "learning_rate": 7.864505416824213e-06, "loss": 3.2098, "step": 659500 }, { "epoch": 5.41, "learning_rate": 7.862861464104294e-06, "loss": 3.2115, "step": 660000 }, { "epoch": 5.41, "eval_accuracy": 0.4573150918223206, "eval_loss": 3.0818052291870117, "eval_runtime": 409.6703, "eval_samples_per_second": 752.693, "eval_steps_per_second": 15.683, "step": 660000 }, { "epoch": 5.41, "learning_rate": 7.861217511384373e-06, "loss": 3.214, "step": 660500 }, { "epoch": 5.42, "learning_rate": 7.859573558664454e-06, "loss": 3.2186, "step": 661000 }, { "epoch": 5.42, "learning_rate": 7.857929605944534e-06, "loss": 3.2234, "step": 661500 }, { "epoch": 5.42, "learning_rate": 7.856285653224613e-06, "loss": 3.22, "step": 662000 }, { "epoch": 5.43, "learning_rate": 7.854641700504694e-06, "loss": 3.2187, "step": 662500 }, { "epoch": 5.43, "learning_rate": 7.852997747784775e-06, "loss": 3.2166, "step": 663000 }, { "epoch": 5.44, "learning_rate": 7.851353795064854e-06, "loss": 3.218, "step": 663500 }, { "epoch": 5.44, "learning_rate": 7.849709842344935e-06, "loss": 3.2183, "step": 664000 }, { "epoch": 5.44, "learning_rate": 7.848065889625014e-06, "loss": 3.2065, "step": 664500 }, { "epoch": 5.45, "learning_rate": 7.846421936905095e-06, "loss": 3.2123, "step": 665000 }, { "epoch": 5.45, "learning_rate": 7.844777984185176e-06, "loss": 3.2178, "step": 665500 }, { "epoch": 5.46, "learning_rate": 7.843134031465256e-06, "loss": 3.215, "step": 666000 }, { "epoch": 5.46, "learning_rate": 7.841490078745336e-06, "loss": 3.212, "step": 666500 }, { "epoch": 5.46, "learning_rate": 7.839846126025416e-06, "loss": 3.2089, "step": 667000 }, { "epoch": 5.47, "learning_rate": 7.838202173305495e-06, "loss": 3.2153, "step": 667500 }, { "epoch": 5.47, "learning_rate": 7.836558220585576e-06, "loss": 3.2125, "step": 668000 }, { "epoch": 5.48, "learning_rate": 7.834914267865657e-06, "loss": 3.2162, "step": 668500 }, { "epoch": 5.48, "learning_rate": 7.833270315145736e-06, "loss": 3.2106, "step": 669000 }, { "epoch": 5.49, "learning_rate": 7.831626362425817e-06, "loss": 3.2122, "step": 669500 }, { "epoch": 5.49, "learning_rate": 7.829982409705898e-06, "loss": 3.2046, "step": 670000 }, { "epoch": 5.49, "learning_rate": 7.828338456985978e-06, "loss": 3.202, "step": 670500 }, { "epoch": 5.5, "learning_rate": 7.826694504266058e-06, "loss": 3.2076, "step": 671000 }, { "epoch": 5.5, "learning_rate": 7.825050551546138e-06, "loss": 3.2012, "step": 671500 }, { "epoch": 5.51, "learning_rate": 7.823406598826219e-06, "loss": 3.2134, "step": 672000 }, { "epoch": 5.51, "learning_rate": 7.8217626461063e-06, "loss": 3.2088, "step": 672500 }, { "epoch": 5.51, "learning_rate": 7.820118693386379e-06, "loss": 3.2135, "step": 673000 }, { "epoch": 5.52, "learning_rate": 7.81847474066646e-06, "loss": 3.206, "step": 673500 }, { "epoch": 5.52, "learning_rate": 7.81683078794654e-06, "loss": 3.2101, "step": 674000 }, { "epoch": 5.53, "learning_rate": 7.815186835226619e-06, "loss": 3.2208, "step": 674500 }, { "epoch": 5.53, "learning_rate": 7.8135428825067e-06, "loss": 3.2144, "step": 675000 }, { "epoch": 5.53, "learning_rate": 7.81189892978678e-06, "loss": 3.2144, "step": 675500 }, { "epoch": 5.54, "learning_rate": 7.81025497706686e-06, "loss": 3.2076, "step": 676000 }, { "epoch": 5.54, "learning_rate": 7.80861102434694e-06, "loss": 3.2067, "step": 676500 }, { "epoch": 5.55, "learning_rate": 7.80696707162702e-06, "loss": 3.2107, "step": 677000 }, { "epoch": 5.55, "learning_rate": 7.805323118907101e-06, "loss": 3.2109, "step": 677500 }, { "epoch": 5.55, "learning_rate": 7.803679166187182e-06, "loss": 3.2116, "step": 678000 }, { "epoch": 5.56, "learning_rate": 7.802035213467261e-06, "loss": 3.205, "step": 678500 }, { "epoch": 5.56, "learning_rate": 7.800391260747342e-06, "loss": 3.2019, "step": 679000 }, { "epoch": 5.57, "learning_rate": 7.798747308027421e-06, "loss": 3.2089, "step": 679500 }, { "epoch": 5.57, "learning_rate": 7.7971033553075e-06, "loss": 3.2096, "step": 680000 }, { "epoch": 5.58, "learning_rate": 7.795459402587582e-06, "loss": 3.2131, "step": 680500 }, { "epoch": 5.58, "learning_rate": 7.793815449867663e-06, "loss": 3.2079, "step": 681000 }, { "epoch": 5.58, "learning_rate": 7.792171497147742e-06, "loss": 3.2019, "step": 681500 }, { "epoch": 5.59, "learning_rate": 7.790527544427823e-06, "loss": 3.2092, "step": 682000 }, { "epoch": 5.59, "learning_rate": 7.788883591707904e-06, "loss": 3.2066, "step": 682500 }, { "epoch": 5.6, "learning_rate": 7.787239638987983e-06, "loss": 3.1992, "step": 683000 }, { "epoch": 5.6, "learning_rate": 7.785595686268064e-06, "loss": 3.2042, "step": 683500 }, { "epoch": 5.6, "learning_rate": 7.783951733548143e-06, "loss": 3.1997, "step": 684000 }, { "epoch": 5.61, "learning_rate": 7.782307780828224e-06, "loss": 3.2064, "step": 684500 }, { "epoch": 5.61, "learning_rate": 7.780663828108305e-06, "loss": 3.2102, "step": 685000 }, { "epoch": 5.62, "learning_rate": 7.779019875388385e-06, "loss": 3.202, "step": 685500 }, { "epoch": 5.62, "learning_rate": 7.777375922668464e-06, "loss": 3.2049, "step": 686000 }, { "epoch": 5.62, "learning_rate": 7.775731969948545e-06, "loss": 3.212, "step": 686500 }, { "epoch": 5.63, "learning_rate": 7.774088017228624e-06, "loss": 3.2008, "step": 687000 }, { "epoch": 5.63, "learning_rate": 7.772444064508705e-06, "loss": 3.2015, "step": 687500 }, { "epoch": 5.64, "learning_rate": 7.770800111788786e-06, "loss": 3.1986, "step": 688000 }, { "epoch": 5.64, "learning_rate": 7.769156159068865e-06, "loss": 3.2028, "step": 688500 }, { "epoch": 5.64, "learning_rate": 7.767512206348946e-06, "loss": 3.21, "step": 689000 }, { "epoch": 5.65, "learning_rate": 7.765868253629026e-06, "loss": 3.2104, "step": 689500 }, { "epoch": 5.65, "learning_rate": 7.764224300909107e-06, "loss": 3.2011, "step": 690000 }, { "epoch": 5.65, "eval_accuracy": 0.4590029752907897, "eval_loss": 3.068504810333252, "eval_runtime": 404.8644, "eval_samples_per_second": 761.628, "eval_steps_per_second": 15.87, "step": 690000 }, { "epoch": 5.66, "learning_rate": 7.762580348189188e-06, "loss": 3.2015, "step": 690500 }, { "epoch": 5.66, "learning_rate": 7.760936395469267e-06, "loss": 3.2047, "step": 691000 }, { "epoch": 5.67, "learning_rate": 7.759292442749348e-06, "loss": 3.2034, "step": 691500 }, { "epoch": 5.67, "learning_rate": 7.757648490029427e-06, "loss": 3.1976, "step": 692000 }, { "epoch": 5.67, "learning_rate": 7.756004537309506e-06, "loss": 3.2029, "step": 692500 }, { "epoch": 5.68, "learning_rate": 7.754360584589587e-06, "loss": 3.1993, "step": 693000 }, { "epoch": 5.68, "learning_rate": 7.752716631869668e-06, "loss": 3.2092, "step": 693500 }, { "epoch": 5.69, "learning_rate": 7.751072679149748e-06, "loss": 3.2087, "step": 694000 }, { "epoch": 5.69, "learning_rate": 7.749428726429829e-06, "loss": 3.1983, "step": 694500 }, { "epoch": 5.69, "learning_rate": 7.747784773709908e-06, "loss": 3.209, "step": 695000 }, { "epoch": 5.7, "learning_rate": 7.746140820989989e-06, "loss": 3.1986, "step": 695500 }, { "epoch": 5.7, "learning_rate": 7.74449686827007e-06, "loss": 3.2066, "step": 696000 }, { "epoch": 5.71, "learning_rate": 7.74285291555015e-06, "loss": 3.2, "step": 696500 }, { "epoch": 5.71, "learning_rate": 7.74120896283023e-06, "loss": 3.1934, "step": 697000 }, { "epoch": 5.71, "learning_rate": 7.739565010110311e-06, "loss": 3.1966, "step": 697500 }, { "epoch": 5.72, "learning_rate": 7.73792105739039e-06, "loss": 3.1951, "step": 698000 }, { "epoch": 5.72, "learning_rate": 7.73627710467047e-06, "loss": 3.2011, "step": 698500 }, { "epoch": 5.73, "learning_rate": 7.73463315195055e-06, "loss": 3.2043, "step": 699000 }, { "epoch": 5.73, "learning_rate": 7.73298919923063e-06, "loss": 3.1936, "step": 699500 }, { "epoch": 5.73, "learning_rate": 7.731345246510711e-06, "loss": 3.1944, "step": 700000 }, { "epoch": 5.74, "learning_rate": 7.729701293790792e-06, "loss": 3.1976, "step": 700500 }, { "epoch": 5.74, "learning_rate": 7.728057341070871e-06, "loss": 3.1919, "step": 701000 }, { "epoch": 5.75, "learning_rate": 7.726413388350952e-06, "loss": 3.2013, "step": 701500 }, { "epoch": 5.75, "learning_rate": 7.724769435631031e-06, "loss": 3.1973, "step": 702000 }, { "epoch": 5.76, "learning_rate": 7.723125482911112e-06, "loss": 3.1948, "step": 702500 }, { "epoch": 5.76, "learning_rate": 7.721481530191193e-06, "loss": 3.1967, "step": 703000 }, { "epoch": 5.76, "learning_rate": 7.719837577471273e-06, "loss": 3.1862, "step": 703500 }, { "epoch": 5.77, "learning_rate": 7.718193624751352e-06, "loss": 3.1962, "step": 704000 }, { "epoch": 5.77, "learning_rate": 7.716549672031433e-06, "loss": 3.1963, "step": 704500 }, { "epoch": 5.78, "learning_rate": 7.714905719311512e-06, "loss": 3.1839, "step": 705000 }, { "epoch": 5.78, "learning_rate": 7.713261766591593e-06, "loss": 3.1969, "step": 705500 }, { "epoch": 5.78, "learning_rate": 7.711617813871674e-06, "loss": 3.1906, "step": 706000 }, { "epoch": 5.79, "learning_rate": 7.709973861151753e-06, "loss": 3.1996, "step": 706500 }, { "epoch": 5.79, "learning_rate": 7.708329908431834e-06, "loss": 3.1966, "step": 707000 }, { "epoch": 5.8, "learning_rate": 7.706685955711914e-06, "loss": 3.1923, "step": 707500 }, { "epoch": 5.8, "learning_rate": 7.705042002991995e-06, "loss": 3.1893, "step": 708000 }, { "epoch": 5.8, "learning_rate": 7.703398050272076e-06, "loss": 3.1887, "step": 708500 }, { "epoch": 5.81, "learning_rate": 7.701754097552155e-06, "loss": 3.1983, "step": 709000 }, { "epoch": 5.81, "learning_rate": 7.700110144832236e-06, "loss": 3.1923, "step": 709500 }, { "epoch": 5.82, "learning_rate": 7.698466192112315e-06, "loss": 3.1964, "step": 710000 }, { "epoch": 5.82, "learning_rate": 7.696822239392396e-06, "loss": 3.1965, "step": 710500 }, { "epoch": 5.83, "learning_rate": 7.695178286672475e-06, "loss": 3.1941, "step": 711000 }, { "epoch": 5.83, "learning_rate": 7.693534333952556e-06, "loss": 3.1893, "step": 711500 }, { "epoch": 5.83, "learning_rate": 7.691890381232636e-06, "loss": 3.1945, "step": 712000 }, { "epoch": 5.84, "learning_rate": 7.690246428512717e-06, "loss": 3.1931, "step": 712500 }, { "epoch": 5.84, "learning_rate": 7.688602475792798e-06, "loss": 3.1853, "step": 713000 }, { "epoch": 5.85, "learning_rate": 7.686958523072877e-06, "loss": 3.1902, "step": 713500 }, { "epoch": 5.85, "learning_rate": 7.685314570352958e-06, "loss": 3.2006, "step": 714000 }, { "epoch": 5.85, "learning_rate": 7.683670617633037e-06, "loss": 3.195, "step": 714500 }, { "epoch": 5.86, "learning_rate": 7.682026664913118e-06, "loss": 3.1975, "step": 715000 }, { "epoch": 5.86, "learning_rate": 7.680382712193199e-06, "loss": 3.1905, "step": 715500 }, { "epoch": 5.87, "learning_rate": 7.678738759473278e-06, "loss": 3.1872, "step": 716000 }, { "epoch": 5.87, "learning_rate": 7.677094806753358e-06, "loss": 3.1891, "step": 716500 }, { "epoch": 5.87, "learning_rate": 7.675450854033439e-06, "loss": 3.1857, "step": 717000 }, { "epoch": 5.88, "learning_rate": 7.673806901313518e-06, "loss": 3.1894, "step": 717500 }, { "epoch": 5.88, "learning_rate": 7.672162948593599e-06, "loss": 3.1856, "step": 718000 }, { "epoch": 5.89, "learning_rate": 7.67051899587368e-06, "loss": 3.1828, "step": 718500 }, { "epoch": 5.89, "learning_rate": 7.668875043153759e-06, "loss": 3.1812, "step": 719000 }, { "epoch": 5.89, "learning_rate": 7.66723109043384e-06, "loss": 3.1908, "step": 719500 }, { "epoch": 5.9, "learning_rate": 7.66558713771392e-06, "loss": 3.1898, "step": 720000 }, { "epoch": 5.9, "eval_accuracy": 0.46191997629259224, "eval_loss": 3.0464377403259277, "eval_runtime": 413.3901, "eval_samples_per_second": 745.92, "eval_steps_per_second": 15.542, "step": 720000 }, { "epoch": 5.9, "learning_rate": 7.663943184994e-06, "loss": 3.1823, "step": 720500 }, { "epoch": 5.91, "learning_rate": 7.662299232274081e-06, "loss": 3.1837, "step": 721000 }, { "epoch": 5.91, "learning_rate": 7.66065527955416e-06, "loss": 3.1796, "step": 721500 }, { "epoch": 5.92, "learning_rate": 7.659011326834242e-06, "loss": 3.188, "step": 722000 }, { "epoch": 5.92, "learning_rate": 7.65736737411432e-06, "loss": 3.1883, "step": 722500 }, { "epoch": 5.92, "learning_rate": 7.6557234213944e-06, "loss": 3.1769, "step": 723000 }, { "epoch": 5.93, "learning_rate": 7.654079468674481e-06, "loss": 3.1887, "step": 723500 }, { "epoch": 5.93, "learning_rate": 7.652435515954562e-06, "loss": 3.1806, "step": 724000 }, { "epoch": 5.94, "learning_rate": 7.650791563234641e-06, "loss": 3.1816, "step": 724500 }, { "epoch": 5.94, "learning_rate": 7.649147610514722e-06, "loss": 3.1748, "step": 725000 }, { "epoch": 5.94, "learning_rate": 7.647503657794803e-06, "loss": 3.1888, "step": 725500 }, { "epoch": 5.95, "learning_rate": 7.645859705074883e-06, "loss": 3.1823, "step": 726000 }, { "epoch": 5.95, "learning_rate": 7.644215752354964e-06, "loss": 3.1763, "step": 726500 }, { "epoch": 5.96, "learning_rate": 7.642571799635043e-06, "loss": 3.1843, "step": 727000 }, { "epoch": 5.96, "learning_rate": 7.640927846915124e-06, "loss": 3.1856, "step": 727500 }, { "epoch": 5.96, "learning_rate": 7.639283894195205e-06, "loss": 3.1812, "step": 728000 }, { "epoch": 5.97, "learning_rate": 7.637639941475284e-06, "loss": 3.176, "step": 728500 }, { "epoch": 5.97, "learning_rate": 7.635995988755363e-06, "loss": 3.1842, "step": 729000 }, { "epoch": 5.98, "learning_rate": 7.634352036035444e-06, "loss": 3.1775, "step": 729500 }, { "epoch": 5.98, "learning_rate": 7.632708083315524e-06, "loss": 3.1803, "step": 730000 }, { "epoch": 5.98, "learning_rate": 7.631064130595605e-06, "loss": 3.1818, "step": 730500 }, { "epoch": 5.99, "learning_rate": 7.629420177875686e-06, "loss": 3.1754, "step": 731000 }, { "epoch": 5.99, "learning_rate": 7.627776225155765e-06, "loss": 3.1827, "step": 731500 }, { "epoch": 6.0, "learning_rate": 7.626132272435846e-06, "loss": 3.1745, "step": 732000 }, { "epoch": 6.0, "learning_rate": 7.624488319715926e-06, "loss": 3.1769, "step": 732500 }, { "epoch": 6.01, "learning_rate": 7.622844366996006e-06, "loss": 3.1717, "step": 733000 }, { "epoch": 6.01, "learning_rate": 7.621200414276086e-06, "loss": 3.1804, "step": 733500 }, { "epoch": 6.01, "learning_rate": 7.619556461556167e-06, "loss": 3.1726, "step": 734000 }, { "epoch": 6.02, "learning_rate": 7.617912508836246e-06, "loss": 3.1754, "step": 734500 }, { "epoch": 6.02, "learning_rate": 7.6162685561163265e-06, "loss": 3.177, "step": 735000 }, { "epoch": 6.03, "learning_rate": 7.614624603396407e-06, "loss": 3.1697, "step": 735500 }, { "epoch": 6.03, "learning_rate": 7.612980650676487e-06, "loss": 3.1712, "step": 736000 }, { "epoch": 6.03, "learning_rate": 7.611336697956567e-06, "loss": 3.1734, "step": 736500 }, { "epoch": 6.04, "learning_rate": 7.609692745236648e-06, "loss": 3.1702, "step": 737000 }, { "epoch": 6.04, "learning_rate": 7.608048792516728e-06, "loss": 3.1723, "step": 737500 }, { "epoch": 6.05, "learning_rate": 7.606404839796808e-06, "loss": 3.1815, "step": 738000 }, { "epoch": 6.05, "learning_rate": 7.604760887076888e-06, "loss": 3.1775, "step": 738500 }, { "epoch": 6.05, "learning_rate": 7.603116934356969e-06, "loss": 3.1654, "step": 739000 }, { "epoch": 6.06, "learning_rate": 7.601472981637049e-06, "loss": 3.1639, "step": 739500 }, { "epoch": 6.06, "learning_rate": 7.5998290289171295e-06, "loss": 3.1757, "step": 740000 }, { "epoch": 6.07, "learning_rate": 7.598185076197209e-06, "loss": 3.176, "step": 740500 }, { "epoch": 6.07, "learning_rate": 7.596541123477289e-06, "loss": 3.1781, "step": 741000 }, { "epoch": 6.07, "learning_rate": 7.594897170757369e-06, "loss": 3.1671, "step": 741500 }, { "epoch": 6.08, "learning_rate": 7.59325321803745e-06, "loss": 3.1691, "step": 742000 }, { "epoch": 6.08, "learning_rate": 7.59160926531753e-06, "loss": 3.1701, "step": 742500 }, { "epoch": 6.09, "learning_rate": 7.58996531259761e-06, "loss": 3.1767, "step": 743000 }, { "epoch": 6.09, "learning_rate": 7.58832135987769e-06, "loss": 3.1719, "step": 743500 }, { "epoch": 6.1, "learning_rate": 7.5866774071577705e-06, "loss": 3.1576, "step": 744000 }, { "epoch": 6.1, "learning_rate": 7.5850334544378515e-06, "loss": 3.167, "step": 744500 }, { "epoch": 6.1, "learning_rate": 7.583389501717932e-06, "loss": 3.172, "step": 745000 }, { "epoch": 6.11, "learning_rate": 7.581745548998012e-06, "loss": 3.17, "step": 745500 }, { "epoch": 6.11, "learning_rate": 7.580101596278092e-06, "loss": 3.1622, "step": 746000 }, { "epoch": 6.12, "learning_rate": 7.578457643558171e-06, "loss": 3.1708, "step": 746500 }, { "epoch": 6.12, "learning_rate": 7.576813690838252e-06, "loss": 3.1658, "step": 747000 }, { "epoch": 6.12, "learning_rate": 7.575169738118332e-06, "loss": 3.1665, "step": 747500 }, { "epoch": 6.13, "learning_rate": 7.573525785398412e-06, "loss": 3.157, "step": 748000 }, { "epoch": 6.13, "learning_rate": 7.5718818326784925e-06, "loss": 3.164, "step": 748500 }, { "epoch": 6.14, "learning_rate": 7.570237879958573e-06, "loss": 3.1626, "step": 749000 }, { "epoch": 6.14, "learning_rate": 7.568593927238654e-06, "loss": 3.162, "step": 749500 }, { "epoch": 6.14, "learning_rate": 7.566949974518734e-06, "loss": 3.1651, "step": 750000 }, { "epoch": 6.14, "eval_accuracy": 0.4657812440113609, "eval_loss": 3.022564172744751, "eval_runtime": 408.1772, "eval_samples_per_second": 755.446, "eval_steps_per_second": 15.741, "step": 750000 }, { "epoch": 6.15, "learning_rate": 7.565306021798814e-06, "loss": 3.1641, "step": 750500 }, { "epoch": 6.15, "learning_rate": 7.563662069078894e-06, "loss": 3.153, "step": 751000 }, { "epoch": 6.16, "learning_rate": 7.562018116358975e-06, "loss": 3.1609, "step": 751500 }, { "epoch": 6.16, "learning_rate": 7.560374163639055e-06, "loss": 3.1574, "step": 752000 }, { "epoch": 6.17, "learning_rate": 7.558730210919134e-06, "loss": 3.1566, "step": 752500 }, { "epoch": 6.17, "learning_rate": 7.5570862581992145e-06, "loss": 3.162, "step": 753000 }, { "epoch": 6.17, "learning_rate": 7.555442305479295e-06, "loss": 3.1615, "step": 753500 }, { "epoch": 6.18, "learning_rate": 7.553798352759375e-06, "loss": 3.1636, "step": 754000 }, { "epoch": 6.18, "learning_rate": 7.552154400039456e-06, "loss": 3.1591, "step": 754500 }, { "epoch": 6.19, "learning_rate": 7.550510447319536e-06, "loss": 3.1607, "step": 755000 }, { "epoch": 6.19, "learning_rate": 7.548866494599616e-06, "loss": 3.1572, "step": 755500 }, { "epoch": 6.19, "learning_rate": 7.547222541879696e-06, "loss": 3.1628, "step": 756000 }, { "epoch": 6.2, "learning_rate": 7.545578589159776e-06, "loss": 3.1555, "step": 756500 }, { "epoch": 6.2, "learning_rate": 7.543934636439857e-06, "loss": 3.1581, "step": 757000 }, { "epoch": 6.21, "learning_rate": 7.542290683719937e-06, "loss": 3.1646, "step": 757500 }, { "epoch": 6.21, "learning_rate": 7.5406467310000174e-06, "loss": 3.1626, "step": 758000 }, { "epoch": 6.21, "learning_rate": 7.539002778280097e-06, "loss": 3.1525, "step": 758500 }, { "epoch": 6.22, "learning_rate": 7.537358825560177e-06, "loss": 3.1567, "step": 759000 }, { "epoch": 6.22, "learning_rate": 7.535714872840257e-06, "loss": 3.1599, "step": 759500 }, { "epoch": 6.23, "learning_rate": 7.534070920120338e-06, "loss": 3.1513, "step": 760000 }, { "epoch": 6.23, "learning_rate": 7.532426967400418e-06, "loss": 3.1621, "step": 760500 }, { "epoch": 6.23, "learning_rate": 7.530783014680498e-06, "loss": 3.1586, "step": 761000 }, { "epoch": 6.24, "learning_rate": 7.529139061960578e-06, "loss": 3.1493, "step": 761500 }, { "epoch": 6.24, "learning_rate": 7.527495109240659e-06, "loss": 3.1613, "step": 762000 }, { "epoch": 6.25, "learning_rate": 7.525851156520739e-06, "loss": 3.1542, "step": 762500 }, { "epoch": 6.25, "learning_rate": 7.5242072038008195e-06, "loss": 3.1486, "step": 763000 }, { "epoch": 6.26, "learning_rate": 7.5225632510809e-06, "loss": 3.1542, "step": 763500 }, { "epoch": 6.26, "learning_rate": 7.520919298360981e-06, "loss": 3.1495, "step": 764000 }, { "epoch": 6.26, "learning_rate": 7.519275345641059e-06, "loss": 3.1548, "step": 764500 }, { "epoch": 6.27, "learning_rate": 7.51763139292114e-06, "loss": 3.1525, "step": 765000 }, { "epoch": 6.27, "learning_rate": 7.51598744020122e-06, "loss": 3.1562, "step": 765500 }, { "epoch": 6.28, "learning_rate": 7.5143434874813e-06, "loss": 3.163, "step": 766000 }, { "epoch": 6.28, "learning_rate": 7.5126995347613804e-06, "loss": 3.1509, "step": 766500 }, { "epoch": 6.28, "learning_rate": 7.511055582041461e-06, "loss": 3.1502, "step": 767000 }, { "epoch": 6.29, "learning_rate": 7.5094116293215415e-06, "loss": 3.1499, "step": 767500 }, { "epoch": 6.29, "learning_rate": 7.507767676601622e-06, "loss": 3.1461, "step": 768000 }, { "epoch": 6.3, "learning_rate": 7.506123723881702e-06, "loss": 3.1437, "step": 768500 }, { "epoch": 6.3, "learning_rate": 7.504479771161782e-06, "loss": 3.1509, "step": 769000 }, { "epoch": 6.3, "learning_rate": 7.502835818441863e-06, "loss": 3.1525, "step": 769500 }, { "epoch": 6.31, "learning_rate": 7.501191865721943e-06, "loss": 3.1488, "step": 770000 }, { "epoch": 6.31, "learning_rate": 7.499547913002023e-06, "loss": 3.1541, "step": 770500 }, { "epoch": 6.32, "learning_rate": 7.497903960282102e-06, "loss": 3.147, "step": 771000 }, { "epoch": 6.32, "learning_rate": 7.4962600075621825e-06, "loss": 3.1472, "step": 771500 }, { "epoch": 6.32, "learning_rate": 7.494616054842263e-06, "loss": 3.15, "step": 772000 }, { "epoch": 6.33, "learning_rate": 7.492972102122344e-06, "loss": 3.1486, "step": 772500 }, { "epoch": 6.33, "learning_rate": 7.491328149402424e-06, "loss": 3.1611, "step": 773000 }, { "epoch": 6.34, "learning_rate": 7.489684196682504e-06, "loss": 3.1517, "step": 773500 }, { "epoch": 6.34, "learning_rate": 7.488040243962584e-06, "loss": 3.1473, "step": 774000 }, { "epoch": 6.35, "learning_rate": 7.486396291242665e-06, "loss": 3.1438, "step": 774500 }, { "epoch": 6.35, "learning_rate": 7.484752338522745e-06, "loss": 3.137, "step": 775000 }, { "epoch": 6.35, "learning_rate": 7.483108385802825e-06, "loss": 3.15, "step": 775500 }, { "epoch": 6.36, "learning_rate": 7.481464433082905e-06, "loss": 3.1464, "step": 776000 }, { "epoch": 6.36, "learning_rate": 7.4798204803629855e-06, "loss": 3.1438, "step": 776500 }, { "epoch": 6.37, "learning_rate": 7.478176527643065e-06, "loss": 3.1437, "step": 777000 }, { "epoch": 6.37, "learning_rate": 7.476532574923146e-06, "loss": 3.1401, "step": 777500 }, { "epoch": 6.37, "learning_rate": 7.474888622203226e-06, "loss": 3.144, "step": 778000 }, { "epoch": 6.38, "learning_rate": 7.473244669483306e-06, "loss": 3.1513, "step": 778500 }, { "epoch": 6.38, "learning_rate": 7.471600716763386e-06, "loss": 3.1559, "step": 779000 }, { "epoch": 6.39, "learning_rate": 7.469956764043467e-06, "loss": 3.1461, "step": 779500 }, { "epoch": 6.39, "learning_rate": 7.468312811323547e-06, "loss": 3.1477, "step": 780000 }, { "epoch": 6.39, "eval_accuracy": 0.4689297446428191, "eval_loss": 3.002525568008423, "eval_runtime": 411.5642, "eval_samples_per_second": 749.229, "eval_steps_per_second": 15.611, "step": 780000 }, { "epoch": 6.39, "learning_rate": 7.466668858603627e-06, "loss": 3.1411, "step": 780500 }, { "epoch": 6.4, "learning_rate": 7.4650249058837075e-06, "loss": 3.1422, "step": 781000 }, { "epoch": 6.4, "learning_rate": 7.463380953163788e-06, "loss": 3.1464, "step": 781500 }, { "epoch": 6.41, "learning_rate": 7.461737000443869e-06, "loss": 3.1429, "step": 782000 }, { "epoch": 6.41, "learning_rate": 7.460093047723949e-06, "loss": 3.1451, "step": 782500 }, { "epoch": 6.41, "learning_rate": 7.458449095004028e-06, "loss": 3.1436, "step": 783000 }, { "epoch": 6.42, "learning_rate": 7.456805142284108e-06, "loss": 3.1398, "step": 783500 }, { "epoch": 6.42, "learning_rate": 7.455161189564188e-06, "loss": 3.1447, "step": 784000 }, { "epoch": 6.43, "learning_rate": 7.453517236844268e-06, "loss": 3.1414, "step": 784500 }, { "epoch": 6.43, "learning_rate": 7.451873284124349e-06, "loss": 3.1408, "step": 785000 }, { "epoch": 6.44, "learning_rate": 7.4502293314044295e-06, "loss": 3.1384, "step": 785500 }, { "epoch": 6.44, "learning_rate": 7.44858537868451e-06, "loss": 3.1389, "step": 786000 }, { "epoch": 6.44, "learning_rate": 7.44694142596459e-06, "loss": 3.149, "step": 786500 }, { "epoch": 6.45, "learning_rate": 7.445297473244671e-06, "loss": 3.1404, "step": 787000 }, { "epoch": 6.45, "learning_rate": 7.443653520524751e-06, "loss": 3.1353, "step": 787500 }, { "epoch": 6.46, "learning_rate": 7.442009567804831e-06, "loss": 3.134, "step": 788000 }, { "epoch": 6.46, "learning_rate": 7.440365615084911e-06, "loss": 3.1423, "step": 788500 }, { "epoch": 6.46, "learning_rate": 7.43872166236499e-06, "loss": 3.14, "step": 789000 }, { "epoch": 6.47, "learning_rate": 7.4370777096450705e-06, "loss": 3.1309, "step": 789500 }, { "epoch": 6.47, "learning_rate": 7.4354337569251515e-06, "loss": 3.1384, "step": 790000 }, { "epoch": 6.48, "learning_rate": 7.433789804205232e-06, "loss": 3.1371, "step": 790500 }, { "epoch": 6.48, "learning_rate": 7.432145851485312e-06, "loss": 3.136, "step": 791000 }, { "epoch": 6.48, "learning_rate": 7.430501898765392e-06, "loss": 3.1433, "step": 791500 }, { "epoch": 6.49, "learning_rate": 7.428857946045473e-06, "loss": 3.1398, "step": 792000 }, { "epoch": 6.49, "learning_rate": 7.427213993325553e-06, "loss": 3.1428, "step": 792500 }, { "epoch": 6.5, "learning_rate": 7.425570040605633e-06, "loss": 3.1316, "step": 793000 }, { "epoch": 6.5, "learning_rate": 7.423926087885713e-06, "loss": 3.1392, "step": 793500 }, { "epoch": 6.51, "learning_rate": 7.422282135165793e-06, "loss": 3.1288, "step": 794000 }, { "epoch": 6.51, "learning_rate": 7.420638182445874e-06, "loss": 3.1354, "step": 794500 }, { "epoch": 6.51, "learning_rate": 7.4189942297259536e-06, "loss": 3.1339, "step": 795000 }, { "epoch": 6.52, "learning_rate": 7.417350277006034e-06, "loss": 3.1419, "step": 795500 }, { "epoch": 6.52, "learning_rate": 7.415706324286114e-06, "loss": 3.1363, "step": 796000 }, { "epoch": 6.53, "learning_rate": 7.414062371566194e-06, "loss": 3.134, "step": 796500 }, { "epoch": 6.53, "learning_rate": 7.412418418846274e-06, "loss": 3.13, "step": 797000 }, { "epoch": 6.53, "learning_rate": 7.410774466126355e-06, "loss": 3.1382, "step": 797500 }, { "epoch": 6.54, "learning_rate": 7.409130513406435e-06, "loss": 3.1341, "step": 798000 }, { "epoch": 6.54, "learning_rate": 7.407486560686515e-06, "loss": 3.1306, "step": 798500 }, { "epoch": 6.55, "learning_rate": 7.4058426079665954e-06, "loss": 3.134, "step": 799000 }, { "epoch": 6.55, "learning_rate": 7.404198655246676e-06, "loss": 3.1329, "step": 799500 }, { "epoch": 6.55, "learning_rate": 7.4025547025267565e-06, "loss": 3.1382, "step": 800000 }, { "epoch": 6.56, "learning_rate": 7.400910749806837e-06, "loss": 3.1293, "step": 800500 }, { "epoch": 6.56, "learning_rate": 7.399266797086916e-06, "loss": 3.135, "step": 801000 }, { "epoch": 6.57, "learning_rate": 7.397622844366996e-06, "loss": 3.1407, "step": 801500 }, { "epoch": 6.57, "learning_rate": 7.395978891647076e-06, "loss": 3.1354, "step": 802000 }, { "epoch": 6.57, "learning_rate": 7.394334938927157e-06, "loss": 3.1371, "step": 802500 }, { "epoch": 6.58, "learning_rate": 7.392690986207237e-06, "loss": 3.1362, "step": 803000 }, { "epoch": 6.58, "learning_rate": 7.391047033487317e-06, "loss": 3.129, "step": 803500 }, { "epoch": 6.59, "learning_rate": 7.3894030807673975e-06, "loss": 3.1378, "step": 804000 }, { "epoch": 6.59, "learning_rate": 7.387759128047478e-06, "loss": 3.125, "step": 804500 }, { "epoch": 6.6, "learning_rate": 7.386115175327559e-06, "loss": 3.1285, "step": 805000 }, { "epoch": 6.6, "learning_rate": 7.384471222607639e-06, "loss": 3.1344, "step": 805500 }, { "epoch": 6.6, "learning_rate": 7.382827269887719e-06, "loss": 3.1241, "step": 806000 }, { "epoch": 6.61, "learning_rate": 7.381183317167799e-06, "loss": 3.1264, "step": 806500 }, { "epoch": 6.61, "learning_rate": 7.379539364447878e-06, "loss": 3.1278, "step": 807000 }, { "epoch": 6.62, "learning_rate": 7.377895411727959e-06, "loss": 3.1363, "step": 807500 }, { "epoch": 6.62, "learning_rate": 7.376251459008039e-06, "loss": 3.124, "step": 808000 }, { "epoch": 6.62, "learning_rate": 7.3746075062881195e-06, "loss": 3.1246, "step": 808500 }, { "epoch": 6.63, "learning_rate": 7.3729635535682e-06, "loss": 3.1281, "step": 809000 }, { "epoch": 6.63, "learning_rate": 7.37131960084828e-06, "loss": 3.1198, "step": 809500 }, { "epoch": 6.64, "learning_rate": 7.369675648128361e-06, "loss": 3.1276, "step": 810000 }, { "epoch": 6.64, "eval_accuracy": 0.47175398712230254, "eval_loss": 2.9837634563446045, "eval_runtime": 427.1705, "eval_samples_per_second": 721.857, "eval_steps_per_second": 15.041, "step": 810000 }, { "epoch": 6.64, "learning_rate": 7.368031695408441e-06, "loss": 3.1275, "step": 810500 }, { "epoch": 6.64, "learning_rate": 7.366387742688521e-06, "loss": 3.1262, "step": 811000 }, { "epoch": 6.65, "learning_rate": 7.364743789968601e-06, "loss": 3.1216, "step": 811500 }, { "epoch": 6.65, "learning_rate": 7.363099837248682e-06, "loss": 3.1212, "step": 812000 }, { "epoch": 6.66, "learning_rate": 7.361455884528762e-06, "loss": 3.1169, "step": 812500 }, { "epoch": 6.66, "learning_rate": 7.3598119318088415e-06, "loss": 3.1221, "step": 813000 }, { "epoch": 6.66, "learning_rate": 7.358167979088922e-06, "loss": 3.128, "step": 813500 }, { "epoch": 6.67, "learning_rate": 7.356524026369002e-06, "loss": 3.1245, "step": 814000 }, { "epoch": 6.67, "learning_rate": 7.354880073649082e-06, "loss": 3.1248, "step": 814500 }, { "epoch": 6.68, "learning_rate": 7.353236120929163e-06, "loss": 3.1219, "step": 815000 }, { "epoch": 6.68, "learning_rate": 7.351592168209243e-06, "loss": 3.1285, "step": 815500 }, { "epoch": 6.69, "learning_rate": 7.349948215489323e-06, "loss": 3.1275, "step": 816000 }, { "epoch": 6.69, "learning_rate": 7.348304262769403e-06, "loss": 3.1213, "step": 816500 }, { "epoch": 6.69, "learning_rate": 7.346660310049483e-06, "loss": 3.1211, "step": 817000 }, { "epoch": 6.7, "learning_rate": 7.345016357329564e-06, "loss": 3.1231, "step": 817500 }, { "epoch": 6.7, "learning_rate": 7.3433724046096445e-06, "loss": 3.1306, "step": 818000 }, { "epoch": 6.71, "learning_rate": 7.341728451889725e-06, "loss": 3.1169, "step": 818500 }, { "epoch": 6.71, "learning_rate": 7.340084499169804e-06, "loss": 3.1254, "step": 819000 }, { "epoch": 6.71, "learning_rate": 7.338440546449884e-06, "loss": 3.1182, "step": 819500 }, { "epoch": 6.72, "learning_rate": 7.336796593729965e-06, "loss": 3.1185, "step": 820000 }, { "epoch": 6.72, "learning_rate": 7.335152641010045e-06, "loss": 3.1197, "step": 820500 }, { "epoch": 6.73, "learning_rate": 7.333508688290125e-06, "loss": 3.1243, "step": 821000 }, { "epoch": 6.73, "learning_rate": 7.331864735570205e-06, "loss": 3.1223, "step": 821500 }, { "epoch": 6.73, "learning_rate": 7.3302207828502855e-06, "loss": 3.1259, "step": 822000 }, { "epoch": 6.74, "learning_rate": 7.3285768301303665e-06, "loss": 3.1265, "step": 822500 }, { "epoch": 6.74, "learning_rate": 7.326932877410447e-06, "loss": 3.1141, "step": 823000 }, { "epoch": 6.75, "learning_rate": 7.325288924690527e-06, "loss": 3.1206, "step": 823500 }, { "epoch": 6.75, "learning_rate": 7.323644971970607e-06, "loss": 3.1178, "step": 824000 }, { "epoch": 6.75, "learning_rate": 7.322001019250688e-06, "loss": 3.1122, "step": 824500 }, { "epoch": 6.76, "learning_rate": 7.320357066530768e-06, "loss": 3.1187, "step": 825000 }, { "epoch": 6.76, "learning_rate": 7.318713113810847e-06, "loss": 3.118, "step": 825500 }, { "epoch": 6.77, "learning_rate": 7.317069161090927e-06, "loss": 3.1143, "step": 826000 }, { "epoch": 6.77, "learning_rate": 7.3154252083710075e-06, "loss": 3.1176, "step": 826500 }, { "epoch": 6.78, "learning_rate": 7.313781255651088e-06, "loss": 3.1117, "step": 827000 }, { "epoch": 6.78, "learning_rate": 7.3121373029311686e-06, "loss": 3.1185, "step": 827500 }, { "epoch": 6.78, "learning_rate": 7.310493350211249e-06, "loss": 3.1208, "step": 828000 }, { "epoch": 6.79, "learning_rate": 7.308849397491329e-06, "loss": 3.1171, "step": 828500 }, { "epoch": 6.79, "learning_rate": 7.307205444771409e-06, "loss": 3.1127, "step": 829000 }, { "epoch": 6.8, "learning_rate": 7.305561492051489e-06, "loss": 3.1146, "step": 829500 }, { "epoch": 6.8, "learning_rate": 7.30391753933157e-06, "loss": 3.1141, "step": 830000 }, { "epoch": 6.8, "learning_rate": 7.30227358661165e-06, "loss": 3.1191, "step": 830500 }, { "epoch": 6.81, "learning_rate": 7.30062963389173e-06, "loss": 3.123, "step": 831000 }, { "epoch": 6.81, "learning_rate": 7.29898568117181e-06, "loss": 3.1178, "step": 831500 }, { "epoch": 6.82, "learning_rate": 7.29734172845189e-06, "loss": 3.1133, "step": 832000 }, { "epoch": 6.82, "learning_rate": 7.29569777573197e-06, "loss": 3.1174, "step": 832500 }, { "epoch": 6.82, "learning_rate": 7.294053823012051e-06, "loss": 3.1201, "step": 833000 }, { "epoch": 6.83, "learning_rate": 7.292409870292131e-06, "loss": 3.1199, "step": 833500 }, { "epoch": 6.83, "learning_rate": 7.290765917572211e-06, "loss": 3.1157, "step": 834000 }, { "epoch": 6.84, "learning_rate": 7.289121964852291e-06, "loss": 3.1183, "step": 834500 }, { "epoch": 6.84, "learning_rate": 7.287478012132372e-06, "loss": 3.1141, "step": 835000 }, { "epoch": 6.85, "learning_rate": 7.285834059412452e-06, "loss": 3.1165, "step": 835500 }, { "epoch": 6.85, "learning_rate": 7.284190106692532e-06, "loss": 3.1112, "step": 836000 }, { "epoch": 6.85, "learning_rate": 7.2825461539726125e-06, "loss": 3.1217, "step": 836500 }, { "epoch": 6.86, "learning_rate": 7.2809022012526935e-06, "loss": 3.1089, "step": 837000 }, { "epoch": 6.86, "learning_rate": 7.279258248532772e-06, "loss": 3.118, "step": 837500 }, { "epoch": 6.87, "learning_rate": 7.277614295812853e-06, "loss": 3.1127, "step": 838000 }, { "epoch": 6.87, "learning_rate": 7.275970343092933e-06, "loss": 3.1117, "step": 838500 }, { "epoch": 6.87, "learning_rate": 7.274326390373013e-06, "loss": 3.1112, "step": 839000 }, { "epoch": 6.88, "learning_rate": 7.272682437653093e-06, "loss": 3.1084, "step": 839500 }, { "epoch": 6.88, "learning_rate": 7.271038484933174e-06, "loss": 3.1102, "step": 840000 }, { "epoch": 6.88, "eval_accuracy": 0.47402583913951796, "eval_loss": 2.969046115875244, "eval_runtime": 428.0247, "eval_samples_per_second": 720.416, "eval_steps_per_second": 15.011, "step": 840000 }, { "epoch": 6.89, "learning_rate": 7.269394532213254e-06, "loss": 3.1116, "step": 840500 }, { "epoch": 6.89, "learning_rate": 7.2677505794933345e-06, "loss": 3.1169, "step": 841000 }, { "epoch": 6.89, "learning_rate": 7.266106626773415e-06, "loss": 3.1087, "step": 841500 }, { "epoch": 6.9, "learning_rate": 7.264462674053495e-06, "loss": 3.1085, "step": 842000 }, { "epoch": 6.9, "learning_rate": 7.262818721333576e-06, "loss": 3.1109, "step": 842500 }, { "epoch": 6.91, "learning_rate": 7.261174768613656e-06, "loss": 3.112, "step": 843000 }, { "epoch": 6.91, "learning_rate": 7.259530815893735e-06, "loss": 3.1072, "step": 843500 }, { "epoch": 6.91, "learning_rate": 7.257886863173815e-06, "loss": 3.1142, "step": 844000 }, { "epoch": 6.92, "learning_rate": 7.256242910453895e-06, "loss": 3.1146, "step": 844500 }, { "epoch": 6.92, "learning_rate": 7.2545989577339755e-06, "loss": 3.1098, "step": 845000 }, { "epoch": 6.93, "learning_rate": 7.2529550050140565e-06, "loss": 3.1138, "step": 845500 }, { "epoch": 6.93, "learning_rate": 7.251311052294137e-06, "loss": 3.1111, "step": 846000 }, { "epoch": 6.94, "learning_rate": 7.249667099574217e-06, "loss": 3.1045, "step": 846500 }, { "epoch": 6.94, "learning_rate": 7.248023146854297e-06, "loss": 3.1108, "step": 847000 }, { "epoch": 6.94, "learning_rate": 7.246379194134378e-06, "loss": 3.109, "step": 847500 }, { "epoch": 6.95, "learning_rate": 7.244735241414458e-06, "loss": 3.118, "step": 848000 }, { "epoch": 6.95, "learning_rate": 7.243091288694538e-06, "loss": 3.1119, "step": 848500 }, { "epoch": 6.96, "learning_rate": 7.241447335974618e-06, "loss": 3.1125, "step": 849000 }, { "epoch": 6.96, "learning_rate": 7.2398033832546975e-06, "loss": 3.1131, "step": 849500 }, { "epoch": 6.96, "learning_rate": 7.238159430534778e-06, "loss": 3.1072, "step": 850000 }, { "epoch": 6.97, "learning_rate": 7.236515477814859e-06, "loss": 3.1102, "step": 850500 }, { "epoch": 6.97, "learning_rate": 7.234871525094939e-06, "loss": 3.1095, "step": 851000 }, { "epoch": 6.98, "learning_rate": 7.233227572375019e-06, "loss": 3.1029, "step": 851500 }, { "epoch": 6.98, "learning_rate": 7.231583619655099e-06, "loss": 3.1064, "step": 852000 }, { "epoch": 6.98, "learning_rate": 7.22993966693518e-06, "loss": 3.1105, "step": 852500 }, { "epoch": 6.99, "learning_rate": 7.22829571421526e-06, "loss": 3.1084, "step": 853000 }, { "epoch": 6.99, "learning_rate": 7.22665176149534e-06, "loss": 3.107, "step": 853500 }, { "epoch": 7.0, "learning_rate": 7.22500780877542e-06, "loss": 3.1091, "step": 854000 }, { "epoch": 7.0, "learning_rate": 7.2233638560555005e-06, "loss": 3.1021, "step": 854500 }, { "epoch": 7.0, "learning_rate": 7.2217199033355815e-06, "loss": 3.0996, "step": 855000 }, { "epoch": 7.01, "learning_rate": 7.220075950615661e-06, "loss": 3.0913, "step": 855500 }, { "epoch": 7.01, "learning_rate": 7.218431997895741e-06, "loss": 3.1022, "step": 856000 }, { "epoch": 7.02, "learning_rate": 7.216788045175821e-06, "loss": 3.1077, "step": 856500 }, { "epoch": 7.02, "learning_rate": 7.215144092455901e-06, "loss": 3.1003, "step": 857000 }, { "epoch": 7.03, "learning_rate": 7.213500139735981e-06, "loss": 3.1036, "step": 857500 }, { "epoch": 7.03, "learning_rate": 7.211856187016062e-06, "loss": 3.1037, "step": 858000 }, { "epoch": 7.03, "learning_rate": 7.210212234296142e-06, "loss": 3.1082, "step": 858500 }, { "epoch": 7.04, "learning_rate": 7.2085682815762225e-06, "loss": 3.1052, "step": 859000 }, { "epoch": 7.04, "learning_rate": 7.206924328856303e-06, "loss": 3.1078, "step": 859500 }, { "epoch": 7.05, "learning_rate": 7.2052803761363836e-06, "loss": 3.1049, "step": 860000 }, { "epoch": 7.05, "learning_rate": 7.203636423416464e-06, "loss": 3.0981, "step": 860500 }, { "epoch": 7.05, "learning_rate": 7.201992470696544e-06, "loss": 3.1124, "step": 861000 }, { "epoch": 7.06, "learning_rate": 7.200348517976623e-06, "loss": 3.1003, "step": 861500 }, { "epoch": 7.06, "learning_rate": 7.198704565256703e-06, "loss": 3.1041, "step": 862000 }, { "epoch": 7.07, "learning_rate": 7.197060612536783e-06, "loss": 3.0975, "step": 862500 }, { "epoch": 7.07, "learning_rate": 7.195416659816864e-06, "loss": 3.1068, "step": 863000 }, { "epoch": 7.07, "learning_rate": 7.1937727070969445e-06, "loss": 3.1015, "step": 863500 }, { "epoch": 7.08, "learning_rate": 7.192128754377025e-06, "loss": 3.1072, "step": 864000 }, { "epoch": 7.08, "learning_rate": 7.190484801657105e-06, "loss": 3.0992, "step": 864500 }, { "epoch": 7.09, "learning_rate": 7.188840848937186e-06, "loss": 3.1031, "step": 865000 }, { "epoch": 7.09, "learning_rate": 7.187196896217266e-06, "loss": 3.0964, "step": 865500 }, { "epoch": 7.09, "learning_rate": 7.185552943497346e-06, "loss": 3.1047, "step": 866000 }, { "epoch": 7.1, "learning_rate": 7.183908990777426e-06, "loss": 3.103, "step": 866500 }, { "epoch": 7.1, "learning_rate": 7.182265038057506e-06, "loss": 3.0944, "step": 867000 }, { "epoch": 7.11, "learning_rate": 7.1806210853375855e-06, "loss": 3.0977, "step": 867500 }, { "epoch": 7.11, "learning_rate": 7.1789771326176664e-06, "loss": 3.1003, "step": 868000 }, { "epoch": 7.12, "learning_rate": 7.1773331798977466e-06, "loss": 3.1025, "step": 868500 }, { "epoch": 7.12, "learning_rate": 7.175689227177827e-06, "loss": 3.097, "step": 869000 }, { "epoch": 7.12, "learning_rate": 7.174045274457907e-06, "loss": 3.0947, "step": 869500 }, { "epoch": 7.13, "learning_rate": 7.172401321737987e-06, "loss": 3.1046, "step": 870000 }, { "epoch": 7.13, "eval_accuracy": 0.4757021230011519, "eval_loss": 2.956329584121704, "eval_runtime": 428.2951, "eval_samples_per_second": 719.962, "eval_steps_per_second": 15.001, "step": 870000 }, { "epoch": 7.13, "learning_rate": 7.170757369018068e-06, "loss": 3.1, "step": 870500 }, { "epoch": 7.14, "learning_rate": 7.169113416298148e-06, "loss": 3.0954, "step": 871000 }, { "epoch": 7.14, "learning_rate": 7.167469463578228e-06, "loss": 3.0978, "step": 871500 }, { "epoch": 7.14, "learning_rate": 7.165825510858308e-06, "loss": 3.097, "step": 872000 }, { "epoch": 7.15, "learning_rate": 7.164181558138389e-06, "loss": 3.1062, "step": 872500 }, { "epoch": 7.15, "learning_rate": 7.162537605418469e-06, "loss": 3.0963, "step": 873000 }, { "epoch": 7.16, "learning_rate": 7.1608936526985495e-06, "loss": 3.0909, "step": 873500 }, { "epoch": 7.16, "learning_rate": 7.159249699978629e-06, "loss": 3.0979, "step": 874000 }, { "epoch": 7.16, "learning_rate": 7.157605747258709e-06, "loss": 3.0919, "step": 874500 }, { "epoch": 7.17, "learning_rate": 7.155961794538789e-06, "loss": 3.0945, "step": 875000 }, { "epoch": 7.17, "learning_rate": 7.15431784181887e-06, "loss": 3.0981, "step": 875500 }, { "epoch": 7.18, "learning_rate": 7.15267388909895e-06, "loss": 3.0912, "step": 876000 }, { "epoch": 7.18, "learning_rate": 7.15102993637903e-06, "loss": 3.0948, "step": 876500 }, { "epoch": 7.19, "learning_rate": 7.14938598365911e-06, "loss": 3.1008, "step": 877000 }, { "epoch": 7.19, "learning_rate": 7.1477420309391905e-06, "loss": 3.103, "step": 877500 }, { "epoch": 7.19, "learning_rate": 7.1460980782192715e-06, "loss": 3.0908, "step": 878000 }, { "epoch": 7.2, "learning_rate": 7.144454125499352e-06, "loss": 3.0975, "step": 878500 }, { "epoch": 7.2, "learning_rate": 7.142810172779432e-06, "loss": 3.0972, "step": 879000 }, { "epoch": 7.21, "learning_rate": 7.141166220059512e-06, "loss": 3.0939, "step": 879500 }, { "epoch": 7.21, "learning_rate": 7.139522267339591e-06, "loss": 3.099, "step": 880000 }, { "epoch": 7.21, "learning_rate": 7.137878314619672e-06, "loss": 3.0973, "step": 880500 }, { "epoch": 7.22, "learning_rate": 7.136234361899752e-06, "loss": 3.0922, "step": 881000 }, { "epoch": 7.22, "learning_rate": 7.134590409179832e-06, "loss": 3.0964, "step": 881500 }, { "epoch": 7.23, "learning_rate": 7.1329464564599125e-06, "loss": 3.0954, "step": 882000 }, { "epoch": 7.23, "learning_rate": 7.131302503739993e-06, "loss": 3.1022, "step": 882500 }, { "epoch": 7.23, "learning_rate": 7.129658551020074e-06, "loss": 3.099, "step": 883000 }, { "epoch": 7.24, "learning_rate": 7.128014598300154e-06, "loss": 3.0887, "step": 883500 }, { "epoch": 7.24, "learning_rate": 7.126370645580234e-06, "loss": 3.0924, "step": 884000 }, { "epoch": 7.25, "learning_rate": 7.124726692860314e-06, "loss": 3.0895, "step": 884500 }, { "epoch": 7.25, "learning_rate": 7.123082740140395e-06, "loss": 3.0968, "step": 885000 }, { "epoch": 7.25, "learning_rate": 7.121438787420475e-06, "loss": 3.0948, "step": 885500 }, { "epoch": 7.26, "learning_rate": 7.119794834700554e-06, "loss": 3.0969, "step": 886000 }, { "epoch": 7.26, "learning_rate": 7.1181508819806345e-06, "loss": 3.0952, "step": 886500 }, { "epoch": 7.27, "learning_rate": 7.116506929260715e-06, "loss": 3.0957, "step": 887000 }, { "epoch": 7.27, "learning_rate": 7.114862976540795e-06, "loss": 3.0874, "step": 887500 }, { "epoch": 7.28, "learning_rate": 7.113219023820876e-06, "loss": 3.0911, "step": 888000 }, { "epoch": 7.28, "learning_rate": 7.111575071100956e-06, "loss": 3.0898, "step": 888500 }, { "epoch": 7.28, "learning_rate": 7.109931118381036e-06, "loss": 3.0941, "step": 889000 }, { "epoch": 7.29, "learning_rate": 7.108287165661116e-06, "loss": 3.097, "step": 889500 }, { "epoch": 7.29, "learning_rate": 7.106643212941196e-06, "loss": 3.0977, "step": 890000 }, { "epoch": 7.3, "learning_rate": 7.104999260221277e-06, "loss": 3.0923, "step": 890500 }, { "epoch": 7.3, "learning_rate": 7.103355307501357e-06, "loss": 3.0844, "step": 891000 }, { "epoch": 7.3, "learning_rate": 7.1017113547814375e-06, "loss": 3.0889, "step": 891500 }, { "epoch": 7.31, "learning_rate": 7.100067402061517e-06, "loss": 3.0952, "step": 892000 }, { "epoch": 7.31, "learning_rate": 7.098423449341597e-06, "loss": 3.0884, "step": 892500 }, { "epoch": 7.32, "learning_rate": 7.096779496621678e-06, "loss": 3.0956, "step": 893000 }, { "epoch": 7.32, "learning_rate": 7.095135543901758e-06, "loss": 3.0888, "step": 893500 }, { "epoch": 7.32, "learning_rate": 7.093491591181838e-06, "loss": 3.092, "step": 894000 }, { "epoch": 7.33, "learning_rate": 7.091847638461918e-06, "loss": 3.0842, "step": 894500 }, { "epoch": 7.33, "learning_rate": 7.090203685741998e-06, "loss": 3.092, "step": 895000 }, { "epoch": 7.34, "learning_rate": 7.088559733022079e-06, "loss": 3.0886, "step": 895500 }, { "epoch": 7.34, "learning_rate": 7.0869157803021594e-06, "loss": 3.0845, "step": 896000 }, { "epoch": 7.34, "learning_rate": 7.0852718275822396e-06, "loss": 3.0871, "step": 896500 }, { "epoch": 7.35, "learning_rate": 7.08362787486232e-06, "loss": 3.0804, "step": 897000 }, { "epoch": 7.35, "learning_rate": 7.081983922142401e-06, "loss": 3.0817, "step": 897500 }, { "epoch": 7.36, "learning_rate": 7.080339969422479e-06, "loss": 3.0935, "step": 898000 }, { "epoch": 7.36, "learning_rate": 7.07869601670256e-06, "loss": 3.0893, "step": 898500 }, { "epoch": 7.37, "learning_rate": 7.07705206398264e-06, "loss": 3.0876, "step": 899000 }, { "epoch": 7.37, "learning_rate": 7.07540811126272e-06, "loss": 3.0946, "step": 899500 }, { "epoch": 7.37, "learning_rate": 7.0737641585428005e-06, "loss": 3.0817, "step": 900000 }, { "epoch": 7.37, "eval_accuracy": 0.47711464421124683, "eval_loss": 2.9477081298828125, "eval_runtime": 415.7627, "eval_samples_per_second": 741.663, "eval_steps_per_second": 15.454, "step": 900000 }, { "epoch": 7.38, "learning_rate": 7.0721202058228814e-06, "loss": 3.0775, "step": 900500 }, { "epoch": 7.38, "learning_rate": 7.0704762531029616e-06, "loss": 3.0839, "step": 901000 }, { "epoch": 7.39, "learning_rate": 7.068832300383042e-06, "loss": 3.086, "step": 901500 }, { "epoch": 7.39, "learning_rate": 7.067188347663122e-06, "loss": 3.0927, "step": 902000 }, { "epoch": 7.39, "learning_rate": 7.065544394943202e-06, "loss": 3.0884, "step": 902500 }, { "epoch": 7.4, "learning_rate": 7.063900442223283e-06, "loss": 3.0848, "step": 903000 }, { "epoch": 7.4, "learning_rate": 7.062256489503363e-06, "loss": 3.0805, "step": 903500 }, { "epoch": 7.41, "learning_rate": 7.060612536783442e-06, "loss": 3.0891, "step": 904000 }, { "epoch": 7.41, "learning_rate": 7.0589685840635224e-06, "loss": 3.0887, "step": 904500 }, { "epoch": 7.41, "learning_rate": 7.0573246313436026e-06, "loss": 3.0889, "step": 905000 }, { "epoch": 7.42, "learning_rate": 7.055680678623683e-06, "loss": 3.0918, "step": 905500 }, { "epoch": 7.42, "learning_rate": 7.054036725903764e-06, "loss": 3.0735, "step": 906000 }, { "epoch": 7.43, "learning_rate": 7.052392773183844e-06, "loss": 3.0821, "step": 906500 }, { "epoch": 7.43, "learning_rate": 7.050748820463924e-06, "loss": 3.0843, "step": 907000 }, { "epoch": 7.43, "learning_rate": 7.049104867744004e-06, "loss": 3.088, "step": 907500 }, { "epoch": 7.44, "learning_rate": 7.047460915024085e-06, "loss": 3.0885, "step": 908000 }, { "epoch": 7.44, "learning_rate": 7.045816962304165e-06, "loss": 3.0836, "step": 908500 }, { "epoch": 7.45, "learning_rate": 7.044173009584245e-06, "loss": 3.083, "step": 909000 }, { "epoch": 7.45, "learning_rate": 7.042529056864325e-06, "loss": 3.0838, "step": 909500 }, { "epoch": 7.46, "learning_rate": 7.040885104144405e-06, "loss": 3.0789, "step": 910000 }, { "epoch": 7.46, "learning_rate": 7.039241151424485e-06, "loss": 3.082, "step": 910500 }, { "epoch": 7.46, "learning_rate": 7.037597198704566e-06, "loss": 3.08, "step": 911000 }, { "epoch": 7.47, "learning_rate": 7.035953245984646e-06, "loss": 3.0853, "step": 911500 }, { "epoch": 7.47, "learning_rate": 7.034309293264726e-06, "loss": 3.0808, "step": 912000 }, { "epoch": 7.48, "learning_rate": 7.032665340544806e-06, "loss": 3.0855, "step": 912500 }, { "epoch": 7.48, "learning_rate": 7.031021387824887e-06, "loss": 3.0833, "step": 913000 }, { "epoch": 7.48, "learning_rate": 7.029377435104967e-06, "loss": 3.0754, "step": 913500 }, { "epoch": 7.49, "learning_rate": 7.027733482385047e-06, "loss": 3.0819, "step": 914000 }, { "epoch": 7.49, "learning_rate": 7.0260895296651275e-06, "loss": 3.083, "step": 914500 }, { "epoch": 7.5, "learning_rate": 7.024445576945208e-06, "loss": 3.0783, "step": 915000 }, { "epoch": 7.5, "learning_rate": 7.022801624225289e-06, "loss": 3.0812, "step": 915500 }, { "epoch": 7.5, "learning_rate": 7.021157671505368e-06, "loss": 3.0782, "step": 916000 }, { "epoch": 7.51, "learning_rate": 7.019513718785448e-06, "loss": 3.0797, "step": 916500 }, { "epoch": 7.51, "learning_rate": 7.017869766065528e-06, "loss": 3.0876, "step": 917000 }, { "epoch": 7.52, "learning_rate": 7.016225813345608e-06, "loss": 3.0857, "step": 917500 }, { "epoch": 7.52, "learning_rate": 7.014581860625688e-06, "loss": 3.0776, "step": 918000 }, { "epoch": 7.53, "learning_rate": 7.012937907905769e-06, "loss": 3.085, "step": 918500 }, { "epoch": 7.53, "learning_rate": 7.0112939551858495e-06, "loss": 3.0817, "step": 919000 }, { "epoch": 7.53, "learning_rate": 7.00965000246593e-06, "loss": 3.0807, "step": 919500 }, { "epoch": 7.54, "learning_rate": 7.00800604974601e-06, "loss": 3.0892, "step": 920000 }, { "epoch": 7.54, "learning_rate": 7.006362097026091e-06, "loss": 3.0781, "step": 920500 }, { "epoch": 7.55, "learning_rate": 7.004718144306171e-06, "loss": 3.0726, "step": 921000 }, { "epoch": 7.55, "learning_rate": 7.003074191586251e-06, "loss": 3.0824, "step": 921500 }, { "epoch": 7.55, "learning_rate": 7.001430238866331e-06, "loss": 3.0816, "step": 922000 }, { "epoch": 7.56, "learning_rate": 6.99978628614641e-06, "loss": 3.0747, "step": 922500 }, { "epoch": 7.56, "learning_rate": 6.9981423334264905e-06, "loss": 3.0819, "step": 923000 }, { "epoch": 7.57, "learning_rate": 6.9964983807065715e-06, "loss": 3.0806, "step": 923500 }, { "epoch": 7.57, "learning_rate": 6.994854427986652e-06, "loss": 3.0842, "step": 924000 }, { "epoch": 7.57, "learning_rate": 6.993210475266732e-06, "loss": 3.0732, "step": 924500 }, { "epoch": 7.58, "learning_rate": 6.991566522546812e-06, "loss": 3.0746, "step": 925000 }, { "epoch": 7.58, "learning_rate": 6.989922569826893e-06, "loss": 3.0789, "step": 925500 }, { "epoch": 7.59, "learning_rate": 6.988278617106973e-06, "loss": 3.0817, "step": 926000 }, { "epoch": 7.59, "learning_rate": 6.986634664387053e-06, "loss": 3.0718, "step": 926500 }, { "epoch": 7.59, "learning_rate": 6.984990711667133e-06, "loss": 3.0824, "step": 927000 }, { "epoch": 7.6, "learning_rate": 6.983346758947213e-06, "loss": 3.0776, "step": 927500 }, { "epoch": 7.6, "learning_rate": 6.981702806227294e-06, "loss": 3.075, "step": 928000 }, { "epoch": 7.61, "learning_rate": 6.980058853507374e-06, "loss": 3.0787, "step": 928500 }, { "epoch": 7.61, "learning_rate": 6.978414900787454e-06, "loss": 3.0708, "step": 929000 }, { "epoch": 7.62, "learning_rate": 6.976770948067534e-06, "loss": 3.086, "step": 929500 }, { "epoch": 7.62, "learning_rate": 6.975126995347614e-06, "loss": 3.0813, "step": 930000 }, { "epoch": 7.62, "eval_accuracy": 0.478479420881626, "eval_loss": 2.939741611480713, "eval_runtime": 412.5363, "eval_samples_per_second": 747.464, "eval_steps_per_second": 15.574, "step": 930000 }, { "epoch": 7.62, "learning_rate": 6.973483042627694e-06, "loss": 3.0788, "step": 930500 }, { "epoch": 7.63, "learning_rate": 6.971839089907775e-06, "loss": 3.0747, "step": 931000 }, { "epoch": 7.63, "learning_rate": 6.970195137187855e-06, "loss": 3.0767, "step": 931500 }, { "epoch": 7.64, "learning_rate": 6.968551184467935e-06, "loss": 3.0797, "step": 932000 }, { "epoch": 7.64, "learning_rate": 6.9669072317480155e-06, "loss": 3.0759, "step": 932500 }, { "epoch": 7.64, "learning_rate": 6.9652632790280964e-06, "loss": 3.0809, "step": 933000 }, { "epoch": 7.65, "learning_rate": 6.9636193263081766e-06, "loss": 3.0756, "step": 933500 }, { "epoch": 7.65, "learning_rate": 6.961975373588257e-06, "loss": 3.0814, "step": 934000 }, { "epoch": 7.66, "learning_rate": 6.960331420868336e-06, "loss": 3.0757, "step": 934500 }, { "epoch": 7.66, "learning_rate": 6.958687468148416e-06, "loss": 3.0666, "step": 935000 }, { "epoch": 7.66, "learning_rate": 6.957043515428496e-06, "loss": 3.0683, "step": 935500 }, { "epoch": 7.67, "learning_rate": 6.955399562708577e-06, "loss": 3.0742, "step": 936000 }, { "epoch": 7.67, "learning_rate": 6.953755609988657e-06, "loss": 3.0645, "step": 936500 }, { "epoch": 7.68, "learning_rate": 6.9521116572687374e-06, "loss": 3.0785, "step": 937000 }, { "epoch": 7.68, "learning_rate": 6.9504677045488176e-06, "loss": 3.0819, "step": 937500 }, { "epoch": 7.68, "learning_rate": 6.9488237518288985e-06, "loss": 3.0803, "step": 938000 }, { "epoch": 7.69, "learning_rate": 6.947179799108979e-06, "loss": 3.0805, "step": 938500 }, { "epoch": 7.69, "learning_rate": 6.945535846389059e-06, "loss": 3.0775, "step": 939000 }, { "epoch": 7.7, "learning_rate": 6.943891893669139e-06, "loss": 3.0725, "step": 939500 }, { "epoch": 7.7, "learning_rate": 6.942247940949219e-06, "loss": 3.0744, "step": 940000 }, { "epoch": 7.71, "learning_rate": 6.940603988229298e-06, "loss": 3.0714, "step": 940500 }, { "epoch": 7.71, "learning_rate": 6.938960035509379e-06, "loss": 3.0715, "step": 941000 }, { "epoch": 7.71, "learning_rate": 6.9373160827894594e-06, "loss": 3.0773, "step": 941500 }, { "epoch": 7.72, "learning_rate": 6.9356721300695396e-06, "loss": 3.0694, "step": 942000 }, { "epoch": 7.72, "learning_rate": 6.93402817734962e-06, "loss": 3.0711, "step": 942500 }, { "epoch": 7.73, "learning_rate": 6.9323842246297e-06, "loss": 3.0744, "step": 943000 }, { "epoch": 7.73, "learning_rate": 6.930740271909781e-06, "loss": 3.0734, "step": 943500 }, { "epoch": 7.73, "learning_rate": 6.929096319189861e-06, "loss": 3.0716, "step": 944000 }, { "epoch": 7.74, "learning_rate": 6.927452366469941e-06, "loss": 3.0772, "step": 944500 }, { "epoch": 7.74, "learning_rate": 6.925808413750021e-06, "loss": 3.0681, "step": 945000 }, { "epoch": 7.75, "learning_rate": 6.924164461030102e-06, "loss": 3.0786, "step": 945500 }, { "epoch": 7.75, "learning_rate": 6.922520508310182e-06, "loss": 3.075, "step": 946000 }, { "epoch": 7.75, "learning_rate": 6.9208765555902615e-06, "loss": 3.0774, "step": 946500 }, { "epoch": 7.76, "learning_rate": 6.919232602870342e-06, "loss": 3.0651, "step": 947000 }, { "epoch": 7.76, "learning_rate": 6.917588650150422e-06, "loss": 3.0708, "step": 947500 }, { "epoch": 7.77, "learning_rate": 6.915944697430502e-06, "loss": 3.0813, "step": 948000 }, { "epoch": 7.77, "learning_rate": 6.914300744710583e-06, "loss": 3.0719, "step": 948500 }, { "epoch": 7.77, "learning_rate": 6.912656791990663e-06, "loss": 3.0739, "step": 949000 }, { "epoch": 7.78, "learning_rate": 6.911012839270743e-06, "loss": 3.07, "step": 949500 }, { "epoch": 7.78, "learning_rate": 6.909368886550823e-06, "loss": 3.0715, "step": 950000 }, { "epoch": 7.79, "learning_rate": 6.907724933830903e-06, "loss": 3.0675, "step": 950500 }, { "epoch": 7.79, "learning_rate": 6.906080981110984e-06, "loss": 3.0693, "step": 951000 }, { "epoch": 7.8, "learning_rate": 6.9044370283910645e-06, "loss": 3.0755, "step": 951500 }, { "epoch": 7.8, "learning_rate": 6.902793075671145e-06, "loss": 3.0741, "step": 952000 }, { "epoch": 7.8, "learning_rate": 6.901149122951224e-06, "loss": 3.068, "step": 952500 }, { "epoch": 7.81, "learning_rate": 6.899505170231304e-06, "loss": 3.0707, "step": 953000 }, { "epoch": 7.81, "learning_rate": 6.897861217511385e-06, "loss": 3.0731, "step": 953500 }, { "epoch": 7.82, "learning_rate": 6.896217264791465e-06, "loss": 3.0658, "step": 954000 }, { "epoch": 7.82, "learning_rate": 6.894573312071545e-06, "loss": 3.064, "step": 954500 }, { "epoch": 7.82, "learning_rate": 6.892929359351625e-06, "loss": 3.0747, "step": 955000 }, { "epoch": 7.83, "learning_rate": 6.8912854066317055e-06, "loss": 3.0731, "step": 955500 }, { "epoch": 7.83, "learning_rate": 6.8896414539117865e-06, "loss": 3.0695, "step": 956000 }, { "epoch": 7.84, "learning_rate": 6.887997501191867e-06, "loss": 3.0668, "step": 956500 }, { "epoch": 7.84, "learning_rate": 6.886353548471947e-06, "loss": 3.0691, "step": 957000 }, { "epoch": 7.84, "learning_rate": 6.884709595752027e-06, "loss": 3.068, "step": 957500 }, { "epoch": 7.85, "learning_rate": 6.883065643032108e-06, "loss": 3.0675, "step": 958000 }, { "epoch": 7.85, "learning_rate": 6.881421690312186e-06, "loss": 3.0692, "step": 958500 }, { "epoch": 7.86, "learning_rate": 6.879777737592267e-06, "loss": 3.0684, "step": 959000 }, { "epoch": 7.86, "learning_rate": 6.878133784872347e-06, "loss": 3.0657, "step": 959500 }, { "epoch": 7.87, "learning_rate": 6.8764898321524275e-06, "loss": 3.0709, "step": 960000 }, { "epoch": 7.87, "eval_accuracy": 0.4803523362943657, "eval_loss": 2.925929307937622, "eval_runtime": 423.4884, "eval_samples_per_second": 728.133, "eval_steps_per_second": 15.172, "step": 960000 }, { "epoch": 7.87, "learning_rate": 6.874845879432508e-06, "loss": 3.0705, "step": 960500 }, { "epoch": 7.87, "learning_rate": 6.873201926712589e-06, "loss": 3.0672, "step": 961000 }, { "epoch": 7.88, "learning_rate": 6.871557973992669e-06, "loss": 3.064, "step": 961500 }, { "epoch": 7.88, "learning_rate": 6.869914021272749e-06, "loss": 3.0644, "step": 962000 }, { "epoch": 7.89, "learning_rate": 6.868270068552829e-06, "loss": 3.0636, "step": 962500 }, { "epoch": 7.89, "learning_rate": 6.866626115832909e-06, "loss": 3.0693, "step": 963000 }, { "epoch": 7.89, "learning_rate": 6.86498216311299e-06, "loss": 3.0597, "step": 963500 }, { "epoch": 7.9, "learning_rate": 6.86333821039307e-06, "loss": 3.0646, "step": 964000 }, { "epoch": 7.9, "learning_rate": 6.8616942576731495e-06, "loss": 3.0692, "step": 964500 }, { "epoch": 7.91, "learning_rate": 6.86005030495323e-06, "loss": 3.0657, "step": 965000 }, { "epoch": 7.91, "learning_rate": 6.85840635223331e-06, "loss": 3.061, "step": 965500 }, { "epoch": 7.91, "learning_rate": 6.856762399513391e-06, "loss": 3.0604, "step": 966000 }, { "epoch": 7.92, "learning_rate": 6.855118446793471e-06, "loss": 3.062, "step": 966500 }, { "epoch": 7.92, "learning_rate": 6.853474494073551e-06, "loss": 3.0679, "step": 967000 }, { "epoch": 7.93, "learning_rate": 6.851830541353631e-06, "loss": 3.0556, "step": 967500 }, { "epoch": 7.93, "learning_rate": 6.850186588633711e-06, "loss": 3.0666, "step": 968000 }, { "epoch": 7.93, "learning_rate": 6.848542635913792e-06, "loss": 3.0748, "step": 968500 }, { "epoch": 7.94, "learning_rate": 6.846898683193872e-06, "loss": 3.0643, "step": 969000 }, { "epoch": 7.94, "learning_rate": 6.8452547304739524e-06, "loss": 3.0599, "step": 969500 }, { "epoch": 7.95, "learning_rate": 6.8436107777540326e-06, "loss": 3.0642, "step": 970000 }, { "epoch": 7.95, "learning_rate": 6.841966825034112e-06, "loss": 3.0588, "step": 970500 }, { "epoch": 7.96, "learning_rate": 6.840322872314192e-06, "loss": 3.058, "step": 971000 }, { "epoch": 7.96, "learning_rate": 6.838678919594273e-06, "loss": 3.0621, "step": 971500 }, { "epoch": 7.96, "learning_rate": 6.837034966874353e-06, "loss": 3.0656, "step": 972000 }, { "epoch": 7.97, "learning_rate": 6.835391014154433e-06, "loss": 3.0681, "step": 972500 }, { "epoch": 7.97, "learning_rate": 6.833747061434513e-06, "loss": 3.0672, "step": 973000 }, { "epoch": 7.98, "learning_rate": 6.832103108714594e-06, "loss": 3.068, "step": 973500 }, { "epoch": 7.98, "learning_rate": 6.8304591559946744e-06, "loss": 3.0616, "step": 974000 }, { "epoch": 7.98, "learning_rate": 6.8288152032747546e-06, "loss": 3.0651, "step": 974500 }, { "epoch": 7.99, "learning_rate": 6.827171250554835e-06, "loss": 3.0669, "step": 975000 }, { "epoch": 7.99, "learning_rate": 6.825527297834915e-06, "loss": 3.0668, "step": 975500 }, { "epoch": 8.0, "learning_rate": 6.823883345114996e-06, "loss": 3.0692, "step": 976000 }, { "epoch": 8.0, "learning_rate": 6.822239392395076e-06, "loss": 3.0622, "step": 976500 }, { "epoch": 8.0, "learning_rate": 6.820595439675155e-06, "loss": 3.0618, "step": 977000 }, { "epoch": 8.01, "learning_rate": 6.818951486955235e-06, "loss": 3.0672, "step": 977500 }, { "epoch": 8.01, "learning_rate": 6.8173075342353154e-06, "loss": 3.0686, "step": 978000 }, { "epoch": 8.02, "learning_rate": 6.8156635815153956e-06, "loss": 3.065, "step": 978500 }, { "epoch": 8.02, "learning_rate": 6.8140196287954765e-06, "loss": 3.0568, "step": 979000 }, { "epoch": 8.02, "learning_rate": 6.812375676075557e-06, "loss": 3.0605, "step": 979500 }, { "epoch": 8.03, "learning_rate": 6.810731723355637e-06, "loss": 3.0586, "step": 980000 }, { "epoch": 8.03, "learning_rate": 6.809087770635717e-06, "loss": 3.061, "step": 980500 }, { "epoch": 8.04, "learning_rate": 6.807443817915798e-06, "loss": 3.0603, "step": 981000 }, { "epoch": 8.04, "learning_rate": 6.805799865195878e-06, "loss": 3.0591, "step": 981500 }, { "epoch": 8.05, "learning_rate": 6.804155912475958e-06, "loss": 3.0645, "step": 982000 }, { "epoch": 8.05, "learning_rate": 6.802511959756038e-06, "loss": 3.0609, "step": 982500 }, { "epoch": 8.05, "learning_rate": 6.8008680070361176e-06, "loss": 3.0596, "step": 983000 }, { "epoch": 8.06, "learning_rate": 6.799224054316198e-06, "loss": 3.0609, "step": 983500 }, { "epoch": 8.06, "learning_rate": 6.797580101596279e-06, "loss": 3.0547, "step": 984000 }, { "epoch": 8.07, "learning_rate": 6.795936148876359e-06, "loss": 3.0566, "step": 984500 }, { "epoch": 8.07, "learning_rate": 6.794292196156439e-06, "loss": 3.0511, "step": 985000 }, { "epoch": 8.07, "learning_rate": 6.792648243436519e-06, "loss": 3.0621, "step": 985500 }, { "epoch": 8.08, "learning_rate": 6.7910042907166e-06, "loss": 3.0607, "step": 986000 }, { "epoch": 8.08, "learning_rate": 6.78936033799668e-06, "loss": 3.0596, "step": 986500 }, { "epoch": 8.09, "learning_rate": 6.78771638527676e-06, "loss": 3.0615, "step": 987000 }, { "epoch": 8.09, "learning_rate": 6.78607243255684e-06, "loss": 3.0581, "step": 987500 }, { "epoch": 8.09, "learning_rate": 6.7844284798369205e-06, "loss": 3.0586, "step": 988000 }, { "epoch": 8.1, "learning_rate": 6.7827845271170015e-06, "loss": 3.0569, "step": 988500 }, { "epoch": 8.1, "learning_rate": 6.781140574397081e-06, "loss": 3.0522, "step": 989000 }, { "epoch": 8.11, "learning_rate": 6.779496621677161e-06, "loss": 3.0566, "step": 989500 }, { "epoch": 8.11, "learning_rate": 6.777852668957241e-06, "loss": 3.0528, "step": 990000 }, { "epoch": 8.11, "eval_accuracy": 0.4812049123013876, "eval_loss": 2.920849323272705, "eval_runtime": 415.0385, "eval_samples_per_second": 742.958, "eval_steps_per_second": 15.48, "step": 990000 }, { "epoch": 8.11, "learning_rate": 6.776208716237321e-06, "loss": 3.0549, "step": 990500 }, { "epoch": 8.12, "learning_rate": 6.774564763517401e-06, "loss": 3.0566, "step": 991000 }, { "epoch": 8.12, "learning_rate": 6.772920810797482e-06, "loss": 3.0576, "step": 991500 }, { "epoch": 8.13, "learning_rate": 6.771276858077562e-06, "loss": 3.0539, "step": 992000 }, { "epoch": 8.13, "learning_rate": 6.7696329053576425e-06, "loss": 3.0525, "step": 992500 }, { "epoch": 8.14, "learning_rate": 6.767988952637723e-06, "loss": 3.0545, "step": 993000 }, { "epoch": 8.14, "learning_rate": 6.766344999917804e-06, "loss": 3.0543, "step": 993500 }, { "epoch": 8.14, "learning_rate": 6.764701047197884e-06, "loss": 3.0492, "step": 994000 }, { "epoch": 8.15, "learning_rate": 6.763057094477964e-06, "loss": 3.0608, "step": 994500 }, { "epoch": 8.15, "learning_rate": 6.761413141758043e-06, "loss": 3.0615, "step": 995000 }, { "epoch": 8.16, "learning_rate": 6.759769189038123e-06, "loss": 3.0605, "step": 995500 }, { "epoch": 8.16, "learning_rate": 6.758125236318203e-06, "loss": 3.0584, "step": 996000 }, { "epoch": 8.16, "learning_rate": 6.756481283598284e-06, "loss": 3.0554, "step": 996500 }, { "epoch": 8.17, "learning_rate": 6.7548373308783645e-06, "loss": 3.0558, "step": 997000 }, { "epoch": 8.17, "learning_rate": 6.753193378158445e-06, "loss": 3.056, "step": 997500 }, { "epoch": 8.18, "learning_rate": 6.751549425438525e-06, "loss": 3.0517, "step": 998000 }, { "epoch": 8.18, "learning_rate": 6.749905472718606e-06, "loss": 3.0549, "step": 998500 }, { "epoch": 8.18, "learning_rate": 6.748261519998686e-06, "loss": 3.0613, "step": 999000 }, { "epoch": 8.19, "learning_rate": 6.746617567278766e-06, "loss": 3.0511, "step": 999500 }, { "epoch": 8.19, "learning_rate": 6.744973614558846e-06, "loss": 3.0535, "step": 1000000 }, { "epoch": 8.2, "learning_rate": 6.743329661838926e-06, "loss": 3.0498, "step": 1000500 }, { "epoch": 8.2, "learning_rate": 6.7416857091190055e-06, "loss": 3.0545, "step": 1001000 }, { "epoch": 8.21, "learning_rate": 6.7400417563990865e-06, "loss": 3.0522, "step": 1001500 }, { "epoch": 8.21, "learning_rate": 6.738397803679167e-06, "loss": 3.0562, "step": 1002000 }, { "epoch": 8.21, "learning_rate": 6.736753850959247e-06, "loss": 3.0551, "step": 1002500 }, { "epoch": 8.22, "learning_rate": 6.735109898239327e-06, "loss": 3.0594, "step": 1003000 }, { "epoch": 8.22, "learning_rate": 6.733465945519407e-06, "loss": 3.0588, "step": 1003500 }, { "epoch": 8.23, "learning_rate": 6.731821992799488e-06, "loss": 3.0536, "step": 1004000 }, { "epoch": 8.23, "learning_rate": 6.730178040079568e-06, "loss": 3.0492, "step": 1004500 }, { "epoch": 8.23, "learning_rate": 6.728534087359648e-06, "loss": 3.049, "step": 1005000 }, { "epoch": 8.24, "learning_rate": 6.726890134639728e-06, "loss": 3.0574, "step": 1005500 }, { "epoch": 8.24, "learning_rate": 6.725246181919809e-06, "loss": 3.0467, "step": 1006000 }, { "epoch": 8.25, "learning_rate": 6.7236022291998894e-06, "loss": 3.0529, "step": 1006500 }, { "epoch": 8.25, "learning_rate": 6.721958276479969e-06, "loss": 3.0568, "step": 1007000 }, { "epoch": 8.25, "learning_rate": 6.720314323760049e-06, "loss": 3.0447, "step": 1007500 }, { "epoch": 8.26, "learning_rate": 6.718670371040129e-06, "loss": 3.0549, "step": 1008000 }, { "epoch": 8.26, "learning_rate": 6.717026418320209e-06, "loss": 3.0504, "step": 1008500 }, { "epoch": 8.27, "learning_rate": 6.71538246560029e-06, "loss": 3.0499, "step": 1009000 }, { "epoch": 8.27, "learning_rate": 6.71373851288037e-06, "loss": 3.0537, "step": 1009500 }, { "epoch": 8.27, "learning_rate": 6.71209456016045e-06, "loss": 3.0496, "step": 1010000 }, { "epoch": 8.28, "learning_rate": 6.7104506074405304e-06, "loss": 3.0458, "step": 1010500 }, { "epoch": 8.28, "learning_rate": 6.708806654720611e-06, "loss": 3.047, "step": 1011000 }, { "epoch": 8.29, "learning_rate": 6.7071627020006915e-06, "loss": 3.0525, "step": 1011500 }, { "epoch": 8.29, "learning_rate": 6.705518749280772e-06, "loss": 3.0566, "step": 1012000 }, { "epoch": 8.3, "learning_rate": 6.703874796560852e-06, "loss": 3.0503, "step": 1012500 }, { "epoch": 8.3, "learning_rate": 6.702230843840931e-06, "loss": 3.0531, "step": 1013000 }, { "epoch": 8.3, "learning_rate": 6.700586891121011e-06, "loss": 3.0495, "step": 1013500 }, { "epoch": 8.31, "learning_rate": 6.698942938401092e-06, "loss": 3.052, "step": 1014000 }, { "epoch": 8.31, "learning_rate": 6.697298985681172e-06, "loss": 3.0462, "step": 1014500 }, { "epoch": 8.32, "learning_rate": 6.695655032961252e-06, "loss": 3.0589, "step": 1015000 }, { "epoch": 8.32, "learning_rate": 6.6940110802413325e-06, "loss": 3.0518, "step": 1015500 }, { "epoch": 8.32, "learning_rate": 6.692367127521413e-06, "loss": 3.0572, "step": 1016000 }, { "epoch": 8.33, "learning_rate": 6.690723174801494e-06, "loss": 3.0515, "step": 1016500 }, { "epoch": 8.33, "learning_rate": 6.689079222081574e-06, "loss": 3.0498, "step": 1017000 }, { "epoch": 8.34, "learning_rate": 6.687435269361654e-06, "loss": 3.0501, "step": 1017500 }, { "epoch": 8.34, "learning_rate": 6.685791316641734e-06, "loss": 3.0552, "step": 1018000 }, { "epoch": 8.34, "learning_rate": 6.684147363921815e-06, "loss": 3.049, "step": 1018500 }, { "epoch": 8.35, "learning_rate": 6.6825034112018934e-06, "loss": 3.0508, "step": 1019000 }, { "epoch": 8.35, "learning_rate": 6.680859458481974e-06, "loss": 3.0454, "step": 1019500 }, { "epoch": 8.36, "learning_rate": 6.6792155057620545e-06, "loss": 3.0541, "step": 1020000 }, { "epoch": 8.36, "eval_accuracy": 0.4829413695036341, "eval_loss": 2.90887713432312, "eval_runtime": 413.3015, "eval_samples_per_second": 746.08, "eval_steps_per_second": 15.546, "step": 1020000 }, { "epoch": 8.36, "learning_rate": 6.677571553042135e-06, "loss": 3.0473, "step": 1020500 }, { "epoch": 8.36, "learning_rate": 6.675927600322215e-06, "loss": 3.046, "step": 1021000 }, { "epoch": 8.37, "learning_rate": 6.674283647602296e-06, "loss": 3.0498, "step": 1021500 }, { "epoch": 8.37, "learning_rate": 6.672639694882376e-06, "loss": 3.0504, "step": 1022000 }, { "epoch": 8.38, "learning_rate": 6.670995742162456e-06, "loss": 3.0417, "step": 1022500 }, { "epoch": 8.38, "learning_rate": 6.669351789442536e-06, "loss": 3.0592, "step": 1023000 }, { "epoch": 8.39, "learning_rate": 6.667707836722616e-06, "loss": 3.0561, "step": 1023500 }, { "epoch": 8.39, "learning_rate": 6.666063884002697e-06, "loss": 3.0394, "step": 1024000 }, { "epoch": 8.39, "learning_rate": 6.664419931282777e-06, "loss": 3.0459, "step": 1024500 }, { "epoch": 8.4, "learning_rate": 6.6627759785628575e-06, "loss": 3.0518, "step": 1025000 }, { "epoch": 8.4, "learning_rate": 6.661132025842937e-06, "loss": 3.0447, "step": 1025500 }, { "epoch": 8.41, "learning_rate": 6.659488073123017e-06, "loss": 3.0462, "step": 1026000 }, { "epoch": 8.41, "learning_rate": 6.657844120403098e-06, "loss": 3.052, "step": 1026500 }, { "epoch": 8.41, "learning_rate": 6.656200167683178e-06, "loss": 3.0463, "step": 1027000 }, { "epoch": 8.42, "learning_rate": 6.654556214963258e-06, "loss": 3.0511, "step": 1027500 }, { "epoch": 8.42, "learning_rate": 6.652912262243338e-06, "loss": 3.0508, "step": 1028000 }, { "epoch": 8.43, "learning_rate": 6.651268309523418e-06, "loss": 3.0476, "step": 1028500 }, { "epoch": 8.43, "learning_rate": 6.649624356803499e-06, "loss": 3.0537, "step": 1029000 }, { "epoch": 8.43, "learning_rate": 6.6479804040835795e-06, "loss": 3.046, "step": 1029500 }, { "epoch": 8.44, "learning_rate": 6.64633645136366e-06, "loss": 3.0525, "step": 1030000 }, { "epoch": 8.44, "learning_rate": 6.64469249864374e-06, "loss": 3.0457, "step": 1030500 }, { "epoch": 8.45, "learning_rate": 6.643048545923821e-06, "loss": 3.0466, "step": 1031000 }, { "epoch": 8.45, "learning_rate": 6.641404593203899e-06, "loss": 3.0431, "step": 1031500 }, { "epoch": 8.45, "learning_rate": 6.63976064048398e-06, "loss": 3.0475, "step": 1032000 }, { "epoch": 8.46, "learning_rate": 6.63811668776406e-06, "loss": 3.0473, "step": 1032500 }, { "epoch": 8.46, "learning_rate": 6.63647273504414e-06, "loss": 3.0418, "step": 1033000 }, { "epoch": 8.47, "learning_rate": 6.6348287823242205e-06, "loss": 3.0426, "step": 1033500 }, { "epoch": 8.47, "learning_rate": 6.6331848296043015e-06, "loss": 3.0471, "step": 1034000 }, { "epoch": 8.48, "learning_rate": 6.631540876884382e-06, "loss": 3.0357, "step": 1034500 }, { "epoch": 8.48, "learning_rate": 6.629896924164462e-06, "loss": 3.0484, "step": 1035000 }, { "epoch": 8.48, "learning_rate": 6.628252971444542e-06, "loss": 3.051, "step": 1035500 }, { "epoch": 8.49, "learning_rate": 6.626609018724622e-06, "loss": 3.0439, "step": 1036000 }, { "epoch": 8.49, "learning_rate": 6.624965066004703e-06, "loss": 3.0461, "step": 1036500 }, { "epoch": 8.5, "learning_rate": 6.623321113284783e-06, "loss": 3.0467, "step": 1037000 }, { "epoch": 8.5, "learning_rate": 6.621677160564862e-06, "loss": 3.047, "step": 1037500 }, { "epoch": 8.5, "learning_rate": 6.6200332078449425e-06, "loss": 3.0444, "step": 1038000 }, { "epoch": 8.51, "learning_rate": 6.618389255125023e-06, "loss": 3.0433, "step": 1038500 }, { "epoch": 8.51, "learning_rate": 6.616745302405104e-06, "loss": 3.0416, "step": 1039000 }, { "epoch": 8.52, "learning_rate": 6.615101349685184e-06, "loss": 3.042, "step": 1039500 }, { "epoch": 8.52, "learning_rate": 6.613457396965264e-06, "loss": 3.042, "step": 1040000 }, { "epoch": 8.52, "learning_rate": 6.611813444245344e-06, "loss": 3.0503, "step": 1040500 }, { "epoch": 8.53, "learning_rate": 6.610169491525424e-06, "loss": 3.0503, "step": 1041000 }, { "epoch": 8.53, "learning_rate": 6.608525538805505e-06, "loss": 3.0391, "step": 1041500 }, { "epoch": 8.54, "learning_rate": 6.606881586085585e-06, "loss": 3.0456, "step": 1042000 }, { "epoch": 8.54, "learning_rate": 6.605237633365665e-06, "loss": 3.0408, "step": 1042500 }, { "epoch": 8.55, "learning_rate": 6.6035936806457454e-06, "loss": 3.0393, "step": 1043000 }, { "epoch": 8.55, "learning_rate": 6.601949727925825e-06, "loss": 3.048, "step": 1043500 }, { "epoch": 8.55, "learning_rate": 6.600305775205905e-06, "loss": 3.0361, "step": 1044000 }, { "epoch": 8.56, "learning_rate": 6.598661822485986e-06, "loss": 3.0415, "step": 1044500 }, { "epoch": 8.56, "learning_rate": 6.597017869766066e-06, "loss": 3.0402, "step": 1045000 }, { "epoch": 8.57, "learning_rate": 6.595373917046146e-06, "loss": 3.0475, "step": 1045500 }, { "epoch": 8.57, "learning_rate": 6.593729964326226e-06, "loss": 3.044, "step": 1046000 }, { "epoch": 8.57, "learning_rate": 6.592086011606307e-06, "loss": 3.0376, "step": 1046500 }, { "epoch": 8.58, "learning_rate": 6.590442058886387e-06, "loss": 3.0411, "step": 1047000 }, { "epoch": 8.58, "learning_rate": 6.588798106166467e-06, "loss": 3.0327, "step": 1047500 }, { "epoch": 8.59, "learning_rate": 6.5871541534465475e-06, "loss": 3.038, "step": 1048000 }, { "epoch": 8.59, "learning_rate": 6.585510200726628e-06, "loss": 3.0352, "step": 1048500 }, { "epoch": 8.59, "learning_rate": 6.583866248006709e-06, "loss": 3.0367, "step": 1049000 }, { "epoch": 8.6, "learning_rate": 6.582222295286788e-06, "loss": 3.0446, "step": 1049500 }, { "epoch": 8.6, "learning_rate": 6.580578342566868e-06, "loss": 3.0469, "step": 1050000 }, { "epoch": 8.6, "eval_accuracy": 0.4838990642400208, "eval_loss": 2.9014947414398193, "eval_runtime": 404.752, "eval_samples_per_second": 761.839, "eval_steps_per_second": 15.874, "step": 1050000 }, { "epoch": 8.61, "learning_rate": 6.578934389846948e-06, "loss": 3.0436, "step": 1050500 }, { "epoch": 8.61, "learning_rate": 6.577290437127028e-06, "loss": 3.0516, "step": 1051000 }, { "epoch": 8.61, "learning_rate": 6.5756464844071084e-06, "loss": 3.046, "step": 1051500 }, { "epoch": 8.62, "learning_rate": 6.574002531687189e-06, "loss": 3.0435, "step": 1052000 }, { "epoch": 8.62, "learning_rate": 6.5723585789672695e-06, "loss": 3.0383, "step": 1052500 }, { "epoch": 8.63, "learning_rate": 6.57071462624735e-06, "loss": 3.0484, "step": 1053000 }, { "epoch": 8.63, "learning_rate": 6.56907067352743e-06, "loss": 3.0375, "step": 1053500 }, { "epoch": 8.64, "learning_rate": 6.567426720807511e-06, "loss": 3.0459, "step": 1054000 }, { "epoch": 8.64, "learning_rate": 6.565782768087591e-06, "loss": 3.0424, "step": 1054500 }, { "epoch": 8.64, "learning_rate": 6.564138815367671e-06, "loss": 3.0355, "step": 1055000 }, { "epoch": 8.65, "learning_rate": 6.56249486264775e-06, "loss": 3.0374, "step": 1055500 }, { "epoch": 8.65, "learning_rate": 6.56085090992783e-06, "loss": 3.0422, "step": 1056000 }, { "epoch": 8.66, "learning_rate": 6.5592069572079105e-06, "loss": 3.0372, "step": 1056500 }, { "epoch": 8.66, "learning_rate": 6.5575630044879915e-06, "loss": 3.0446, "step": 1057000 }, { "epoch": 8.66, "learning_rate": 6.555919051768072e-06, "loss": 3.0456, "step": 1057500 }, { "epoch": 8.67, "learning_rate": 6.554275099048152e-06, "loss": 3.0372, "step": 1058000 }, { "epoch": 8.67, "learning_rate": 6.552631146328232e-06, "loss": 3.0409, "step": 1058500 }, { "epoch": 8.68, "learning_rate": 6.550987193608313e-06, "loss": 3.0376, "step": 1059000 }, { "epoch": 8.68, "learning_rate": 6.549343240888393e-06, "loss": 3.0434, "step": 1059500 }, { "epoch": 8.68, "learning_rate": 6.547699288168473e-06, "loss": 3.0451, "step": 1060000 }, { "epoch": 8.69, "learning_rate": 6.546055335448553e-06, "loss": 3.0341, "step": 1060500 }, { "epoch": 8.69, "learning_rate": 6.544411382728633e-06, "loss": 3.0415, "step": 1061000 }, { "epoch": 8.7, "learning_rate": 6.542767430008713e-06, "loss": 3.0404, "step": 1061500 }, { "epoch": 8.7, "learning_rate": 6.541123477288794e-06, "loss": 3.0347, "step": 1062000 }, { "epoch": 8.7, "learning_rate": 6.539479524568874e-06, "loss": 3.0389, "step": 1062500 }, { "epoch": 8.71, "learning_rate": 6.537835571848954e-06, "loss": 3.044, "step": 1063000 }, { "epoch": 8.71, "learning_rate": 6.536191619129034e-06, "loss": 3.0347, "step": 1063500 }, { "epoch": 8.72, "learning_rate": 6.534547666409114e-06, "loss": 3.0444, "step": 1064000 }, { "epoch": 8.72, "learning_rate": 6.532903713689195e-06, "loss": 3.0326, "step": 1064500 }, { "epoch": 8.73, "learning_rate": 6.531259760969275e-06, "loss": 3.0371, "step": 1065000 }, { "epoch": 8.73, "learning_rate": 6.529615808249355e-06, "loss": 3.0326, "step": 1065500 }, { "epoch": 8.73, "learning_rate": 6.5279718555294355e-06, "loss": 3.0373, "step": 1066000 }, { "epoch": 8.74, "learning_rate": 6.5263279028095165e-06, "loss": 3.0276, "step": 1066500 }, { "epoch": 8.74, "learning_rate": 6.524683950089597e-06, "loss": 3.0328, "step": 1067000 }, { "epoch": 8.75, "learning_rate": 6.523039997369676e-06, "loss": 3.0332, "step": 1067500 }, { "epoch": 8.75, "learning_rate": 6.521396044649756e-06, "loss": 3.0334, "step": 1068000 }, { "epoch": 8.75, "learning_rate": 6.519752091929836e-06, "loss": 3.0387, "step": 1068500 }, { "epoch": 8.76, "learning_rate": 6.518108139209916e-06, "loss": 3.0343, "step": 1069000 }, { "epoch": 8.76, "learning_rate": 6.516464186489997e-06, "loss": 3.0377, "step": 1069500 }, { "epoch": 8.77, "learning_rate": 6.514820233770077e-06, "loss": 3.0383, "step": 1070000 }, { "epoch": 8.77, "learning_rate": 6.5131762810501575e-06, "loss": 3.0366, "step": 1070500 }, { "epoch": 8.77, "learning_rate": 6.511532328330238e-06, "loss": 3.0359, "step": 1071000 }, { "epoch": 8.78, "learning_rate": 6.5098883756103186e-06, "loss": 3.0398, "step": 1071500 }, { "epoch": 8.78, "learning_rate": 6.508244422890399e-06, "loss": 3.037, "step": 1072000 }, { "epoch": 8.79, "learning_rate": 6.506600470170479e-06, "loss": 3.0314, "step": 1072500 }, { "epoch": 8.79, "learning_rate": 6.504956517450559e-06, "loss": 3.0387, "step": 1073000 }, { "epoch": 8.79, "learning_rate": 6.503312564730639e-06, "loss": 3.0297, "step": 1073500 }, { "epoch": 8.8, "learning_rate": 6.501668612010718e-06, "loss": 3.0405, "step": 1074000 }, { "epoch": 8.8, "learning_rate": 6.500024659290799e-06, "loss": 3.0421, "step": 1074500 }, { "epoch": 8.81, "learning_rate": 6.4983807065708795e-06, "loss": 3.0287, "step": 1075000 }, { "epoch": 8.81, "learning_rate": 6.49673675385096e-06, "loss": 3.0283, "step": 1075500 }, { "epoch": 8.82, "learning_rate": 6.49509280113104e-06, "loss": 3.0377, "step": 1076000 }, { "epoch": 8.82, "learning_rate": 6.49344884841112e-06, "loss": 3.0367, "step": 1076500 }, { "epoch": 8.82, "learning_rate": 6.491804895691201e-06, "loss": 3.0373, "step": 1077000 }, { "epoch": 8.83, "learning_rate": 6.490160942971281e-06, "loss": 3.0327, "step": 1077500 }, { "epoch": 8.83, "learning_rate": 6.488516990251361e-06, "loss": 3.0261, "step": 1078000 }, { "epoch": 8.84, "learning_rate": 6.486873037531441e-06, "loss": 3.0347, "step": 1078500 }, { "epoch": 8.84, "learning_rate": 6.485229084811522e-06, "loss": 3.0382, "step": 1079000 }, { "epoch": 8.84, "learning_rate": 6.483585132091602e-06, "loss": 3.0369, "step": 1079500 }, { "epoch": 8.85, "learning_rate": 6.4819411793716816e-06, "loss": 3.0377, "step": 1080000 }, { "epoch": 8.85, "eval_accuracy": 0.4847903545689289, "eval_loss": 2.895965814590454, "eval_runtime": 407.6957, "eval_samples_per_second": 756.339, "eval_steps_per_second": 15.759, "step": 1080000 }, { "epoch": 8.85, "learning_rate": 6.480297226651762e-06, "loss": 3.0378, "step": 1080500 }, { "epoch": 8.86, "learning_rate": 6.478653273931842e-06, "loss": 3.0374, "step": 1081000 }, { "epoch": 8.86, "learning_rate": 6.477009321211922e-06, "loss": 3.0315, "step": 1081500 }, { "epoch": 8.86, "learning_rate": 6.475365368492003e-06, "loss": 3.032, "step": 1082000 }, { "epoch": 8.87, "learning_rate": 6.473721415772083e-06, "loss": 3.0362, "step": 1082500 }, { "epoch": 8.87, "learning_rate": 6.472077463052163e-06, "loss": 3.0368, "step": 1083000 }, { "epoch": 8.88, "learning_rate": 6.470433510332243e-06, "loss": 3.0297, "step": 1083500 }, { "epoch": 8.88, "learning_rate": 6.468789557612324e-06, "loss": 3.031, "step": 1084000 }, { "epoch": 8.89, "learning_rate": 6.467145604892404e-06, "loss": 3.0321, "step": 1084500 }, { "epoch": 8.89, "learning_rate": 6.4655016521724845e-06, "loss": 3.0335, "step": 1085000 }, { "epoch": 8.89, "learning_rate": 6.463857699452565e-06, "loss": 3.0344, "step": 1085500 }, { "epoch": 8.9, "learning_rate": 6.462213746732644e-06, "loss": 3.0378, "step": 1086000 }, { "epoch": 8.9, "learning_rate": 6.460569794012724e-06, "loss": 3.0306, "step": 1086500 }, { "epoch": 8.91, "learning_rate": 6.458925841292805e-06, "loss": 3.0363, "step": 1087000 }, { "epoch": 8.91, "learning_rate": 6.457281888572885e-06, "loss": 3.0303, "step": 1087500 }, { "epoch": 8.91, "learning_rate": 6.455637935852965e-06, "loss": 3.0305, "step": 1088000 }, { "epoch": 8.92, "learning_rate": 6.453993983133045e-06, "loss": 3.0352, "step": 1088500 }, { "epoch": 8.92, "learning_rate": 6.4523500304131255e-06, "loss": 3.0338, "step": 1089000 }, { "epoch": 8.93, "learning_rate": 6.4507060776932065e-06, "loss": 3.036, "step": 1089500 }, { "epoch": 8.93, "learning_rate": 6.449062124973287e-06, "loss": 3.0388, "step": 1090000 }, { "epoch": 8.93, "learning_rate": 6.447418172253367e-06, "loss": 3.0292, "step": 1090500 }, { "epoch": 8.94, "learning_rate": 6.445774219533447e-06, "loss": 3.0351, "step": 1091000 }, { "epoch": 8.94, "learning_rate": 6.444130266813528e-06, "loss": 3.0325, "step": 1091500 }, { "epoch": 8.95, "learning_rate": 6.442486314093606e-06, "loss": 3.0258, "step": 1092000 }, { "epoch": 8.95, "learning_rate": 6.440842361373687e-06, "loss": 3.0293, "step": 1092500 }, { "epoch": 8.95, "learning_rate": 6.439198408653767e-06, "loss": 3.0273, "step": 1093000 }, { "epoch": 8.96, "learning_rate": 6.4375544559338475e-06, "loss": 3.0372, "step": 1093500 }, { "epoch": 8.96, "learning_rate": 6.435910503213928e-06, "loss": 3.0329, "step": 1094000 }, { "epoch": 8.97, "learning_rate": 6.434266550494009e-06, "loss": 3.0329, "step": 1094500 }, { "epoch": 8.97, "learning_rate": 6.432622597774089e-06, "loss": 3.0295, "step": 1095000 }, { "epoch": 8.98, "learning_rate": 6.430978645054169e-06, "loss": 3.0264, "step": 1095500 }, { "epoch": 8.98, "learning_rate": 6.429334692334249e-06, "loss": 3.028, "step": 1096000 }, { "epoch": 8.98, "learning_rate": 6.427690739614329e-06, "loss": 3.0408, "step": 1096500 }, { "epoch": 8.99, "learning_rate": 6.42604678689441e-06, "loss": 3.0269, "step": 1097000 }, { "epoch": 8.99, "learning_rate": 6.42440283417449e-06, "loss": 3.0304, "step": 1097500 }, { "epoch": 9.0, "learning_rate": 6.4227588814545695e-06, "loss": 3.0328, "step": 1098000 }, { "epoch": 9.0, "learning_rate": 6.42111492873465e-06, "loss": 3.0254, "step": 1098500 }, { "epoch": 9.0, "learning_rate": 6.41947097601473e-06, "loss": 3.0365, "step": 1099000 }, { "epoch": 9.01, "learning_rate": 6.417827023294811e-06, "loss": 3.0218, "step": 1099500 }, { "epoch": 9.01, "learning_rate": 6.416183070574891e-06, "loss": 3.0276, "step": 1100000 }, { "epoch": 9.02, "learning_rate": 6.414539117854971e-06, "loss": 3.0299, "step": 1100500 }, { "epoch": 9.02, "learning_rate": 6.412895165135051e-06, "loss": 3.0222, "step": 1101000 }, { "epoch": 9.02, "learning_rate": 6.411251212415131e-06, "loss": 3.0279, "step": 1101500 }, { "epoch": 9.03, "learning_rate": 6.409607259695212e-06, "loss": 3.0317, "step": 1102000 }, { "epoch": 9.03, "learning_rate": 6.407963306975292e-06, "loss": 3.0244, "step": 1102500 }, { "epoch": 9.04, "learning_rate": 6.4063193542553725e-06, "loss": 3.0241, "step": 1103000 }, { "epoch": 9.04, "learning_rate": 6.404675401535453e-06, "loss": 3.0223, "step": 1103500 }, { "epoch": 9.04, "learning_rate": 6.403031448815532e-06, "loss": 3.0291, "step": 1104000 }, { "epoch": 9.05, "learning_rate": 6.401387496095612e-06, "loss": 3.0236, "step": 1104500 }, { "epoch": 9.05, "learning_rate": 6.399743543375693e-06, "loss": 3.0248, "step": 1105000 }, { "epoch": 9.06, "learning_rate": 6.398099590655773e-06, "loss": 3.0271, "step": 1105500 }, { "epoch": 9.06, "learning_rate": 6.396455637935853e-06, "loss": 3.0282, "step": 1106000 }, { "epoch": 9.07, "learning_rate": 6.394811685215933e-06, "loss": 3.0316, "step": 1106500 }, { "epoch": 9.07, "learning_rate": 6.393167732496014e-06, "loss": 3.0257, "step": 1107000 }, { "epoch": 9.07, "learning_rate": 6.3915237797760945e-06, "loss": 3.0205, "step": 1107500 }, { "epoch": 9.08, "learning_rate": 6.389879827056175e-06, "loss": 3.0294, "step": 1108000 }, { "epoch": 9.08, "learning_rate": 6.388235874336255e-06, "loss": 3.0295, "step": 1108500 }, { "epoch": 9.09, "learning_rate": 6.386591921616335e-06, "loss": 3.0257, "step": 1109000 }, { "epoch": 9.09, "learning_rate": 6.384947968896416e-06, "loss": 3.0275, "step": 1109500 }, { "epoch": 9.09, "learning_rate": 6.383304016176495e-06, "loss": 3.0284, "step": 1110000 }, { "epoch": 9.09, "eval_accuracy": 0.48608889348818574, "eval_loss": 2.885928153991699, "eval_runtime": 407.7381, "eval_samples_per_second": 756.26, "eval_steps_per_second": 15.758, "step": 1110000 }, { "epoch": 9.1, "learning_rate": 6.381660063456575e-06, "loss": 3.0216, "step": 1110500 }, { "epoch": 9.1, "learning_rate": 6.380016110736655e-06, "loss": 3.0199, "step": 1111000 }, { "epoch": 9.11, "learning_rate": 6.3783721580167355e-06, "loss": 3.0232, "step": 1111500 }, { "epoch": 9.11, "learning_rate": 6.3767282052968164e-06, "loss": 3.0248, "step": 1112000 }, { "epoch": 9.11, "learning_rate": 6.3750842525768966e-06, "loss": 3.0276, "step": 1112500 }, { "epoch": 9.12, "learning_rate": 6.373440299856977e-06, "loss": 3.0284, "step": 1113000 }, { "epoch": 9.12, "learning_rate": 6.371796347137057e-06, "loss": 3.0302, "step": 1113500 }, { "epoch": 9.13, "learning_rate": 6.370152394417137e-06, "loss": 3.0274, "step": 1114000 }, { "epoch": 9.13, "learning_rate": 6.368508441697218e-06, "loss": 3.0335, "step": 1114500 }, { "epoch": 9.14, "learning_rate": 6.366864488977298e-06, "loss": 3.0209, "step": 1115000 }, { "epoch": 9.14, "learning_rate": 6.365220536257378e-06, "loss": 3.0297, "step": 1115500 }, { "epoch": 9.14, "learning_rate": 6.3635765835374575e-06, "loss": 3.0267, "step": 1116000 }, { "epoch": 9.15, "learning_rate": 6.361932630817538e-06, "loss": 3.0261, "step": 1116500 }, { "epoch": 9.15, "learning_rate": 6.360288678097618e-06, "loss": 3.0273, "step": 1117000 }, { "epoch": 9.16, "learning_rate": 6.358644725377699e-06, "loss": 3.0163, "step": 1117500 }, { "epoch": 9.16, "learning_rate": 6.357000772657779e-06, "loss": 3.0258, "step": 1118000 }, { "epoch": 9.16, "learning_rate": 6.355356819937859e-06, "loss": 3.0321, "step": 1118500 }, { "epoch": 9.17, "learning_rate": 6.353712867217939e-06, "loss": 3.0181, "step": 1119000 }, { "epoch": 9.17, "learning_rate": 6.35206891449802e-06, "loss": 3.0194, "step": 1119500 }, { "epoch": 9.18, "learning_rate": 6.3504249617781e-06, "loss": 3.0279, "step": 1120000 }, { "epoch": 9.18, "learning_rate": 6.34878100905818e-06, "loss": 3.0236, "step": 1120500 }, { "epoch": 9.18, "learning_rate": 6.34713705633826e-06, "loss": 3.0279, "step": 1121000 }, { "epoch": 9.19, "learning_rate": 6.3454931036183405e-06, "loss": 3.026, "step": 1121500 }, { "epoch": 9.19, "learning_rate": 6.34384915089842e-06, "loss": 3.0217, "step": 1122000 }, { "epoch": 9.2, "learning_rate": 6.342205198178501e-06, "loss": 3.0299, "step": 1122500 }, { "epoch": 9.2, "learning_rate": 6.340561245458581e-06, "loss": 3.0203, "step": 1123000 }, { "epoch": 9.2, "learning_rate": 6.338917292738661e-06, "loss": 3.0224, "step": 1123500 }, { "epoch": 9.21, "learning_rate": 6.337273340018741e-06, "loss": 3.0184, "step": 1124000 }, { "epoch": 9.21, "learning_rate": 6.335629387298821e-06, "loss": 3.0215, "step": 1124500 }, { "epoch": 9.22, "learning_rate": 6.333985434578902e-06, "loss": 3.0261, "step": 1125000 }, { "epoch": 9.22, "learning_rate": 6.332341481858982e-06, "loss": 3.0242, "step": 1125500 }, { "epoch": 9.23, "learning_rate": 6.3306975291390625e-06, "loss": 3.0246, "step": 1126000 }, { "epoch": 9.23, "learning_rate": 6.329053576419143e-06, "loss": 3.0327, "step": 1126500 }, { "epoch": 9.23, "learning_rate": 6.327409623699224e-06, "loss": 3.0222, "step": 1127000 }, { "epoch": 9.24, "learning_rate": 6.325765670979304e-06, "loss": 3.0257, "step": 1127500 }, { "epoch": 9.24, "learning_rate": 6.324121718259384e-06, "loss": 3.0228, "step": 1128000 }, { "epoch": 9.25, "learning_rate": 6.322477765539463e-06, "loss": 3.0246, "step": 1128500 }, { "epoch": 9.25, "learning_rate": 6.320833812819543e-06, "loss": 3.0205, "step": 1129000 }, { "epoch": 9.25, "learning_rate": 6.319189860099623e-06, "loss": 3.0198, "step": 1129500 }, { "epoch": 9.26, "learning_rate": 6.317545907379704e-06, "loss": 3.0227, "step": 1130000 }, { "epoch": 9.26, "learning_rate": 6.3159019546597845e-06, "loss": 3.0205, "step": 1130500 }, { "epoch": 9.27, "learning_rate": 6.314258001939865e-06, "loss": 3.0218, "step": 1131000 }, { "epoch": 9.27, "learning_rate": 6.312614049219945e-06, "loss": 3.0221, "step": 1131500 }, { "epoch": 9.27, "learning_rate": 6.310970096500026e-06, "loss": 3.017, "step": 1132000 }, { "epoch": 9.28, "learning_rate": 6.309326143780106e-06, "loss": 3.0187, "step": 1132500 }, { "epoch": 9.28, "learning_rate": 6.307682191060186e-06, "loss": 3.0173, "step": 1133000 }, { "epoch": 9.29, "learning_rate": 6.306038238340266e-06, "loss": 3.0229, "step": 1133500 }, { "epoch": 9.29, "learning_rate": 6.304394285620346e-06, "loss": 3.0254, "step": 1134000 }, { "epoch": 9.29, "learning_rate": 6.3027503329004255e-06, "loss": 3.0292, "step": 1134500 }, { "epoch": 9.3, "learning_rate": 6.3011063801805065e-06, "loss": 3.0172, "step": 1135000 }, { "epoch": 9.3, "learning_rate": 6.299462427460587e-06, "loss": 3.0233, "step": 1135500 }, { "epoch": 9.31, "learning_rate": 6.297818474740667e-06, "loss": 3.0245, "step": 1136000 }, { "epoch": 9.31, "learning_rate": 6.296174522020747e-06, "loss": 3.0165, "step": 1136500 }, { "epoch": 9.32, "learning_rate": 6.294530569300827e-06, "loss": 3.0161, "step": 1137000 }, { "epoch": 9.32, "learning_rate": 6.292886616580908e-06, "loss": 3.017, "step": 1137500 }, { "epoch": 9.32, "learning_rate": 6.291242663860988e-06, "loss": 3.0244, "step": 1138000 }, { "epoch": 9.33, "learning_rate": 6.289598711141068e-06, "loss": 3.0189, "step": 1138500 }, { "epoch": 9.33, "learning_rate": 6.287954758421148e-06, "loss": 3.0196, "step": 1139000 }, { "epoch": 9.34, "learning_rate": 6.286310805701229e-06, "loss": 3.0169, "step": 1139500 }, { "epoch": 9.34, "learning_rate": 6.2846668529813095e-06, "loss": 3.0224, "step": 1140000 }, { "epoch": 9.34, "eval_accuracy": 0.4867489885373119, "eval_loss": 2.881913185119629, "eval_runtime": 406.5724, "eval_samples_per_second": 758.428, "eval_steps_per_second": 15.803, "step": 1140000 }, { "epoch": 9.34, "learning_rate": 6.283022900261389e-06, "loss": 3.0198, "step": 1140500 }, { "epoch": 9.35, "learning_rate": 6.281378947541469e-06, "loss": 3.0248, "step": 1141000 }, { "epoch": 9.35, "learning_rate": 6.279734994821549e-06, "loss": 3.0256, "step": 1141500 }, { "epoch": 9.36, "learning_rate": 6.278091042101629e-06, "loss": 3.0153, "step": 1142000 }, { "epoch": 9.36, "learning_rate": 6.27644708938171e-06, "loss": 3.0159, "step": 1142500 }, { "epoch": 9.36, "learning_rate": 6.27480313666179e-06, "loss": 3.0145, "step": 1143000 }, { "epoch": 9.37, "learning_rate": 6.27315918394187e-06, "loss": 3.0236, "step": 1143500 }, { "epoch": 9.37, "learning_rate": 6.2715152312219505e-06, "loss": 3.0184, "step": 1144000 }, { "epoch": 9.38, "learning_rate": 6.2698712785020314e-06, "loss": 3.0168, "step": 1144500 }, { "epoch": 9.38, "learning_rate": 6.2682273257821116e-06, "loss": 3.0201, "step": 1145000 }, { "epoch": 9.38, "learning_rate": 6.266583373062192e-06, "loss": 3.0186, "step": 1145500 }, { "epoch": 9.39, "learning_rate": 6.264939420342272e-06, "loss": 3.0217, "step": 1146000 }, { "epoch": 9.39, "learning_rate": 6.263295467622351e-06, "loss": 3.0154, "step": 1146500 }, { "epoch": 9.4, "learning_rate": 6.261651514902431e-06, "loss": 3.0197, "step": 1147000 }, { "epoch": 9.4, "learning_rate": 6.260007562182512e-06, "loss": 3.0149, "step": 1147500 }, { "epoch": 9.41, "learning_rate": 6.258363609462592e-06, "loss": 3.0203, "step": 1148000 }, { "epoch": 9.41, "learning_rate": 6.2567196567426725e-06, "loss": 3.0206, "step": 1148500 }, { "epoch": 9.41, "learning_rate": 6.255075704022753e-06, "loss": 3.0151, "step": 1149000 }, { "epoch": 9.42, "learning_rate": 6.253431751302833e-06, "loss": 3.0192, "step": 1149500 }, { "epoch": 9.42, "learning_rate": 6.251787798582914e-06, "loss": 3.0161, "step": 1150000 }, { "epoch": 9.43, "learning_rate": 6.250143845862994e-06, "loss": 3.0224, "step": 1150500 }, { "epoch": 9.43, "learning_rate": 6.248499893143074e-06, "loss": 3.02, "step": 1151000 }, { "epoch": 9.43, "learning_rate": 6.246855940423154e-06, "loss": 3.0231, "step": 1151500 }, { "epoch": 9.44, "learning_rate": 6.245211987703235e-06, "loss": 3.0199, "step": 1152000 }, { "epoch": 9.44, "learning_rate": 6.2435680349833135e-06, "loss": 3.0159, "step": 1152500 }, { "epoch": 9.45, "learning_rate": 6.2419240822633944e-06, "loss": 3.0192, "step": 1153000 }, { "epoch": 9.45, "learning_rate": 6.2402801295434746e-06, "loss": 3.0151, "step": 1153500 }, { "epoch": 9.45, "learning_rate": 6.238636176823555e-06, "loss": 3.0198, "step": 1154000 }, { "epoch": 9.46, "learning_rate": 6.236992224103635e-06, "loss": 3.0142, "step": 1154500 }, { "epoch": 9.46, "learning_rate": 6.235348271383716e-06, "loss": 3.0177, "step": 1155000 }, { "epoch": 9.47, "learning_rate": 6.233704318663796e-06, "loss": 3.0148, "step": 1155500 }, { "epoch": 9.47, "learning_rate": 6.232060365943876e-06, "loss": 3.0101, "step": 1156000 }, { "epoch": 9.48, "learning_rate": 6.230416413223956e-06, "loss": 3.0113, "step": 1156500 }, { "epoch": 9.48, "learning_rate": 6.228772460504036e-06, "loss": 3.0084, "step": 1157000 }, { "epoch": 9.48, "learning_rate": 6.227128507784117e-06, "loss": 3.0133, "step": 1157500 }, { "epoch": 9.49, "learning_rate": 6.225484555064197e-06, "loss": 3.0143, "step": 1158000 }, { "epoch": 9.49, "learning_rate": 6.223840602344277e-06, "loss": 3.0167, "step": 1158500 }, { "epoch": 9.5, "learning_rate": 6.222196649624357e-06, "loss": 3.0136, "step": 1159000 }, { "epoch": 9.5, "learning_rate": 6.220552696904437e-06, "loss": 3.012, "step": 1159500 }, { "epoch": 9.5, "learning_rate": 6.218908744184518e-06, "loss": 3.012, "step": 1160000 }, { "epoch": 9.51, "learning_rate": 6.217264791464598e-06, "loss": 3.0111, "step": 1160500 }, { "epoch": 9.51, "learning_rate": 6.215620838744678e-06, "loss": 3.0147, "step": 1161000 }, { "epoch": 9.52, "learning_rate": 6.213976886024758e-06, "loss": 3.015, "step": 1161500 }, { "epoch": 9.52, "learning_rate": 6.212332933304838e-06, "loss": 3.017, "step": 1162000 }, { "epoch": 9.52, "learning_rate": 6.210688980584919e-06, "loss": 3.0108, "step": 1162500 }, { "epoch": 9.53, "learning_rate": 6.2090450278649995e-06, "loss": 3.0125, "step": 1163000 }, { "epoch": 9.53, "learning_rate": 6.20740107514508e-06, "loss": 3.0126, "step": 1163500 }, { "epoch": 9.54, "learning_rate": 6.20575712242516e-06, "loss": 3.0209, "step": 1164000 }, { "epoch": 9.54, "learning_rate": 6.204113169705239e-06, "loss": 3.0089, "step": 1164500 }, { "epoch": 9.54, "learning_rate": 6.202469216985319e-06, "loss": 3.0135, "step": 1165000 }, { "epoch": 9.55, "learning_rate": 6.2008252642654e-06, "loss": 3.011, "step": 1165500 }, { "epoch": 9.55, "learning_rate": 6.19918131154548e-06, "loss": 3.0125, "step": 1166000 }, { "epoch": 9.56, "learning_rate": 6.19753735882556e-06, "loss": 3.0127, "step": 1166500 }, { "epoch": 9.56, "learning_rate": 6.1958934061056405e-06, "loss": 3.0144, "step": 1167000 }, { "epoch": 9.57, "learning_rate": 6.1942494533857215e-06, "loss": 3.0174, "step": 1167500 }, { "epoch": 9.57, "learning_rate": 6.192605500665802e-06, "loss": 3.0088, "step": 1168000 }, { "epoch": 9.57, "learning_rate": 6.190961547945882e-06, "loss": 3.0135, "step": 1168500 }, { "epoch": 9.58, "learning_rate": 6.189317595225962e-06, "loss": 3.0141, "step": 1169000 }, { "epoch": 9.58, "learning_rate": 6.187673642506042e-06, "loss": 3.0075, "step": 1169500 }, { "epoch": 9.59, "learning_rate": 6.186029689786123e-06, "loss": 3.019, "step": 1170000 }, { "epoch": 9.59, "eval_accuracy": 0.4878472453037176, "eval_loss": 2.873086929321289, "eval_runtime": 405.404, "eval_samples_per_second": 760.614, "eval_steps_per_second": 15.848, "step": 1170000 }, { "epoch": 9.59, "learning_rate": 6.184385737066202e-06, "loss": 3.0057, "step": 1170500 }, { "epoch": 9.59, "learning_rate": 6.182741784346282e-06, "loss": 3.0136, "step": 1171000 }, { "epoch": 9.6, "learning_rate": 6.1810978316263625e-06, "loss": 3.0157, "step": 1171500 }, { "epoch": 9.6, "learning_rate": 6.179453878906443e-06, "loss": 3.0087, "step": 1172000 }, { "epoch": 9.61, "learning_rate": 6.177809926186524e-06, "loss": 3.0223, "step": 1172500 }, { "epoch": 9.61, "learning_rate": 6.176165973466604e-06, "loss": 3.0064, "step": 1173000 }, { "epoch": 9.61, "learning_rate": 6.174522020746684e-06, "loss": 3.0084, "step": 1173500 }, { "epoch": 9.62, "learning_rate": 6.172878068026764e-06, "loss": 3.0118, "step": 1174000 }, { "epoch": 9.62, "learning_rate": 6.171234115306844e-06, "loss": 3.0115, "step": 1174500 }, { "epoch": 9.63, "learning_rate": 6.169590162586925e-06, "loss": 3.0137, "step": 1175000 }, { "epoch": 9.63, "learning_rate": 6.167946209867005e-06, "loss": 3.0106, "step": 1175500 }, { "epoch": 9.63, "learning_rate": 6.166302257147085e-06, "loss": 3.0104, "step": 1176000 }, { "epoch": 9.64, "learning_rate": 6.1646583044271655e-06, "loss": 3.0126, "step": 1176500 }, { "epoch": 9.64, "learning_rate": 6.163014351707245e-06, "loss": 3.0086, "step": 1177000 }, { "epoch": 9.65, "learning_rate": 6.161370398987325e-06, "loss": 3.0148, "step": 1177500 }, { "epoch": 9.65, "learning_rate": 6.159726446267406e-06, "loss": 3.0146, "step": 1178000 }, { "epoch": 9.66, "learning_rate": 6.158082493547486e-06, "loss": 3.0155, "step": 1178500 }, { "epoch": 9.66, "learning_rate": 6.156438540827566e-06, "loss": 3.012, "step": 1179000 }, { "epoch": 9.66, "learning_rate": 6.154794588107646e-06, "loss": 3.0118, "step": 1179500 }, { "epoch": 9.67, "learning_rate": 6.153150635387727e-06, "loss": 3.0086, "step": 1180000 }, { "epoch": 9.67, "learning_rate": 6.151506682667807e-06, "loss": 3.0139, "step": 1180500 }, { "epoch": 9.68, "learning_rate": 6.1498627299478875e-06, "loss": 3.0105, "step": 1181000 }, { "epoch": 9.68, "learning_rate": 6.148218777227968e-06, "loss": 3.012, "step": 1181500 }, { "epoch": 9.68, "learning_rate": 6.146574824508048e-06, "loss": 3.0096, "step": 1182000 }, { "epoch": 9.69, "learning_rate": 6.144930871788129e-06, "loss": 3.0109, "step": 1182500 }, { "epoch": 9.69, "learning_rate": 6.143286919068208e-06, "loss": 3.0119, "step": 1183000 }, { "epoch": 9.7, "learning_rate": 6.141642966348288e-06, "loss": 3.0086, "step": 1183500 }, { "epoch": 9.7, "learning_rate": 6.139999013628368e-06, "loss": 3.014, "step": 1184000 }, { "epoch": 9.7, "learning_rate": 6.138355060908448e-06, "loss": 3.0107, "step": 1184500 }, { "epoch": 9.71, "learning_rate": 6.1367111081885285e-06, "loss": 3.0039, "step": 1185000 }, { "epoch": 9.71, "learning_rate": 6.1350671554686094e-06, "loss": 3.0066, "step": 1185500 }, { "epoch": 9.72, "learning_rate": 6.1334232027486896e-06, "loss": 3.0103, "step": 1186000 }, { "epoch": 9.72, "learning_rate": 6.13177925002877e-06, "loss": 3.0094, "step": 1186500 }, { "epoch": 9.72, "learning_rate": 6.13013529730885e-06, "loss": 3.011, "step": 1187000 }, { "epoch": 9.73, "learning_rate": 6.128491344588931e-06, "loss": 3.0094, "step": 1187500 }, { "epoch": 9.73, "learning_rate": 6.126847391869011e-06, "loss": 3.0036, "step": 1188000 }, { "epoch": 9.74, "learning_rate": 6.125203439149091e-06, "loss": 3.0096, "step": 1188500 }, { "epoch": 9.74, "learning_rate": 6.12355948642917e-06, "loss": 3.0105, "step": 1189000 }, { "epoch": 9.75, "learning_rate": 6.1219155337092504e-06, "loss": 3.008, "step": 1189500 }, { "epoch": 9.75, "learning_rate": 6.1202715809893306e-06, "loss": 3.0032, "step": 1190000 }, { "epoch": 9.75, "learning_rate": 6.1186276282694115e-06, "loss": 3.0091, "step": 1190500 }, { "epoch": 9.76, "learning_rate": 6.116983675549492e-06, "loss": 3.0035, "step": 1191000 }, { "epoch": 9.76, "learning_rate": 6.115339722829572e-06, "loss": 3.0091, "step": 1191500 }, { "epoch": 9.77, "learning_rate": 6.113695770109652e-06, "loss": 3.0013, "step": 1192000 }, { "epoch": 9.77, "learning_rate": 6.112051817389733e-06, "loss": 3.0043, "step": 1192500 }, { "epoch": 9.77, "learning_rate": 6.110407864669813e-06, "loss": 3.0081, "step": 1193000 }, { "epoch": 9.78, "learning_rate": 6.108763911949893e-06, "loss": 3.0055, "step": 1193500 }, { "epoch": 9.78, "learning_rate": 6.107119959229973e-06, "loss": 3.011, "step": 1194000 }, { "epoch": 9.79, "learning_rate": 6.105476006510053e-06, "loss": 3.0094, "step": 1194500 }, { "epoch": 9.79, "learning_rate": 6.103832053790133e-06, "loss": 3.0133, "step": 1195000 }, { "epoch": 9.79, "learning_rate": 6.102188101070214e-06, "loss": 3.0032, "step": 1195500 }, { "epoch": 9.8, "learning_rate": 6.100544148350294e-06, "loss": 3.0075, "step": 1196000 }, { "epoch": 9.8, "learning_rate": 6.098900195630374e-06, "loss": 3.0091, "step": 1196500 }, { "epoch": 9.81, "learning_rate": 6.097256242910454e-06, "loss": 3.011, "step": 1197000 }, { "epoch": 9.81, "learning_rate": 6.095612290190534e-06, "loss": 3.0039, "step": 1197500 }, { "epoch": 9.82, "learning_rate": 6.093968337470615e-06, "loss": 3.0036, "step": 1198000 }, { "epoch": 9.82, "learning_rate": 6.092324384750695e-06, "loss": 3.0069, "step": 1198500 }, { "epoch": 9.82, "learning_rate": 6.090680432030775e-06, "loss": 3.0114, "step": 1199000 }, { "epoch": 9.83, "learning_rate": 6.0890364793108555e-06, "loss": 3.006, "step": 1199500 }, { "epoch": 9.83, "learning_rate": 6.0873925265909365e-06, "loss": 3.0094, "step": 1200000 }, { "epoch": 9.83, "eval_accuracy": 0.48851002828245305, "eval_loss": 2.8687474727630615, "eval_runtime": 404.2357, "eval_samples_per_second": 762.812, "eval_steps_per_second": 15.894, "step": 1200000 }, { "epoch": 9.84, "learning_rate": 6.085748573871017e-06, "loss": 2.9995, "step": 1200500 }, { "epoch": 9.84, "learning_rate": 6.084104621151096e-06, "loss": 3.0002, "step": 1201000 }, { "epoch": 9.84, "learning_rate": 6.082460668431176e-06, "loss": 3.0047, "step": 1201500 }, { "epoch": 9.85, "learning_rate": 6.080816715711256e-06, "loss": 3.0093, "step": 1202000 }, { "epoch": 9.85, "learning_rate": 6.079172762991336e-06, "loss": 3.0018, "step": 1202500 }, { "epoch": 9.86, "learning_rate": 6.077528810271417e-06, "loss": 3.0067, "step": 1203000 }, { "epoch": 9.86, "learning_rate": 6.075884857551497e-06, "loss": 3.01, "step": 1203500 }, { "epoch": 9.86, "learning_rate": 6.0742409048315775e-06, "loss": 3.0108, "step": 1204000 }, { "epoch": 9.87, "learning_rate": 6.072596952111658e-06, "loss": 3.0089, "step": 1204500 }, { "epoch": 9.87, "learning_rate": 6.070952999391739e-06, "loss": 3.0047, "step": 1205000 }, { "epoch": 9.88, "learning_rate": 6.069309046671819e-06, "loss": 3.0057, "step": 1205500 }, { "epoch": 9.88, "learning_rate": 6.067665093951899e-06, "loss": 3.006, "step": 1206000 }, { "epoch": 9.88, "learning_rate": 6.066021141231979e-06, "loss": 3.0104, "step": 1206500 }, { "epoch": 9.89, "learning_rate": 6.064377188512058e-06, "loss": 3.0047, "step": 1207000 }, { "epoch": 9.89, "learning_rate": 6.062733235792138e-06, "loss": 3.0062, "step": 1207500 }, { "epoch": 9.9, "learning_rate": 6.061089283072219e-06, "loss": 3.0086, "step": 1208000 }, { "epoch": 9.9, "learning_rate": 6.0594453303522995e-06, "loss": 3.0032, "step": 1208500 }, { "epoch": 9.91, "learning_rate": 6.05780137763238e-06, "loss": 3.0042, "step": 1209000 }, { "epoch": 9.91, "learning_rate": 6.05615742491246e-06, "loss": 3.0064, "step": 1209500 }, { "epoch": 9.91, "learning_rate": 6.05451347219254e-06, "loss": 3.0005, "step": 1210000 }, { "epoch": 9.92, "learning_rate": 6.052869519472621e-06, "loss": 3.0043, "step": 1210500 }, { "epoch": 9.92, "learning_rate": 6.051225566752701e-06, "loss": 3.0002, "step": 1211000 }, { "epoch": 9.93, "learning_rate": 6.049581614032781e-06, "loss": 3.011, "step": 1211500 }, { "epoch": 9.93, "learning_rate": 6.047937661312861e-06, "loss": 3.0027, "step": 1212000 }, { "epoch": 9.93, "learning_rate": 6.046293708592942e-06, "loss": 3.0036, "step": 1212500 }, { "epoch": 9.94, "learning_rate": 6.044649755873021e-06, "loss": 3.0021, "step": 1213000 }, { "epoch": 9.94, "learning_rate": 6.043005803153102e-06, "loss": 2.9972, "step": 1213500 }, { "epoch": 9.95, "learning_rate": 6.041361850433182e-06, "loss": 3.0094, "step": 1214000 }, { "epoch": 9.95, "learning_rate": 6.039717897713262e-06, "loss": 3.0087, "step": 1214500 }, { "epoch": 9.95, "learning_rate": 6.038073944993342e-06, "loss": 3.0098, "step": 1215000 }, { "epoch": 9.96, "learning_rate": 6.036429992273423e-06, "loss": 3.0037, "step": 1215500 }, { "epoch": 9.96, "learning_rate": 6.034786039553503e-06, "loss": 3.0057, "step": 1216000 }, { "epoch": 9.97, "learning_rate": 6.033142086833583e-06, "loss": 3.0026, "step": 1216500 }, { "epoch": 9.97, "learning_rate": 6.031498134113663e-06, "loss": 3.0057, "step": 1217000 }, { "epoch": 9.97, "learning_rate": 6.029854181393744e-06, "loss": 3.0037, "step": 1217500 }, { "epoch": 9.98, "learning_rate": 6.0282102286738244e-06, "loss": 3.0066, "step": 1218000 }, { "epoch": 9.98, "learning_rate": 6.0265662759539046e-06, "loss": 3.0039, "step": 1218500 }, { "epoch": 9.99, "learning_rate": 6.024922323233984e-06, "loss": 3.0073, "step": 1219000 }, { "epoch": 9.99, "learning_rate": 6.023278370514064e-06, "loss": 2.9989, "step": 1219500 }, { "epoch": 10.0, "learning_rate": 6.021634417794144e-06, "loss": 3.0029, "step": 1220000 }, { "epoch": 10.0, "learning_rate": 6.019990465074225e-06, "loss": 2.9987, "step": 1220500 }, { "epoch": 10.0, "learning_rate": 6.018346512354305e-06, "loss": 3.0004, "step": 1221000 }, { "epoch": 10.01, "learning_rate": 6.016702559634385e-06, "loss": 3.006, "step": 1221500 }, { "epoch": 10.01, "learning_rate": 6.0150586069144654e-06, "loss": 3.0002, "step": 1222000 }, { "epoch": 10.02, "learning_rate": 6.0134146541945456e-06, "loss": 3.0044, "step": 1222500 }, { "epoch": 10.02, "learning_rate": 6.0117707014746265e-06, "loss": 2.9957, "step": 1223000 }, { "epoch": 10.02, "learning_rate": 6.010126748754707e-06, "loss": 3.0001, "step": 1223500 }, { "epoch": 10.03, "learning_rate": 6.008482796034787e-06, "loss": 3.0058, "step": 1224000 }, { "epoch": 10.03, "learning_rate": 6.006838843314867e-06, "loss": 2.9946, "step": 1224500 }, { "epoch": 10.04, "learning_rate": 6.005194890594948e-06, "loss": 3.0, "step": 1225000 }, { "epoch": 10.04, "learning_rate": 6.003550937875026e-06, "loss": 2.9992, "step": 1225500 }, { "epoch": 10.04, "learning_rate": 6.001906985155107e-06, "loss": 2.9963, "step": 1226000 }, { "epoch": 10.05, "learning_rate": 6.0002630324351874e-06, "loss": 2.9928, "step": 1226500 }, { "epoch": 10.05, "learning_rate": 5.9986190797152676e-06, "loss": 3.0001, "step": 1227000 }, { "epoch": 10.06, "learning_rate": 5.996975126995348e-06, "loss": 3.0003, "step": 1227500 }, { "epoch": 10.06, "learning_rate": 5.995331174275429e-06, "loss": 2.9989, "step": 1228000 }, { "epoch": 10.06, "learning_rate": 5.993687221555509e-06, "loss": 2.9957, "step": 1228500 }, { "epoch": 10.07, "learning_rate": 5.992043268835589e-06, "loss": 3.0049, "step": 1229000 }, { "epoch": 10.07, "learning_rate": 5.990399316115669e-06, "loss": 2.9962, "step": 1229500 }, { "epoch": 10.08, "learning_rate": 5.988755363395749e-06, "loss": 3.0065, "step": 1230000 }, { "epoch": 10.08, "eval_accuracy": 0.48933464137984234, "eval_loss": 2.8635189533233643, "eval_runtime": 406.4876, "eval_samples_per_second": 758.587, "eval_steps_per_second": 15.806, "step": 1230000 }, { "epoch": 10.08, "learning_rate": 5.98711141067583e-06, "loss": 2.998, "step": 1230500 }, { "epoch": 10.09, "learning_rate": 5.98546745795591e-06, "loss": 3.0041, "step": 1231000 }, { "epoch": 10.09, "learning_rate": 5.9838235052359895e-06, "loss": 3.0004, "step": 1231500 }, { "epoch": 10.09, "learning_rate": 5.98217955251607e-06, "loss": 3.0011, "step": 1232000 }, { "epoch": 10.1, "learning_rate": 5.98053559979615e-06, "loss": 3.0099, "step": 1232500 }, { "epoch": 10.1, "learning_rate": 5.978891647076231e-06, "loss": 3.0018, "step": 1233000 }, { "epoch": 10.11, "learning_rate": 5.977247694356311e-06, "loss": 2.9946, "step": 1233500 }, { "epoch": 10.11, "learning_rate": 5.975603741636391e-06, "loss": 3.0006, "step": 1234000 }, { "epoch": 10.11, "learning_rate": 5.973959788916471e-06, "loss": 2.9976, "step": 1234500 }, { "epoch": 10.12, "learning_rate": 5.972315836196551e-06, "loss": 2.9933, "step": 1235000 }, { "epoch": 10.12, "learning_rate": 5.970671883476632e-06, "loss": 2.997, "step": 1235500 }, { "epoch": 10.13, "learning_rate": 5.969027930756712e-06, "loss": 3.0016, "step": 1236000 }, { "epoch": 10.13, "learning_rate": 5.9673839780367925e-06, "loss": 2.9995, "step": 1236500 }, { "epoch": 10.13, "learning_rate": 5.965740025316873e-06, "loss": 3.0035, "step": 1237000 }, { "epoch": 10.14, "learning_rate": 5.964096072596952e-06, "loss": 3.002, "step": 1237500 }, { "epoch": 10.14, "learning_rate": 5.962452119877032e-06, "loss": 2.9986, "step": 1238000 }, { "epoch": 10.15, "learning_rate": 5.960808167157113e-06, "loss": 3.001, "step": 1238500 }, { "epoch": 10.15, "learning_rate": 5.959164214437193e-06, "loss": 2.9956, "step": 1239000 }, { "epoch": 10.16, "learning_rate": 5.957520261717273e-06, "loss": 3.0005, "step": 1239500 }, { "epoch": 10.16, "learning_rate": 5.955876308997353e-06, "loss": 2.9997, "step": 1240000 }, { "epoch": 10.16, "learning_rate": 5.954232356277434e-06, "loss": 2.9939, "step": 1240500 }, { "epoch": 10.17, "learning_rate": 5.9525884035575145e-06, "loss": 2.9964, "step": 1241000 }, { "epoch": 10.17, "learning_rate": 5.950944450837595e-06, "loss": 2.9955, "step": 1241500 }, { "epoch": 10.18, "learning_rate": 5.949300498117675e-06, "loss": 3.0, "step": 1242000 }, { "epoch": 10.18, "learning_rate": 5.947656545397755e-06, "loss": 3.0017, "step": 1242500 }, { "epoch": 10.18, "learning_rate": 5.946012592677836e-06, "loss": 3.0005, "step": 1243000 }, { "epoch": 10.19, "learning_rate": 5.944368639957915e-06, "loss": 2.9954, "step": 1243500 }, { "epoch": 10.19, "learning_rate": 5.942724687237995e-06, "loss": 2.9982, "step": 1244000 }, { "epoch": 10.2, "learning_rate": 5.941080734518075e-06, "loss": 3.0029, "step": 1244500 }, { "epoch": 10.2, "learning_rate": 5.9394367817981555e-06, "loss": 2.9995, "step": 1245000 }, { "epoch": 10.2, "learning_rate": 5.9377928290782365e-06, "loss": 3.0004, "step": 1245500 }, { "epoch": 10.21, "learning_rate": 5.936148876358317e-06, "loss": 3.0033, "step": 1246000 }, { "epoch": 10.21, "learning_rate": 5.934504923638397e-06, "loss": 2.9923, "step": 1246500 }, { "epoch": 10.22, "learning_rate": 5.932860970918477e-06, "loss": 2.9955, "step": 1247000 }, { "epoch": 10.22, "learning_rate": 5.931217018198557e-06, "loss": 2.9977, "step": 1247500 }, { "epoch": 10.22, "learning_rate": 5.929573065478638e-06, "loss": 2.9962, "step": 1248000 }, { "epoch": 10.23, "learning_rate": 5.927929112758718e-06, "loss": 2.9916, "step": 1248500 }, { "epoch": 10.23, "learning_rate": 5.926285160038798e-06, "loss": 2.9935, "step": 1249000 }, { "epoch": 10.24, "learning_rate": 5.9246412073188775e-06, "loss": 2.9927, "step": 1249500 }, { "epoch": 10.24, "learning_rate": 5.922997254598958e-06, "loss": 2.9976, "step": 1250000 }, { "epoch": 10.25, "learning_rate": 5.921353301879038e-06, "loss": 2.9944, "step": 1250500 }, { "epoch": 10.25, "learning_rate": 5.919709349159119e-06, "loss": 3.0006, "step": 1251000 }, { "epoch": 10.25, "learning_rate": 5.918065396439199e-06, "loss": 2.9983, "step": 1251500 }, { "epoch": 10.26, "learning_rate": 5.916421443719279e-06, "loss": 2.9885, "step": 1252000 }, { "epoch": 10.26, "learning_rate": 5.914777490999359e-06, "loss": 2.9992, "step": 1252500 }, { "epoch": 10.27, "learning_rate": 5.91313353827944e-06, "loss": 3.0021, "step": 1253000 }, { "epoch": 10.27, "learning_rate": 5.91148958555952e-06, "loss": 2.9901, "step": 1253500 }, { "epoch": 10.27, "learning_rate": 5.9098456328396e-06, "loss": 2.9947, "step": 1254000 }, { "epoch": 10.28, "learning_rate": 5.9082016801196804e-06, "loss": 2.9951, "step": 1254500 }, { "epoch": 10.28, "learning_rate": 5.9065577273997606e-06, "loss": 2.9973, "step": 1255000 }, { "epoch": 10.29, "learning_rate": 5.90491377467984e-06, "loss": 2.9943, "step": 1255500 }, { "epoch": 10.29, "learning_rate": 5.903269821959921e-06, "loss": 2.9956, "step": 1256000 }, { "epoch": 10.29, "learning_rate": 5.901625869240001e-06, "loss": 2.9963, "step": 1256500 }, { "epoch": 10.3, "learning_rate": 5.899981916520081e-06, "loss": 2.9933, "step": 1257000 }, { "epoch": 10.3, "learning_rate": 5.898337963800161e-06, "loss": 2.9907, "step": 1257500 }, { "epoch": 10.31, "learning_rate": 5.896694011080241e-06, "loss": 2.9903, "step": 1258000 }, { "epoch": 10.31, "learning_rate": 5.895050058360322e-06, "loss": 2.9945, "step": 1258500 }, { "epoch": 10.31, "learning_rate": 5.8934061056404024e-06, "loss": 2.9894, "step": 1259000 }, { "epoch": 10.32, "learning_rate": 5.8917621529204826e-06, "loss": 2.9936, "step": 1259500 }, { "epoch": 10.32, "learning_rate": 5.890118200200563e-06, "loss": 2.9983, "step": 1260000 }, { "epoch": 10.32, "eval_accuracy": 0.4899989186098096, "eval_loss": 2.856139659881592, "eval_runtime": 405.6967, "eval_samples_per_second": 760.065, "eval_steps_per_second": 15.837, "step": 1260000 }, { "epoch": 10.33, "learning_rate": 5.888474247480644e-06, "loss": 2.9992, "step": 1260500 }, { "epoch": 10.33, "learning_rate": 5.886830294760724e-06, "loss": 2.9973, "step": 1261000 }, { "epoch": 10.34, "learning_rate": 5.885186342040803e-06, "loss": 2.996, "step": 1261500 }, { "epoch": 10.34, "learning_rate": 5.883542389320883e-06, "loss": 2.9911, "step": 1262000 }, { "epoch": 10.34, "learning_rate": 5.881898436600963e-06, "loss": 2.9913, "step": 1262500 }, { "epoch": 10.35, "learning_rate": 5.8802544838810434e-06, "loss": 2.9906, "step": 1263000 }, { "epoch": 10.35, "learning_rate": 5.878610531161124e-06, "loss": 3.0007, "step": 1263500 }, { "epoch": 10.36, "learning_rate": 5.8769665784412045e-06, "loss": 2.9983, "step": 1264000 }, { "epoch": 10.36, "learning_rate": 5.875322625721285e-06, "loss": 2.9929, "step": 1264500 }, { "epoch": 10.36, "learning_rate": 5.873678673001365e-06, "loss": 2.999, "step": 1265000 }, { "epoch": 10.37, "learning_rate": 5.872034720281446e-06, "loss": 2.9903, "step": 1265500 }, { "epoch": 10.37, "learning_rate": 5.870390767561526e-06, "loss": 2.9906, "step": 1266000 }, { "epoch": 10.38, "learning_rate": 5.868746814841606e-06, "loss": 2.99, "step": 1266500 }, { "epoch": 10.38, "learning_rate": 5.867102862121686e-06, "loss": 2.9936, "step": 1267000 }, { "epoch": 10.38, "learning_rate": 5.8654589094017654e-06, "loss": 2.9955, "step": 1267500 }, { "epoch": 10.39, "learning_rate": 5.8638149566818456e-06, "loss": 2.9949, "step": 1268000 }, { "epoch": 10.39, "learning_rate": 5.8621710039619265e-06, "loss": 2.9959, "step": 1268500 }, { "epoch": 10.4, "learning_rate": 5.860527051242007e-06, "loss": 2.9974, "step": 1269000 }, { "epoch": 10.4, "learning_rate": 5.858883098522087e-06, "loss": 2.9972, "step": 1269500 }, { "epoch": 10.4, "learning_rate": 5.857239145802167e-06, "loss": 2.9952, "step": 1270000 }, { "epoch": 10.41, "learning_rate": 5.855595193082247e-06, "loss": 2.9968, "step": 1270500 }, { "epoch": 10.41, "learning_rate": 5.853951240362328e-06, "loss": 2.9893, "step": 1271000 }, { "epoch": 10.42, "learning_rate": 5.852307287642408e-06, "loss": 2.9974, "step": 1271500 }, { "epoch": 10.42, "learning_rate": 5.850663334922488e-06, "loss": 2.9862, "step": 1272000 }, { "epoch": 10.43, "learning_rate": 5.849019382202568e-06, "loss": 2.9944, "step": 1272500 }, { "epoch": 10.43, "learning_rate": 5.847375429482649e-06, "loss": 2.9933, "step": 1273000 }, { "epoch": 10.43, "learning_rate": 5.845731476762729e-06, "loss": 2.993, "step": 1273500 }, { "epoch": 10.44, "learning_rate": 5.844087524042809e-06, "loss": 2.9942, "step": 1274000 }, { "epoch": 10.44, "learning_rate": 5.842443571322889e-06, "loss": 2.9952, "step": 1274500 }, { "epoch": 10.45, "learning_rate": 5.840799618602969e-06, "loss": 2.9978, "step": 1275000 }, { "epoch": 10.45, "learning_rate": 5.839155665883049e-06, "loss": 2.9936, "step": 1275500 }, { "epoch": 10.45, "learning_rate": 5.83751171316313e-06, "loss": 2.9934, "step": 1276000 }, { "epoch": 10.46, "learning_rate": 5.83586776044321e-06, "loss": 2.9946, "step": 1276500 }, { "epoch": 10.46, "learning_rate": 5.83422380772329e-06, "loss": 2.9954, "step": 1277000 }, { "epoch": 10.47, "learning_rate": 5.8325798550033705e-06, "loss": 2.9865, "step": 1277500 }, { "epoch": 10.47, "learning_rate": 5.8309359022834515e-06, "loss": 2.9931, "step": 1278000 }, { "epoch": 10.47, "learning_rate": 5.829291949563532e-06, "loss": 2.9937, "step": 1278500 }, { "epoch": 10.48, "learning_rate": 5.827647996843612e-06, "loss": 2.9888, "step": 1279000 }, { "epoch": 10.48, "learning_rate": 5.826004044123692e-06, "loss": 2.9879, "step": 1279500 }, { "epoch": 10.49, "learning_rate": 5.824360091403771e-06, "loss": 2.9912, "step": 1280000 }, { "epoch": 10.49, "learning_rate": 5.822716138683851e-06, "loss": 3.0024, "step": 1280500 }, { "epoch": 10.5, "learning_rate": 5.821072185963932e-06, "loss": 2.9928, "step": 1281000 }, { "epoch": 10.5, "learning_rate": 5.819428233244012e-06, "loss": 2.9852, "step": 1281500 }, { "epoch": 10.5, "learning_rate": 5.8177842805240925e-06, "loss": 2.9819, "step": 1282000 }, { "epoch": 10.51, "learning_rate": 5.816140327804173e-06, "loss": 2.9923, "step": 1282500 }, { "epoch": 10.51, "learning_rate": 5.814496375084253e-06, "loss": 2.9913, "step": 1283000 }, { "epoch": 10.52, "learning_rate": 5.812852422364334e-06, "loss": 2.985, "step": 1283500 }, { "epoch": 10.52, "learning_rate": 5.811208469644414e-06, "loss": 2.9962, "step": 1284000 }, { "epoch": 10.52, "learning_rate": 5.809564516924494e-06, "loss": 2.9926, "step": 1284500 }, { "epoch": 10.53, "learning_rate": 5.807920564204574e-06, "loss": 2.9897, "step": 1285000 }, { "epoch": 10.53, "learning_rate": 5.806276611484655e-06, "loss": 2.9893, "step": 1285500 }, { "epoch": 10.54, "learning_rate": 5.8046326587647335e-06, "loss": 2.9912, "step": 1286000 }, { "epoch": 10.54, "learning_rate": 5.8029887060448145e-06, "loss": 2.9872, "step": 1286500 }, { "epoch": 10.54, "learning_rate": 5.801344753324895e-06, "loss": 2.9919, "step": 1287000 }, { "epoch": 10.55, "learning_rate": 5.799700800604975e-06, "loss": 2.991, "step": 1287500 }, { "epoch": 10.55, "learning_rate": 5.798056847885055e-06, "loss": 2.9917, "step": 1288000 }, { "epoch": 10.56, "learning_rate": 5.796412895165136e-06, "loss": 2.9909, "step": 1288500 }, { "epoch": 10.56, "learning_rate": 5.794768942445216e-06, "loss": 2.988, "step": 1289000 }, { "epoch": 10.56, "learning_rate": 5.793124989725296e-06, "loss": 2.9947, "step": 1289500 }, { "epoch": 10.57, "learning_rate": 5.791481037005376e-06, "loss": 2.9834, "step": 1290000 }, { "epoch": 10.57, "eval_accuracy": 0.490716405708734, "eval_loss": 2.852367401123047, "eval_runtime": 409.9329, "eval_samples_per_second": 752.211, "eval_steps_per_second": 15.673, "step": 1290000 }, { "epoch": 10.57, "learning_rate": 5.789837084285457e-06, "loss": 2.9875, "step": 1290500 }, { "epoch": 10.58, "learning_rate": 5.788193131565537e-06, "loss": 2.99, "step": 1291000 }, { "epoch": 10.58, "learning_rate": 5.7865491788456174e-06, "loss": 2.9915, "step": 1291500 }, { "epoch": 10.59, "learning_rate": 5.784905226125697e-06, "loss": 2.989, "step": 1292000 }, { "epoch": 10.59, "learning_rate": 5.783261273405777e-06, "loss": 2.9908, "step": 1292500 }, { "epoch": 10.59, "learning_rate": 5.781617320685857e-06, "loss": 2.9896, "step": 1293000 }, { "epoch": 10.6, "learning_rate": 5.779973367965938e-06, "loss": 2.9904, "step": 1293500 }, { "epoch": 10.6, "learning_rate": 5.778329415246018e-06, "loss": 2.9917, "step": 1294000 }, { "epoch": 10.61, "learning_rate": 5.776685462526098e-06, "loss": 2.9959, "step": 1294500 }, { "epoch": 10.61, "learning_rate": 5.775041509806178e-06, "loss": 2.9892, "step": 1295000 }, { "epoch": 10.61, "learning_rate": 5.7733975570862584e-06, "loss": 2.9959, "step": 1295500 }, { "epoch": 10.62, "learning_rate": 5.771753604366339e-06, "loss": 2.9883, "step": 1296000 }, { "epoch": 10.62, "learning_rate": 5.7701096516464195e-06, "loss": 2.9849, "step": 1296500 }, { "epoch": 10.63, "learning_rate": 5.7684656989265e-06, "loss": 2.99, "step": 1297000 }, { "epoch": 10.63, "learning_rate": 5.76682174620658e-06, "loss": 2.9891, "step": 1297500 }, { "epoch": 10.63, "learning_rate": 5.765177793486659e-06, "loss": 2.9913, "step": 1298000 }, { "epoch": 10.64, "learning_rate": 5.763533840766739e-06, "loss": 2.9844, "step": 1298500 }, { "epoch": 10.64, "learning_rate": 5.76188988804682e-06, "loss": 2.988, "step": 1299000 }, { "epoch": 10.65, "learning_rate": 5.7602459353269e-06, "loss": 2.9861, "step": 1299500 }, { "epoch": 10.65, "learning_rate": 5.7586019826069804e-06, "loss": 2.9871, "step": 1300000 }, { "epoch": 10.65, "learning_rate": 5.7569580298870606e-06, "loss": 2.9841, "step": 1300500 }, { "epoch": 10.66, "learning_rate": 5.7553140771671415e-06, "loss": 2.9844, "step": 1301000 }, { "epoch": 10.66, "learning_rate": 5.753670124447222e-06, "loss": 2.9895, "step": 1301500 }, { "epoch": 10.67, "learning_rate": 5.752026171727302e-06, "loss": 2.9843, "step": 1302000 }, { "epoch": 10.67, "learning_rate": 5.750382219007382e-06, "loss": 2.9913, "step": 1302500 }, { "epoch": 10.68, "learning_rate": 5.748738266287462e-06, "loss": 2.9902, "step": 1303000 }, { "epoch": 10.68, "learning_rate": 5.747094313567543e-06, "loss": 2.9856, "step": 1303500 }, { "epoch": 10.68, "learning_rate": 5.745450360847622e-06, "loss": 2.9884, "step": 1304000 }, { "epoch": 10.69, "learning_rate": 5.743806408127702e-06, "loss": 2.9866, "step": 1304500 }, { "epoch": 10.69, "learning_rate": 5.7421624554077825e-06, "loss": 2.9871, "step": 1305000 }, { "epoch": 10.7, "learning_rate": 5.740518502687863e-06, "loss": 2.9865, "step": 1305500 }, { "epoch": 10.7, "learning_rate": 5.738874549967944e-06, "loss": 2.9871, "step": 1306000 }, { "epoch": 10.7, "learning_rate": 5.737230597248024e-06, "loss": 2.9857, "step": 1306500 }, { "epoch": 10.71, "learning_rate": 5.735586644528104e-06, "loss": 2.9791, "step": 1307000 }, { "epoch": 10.71, "learning_rate": 5.733942691808184e-06, "loss": 2.9903, "step": 1307500 }, { "epoch": 10.72, "learning_rate": 5.732298739088264e-06, "loss": 2.9838, "step": 1308000 }, { "epoch": 10.72, "learning_rate": 5.730654786368345e-06, "loss": 2.9891, "step": 1308500 }, { "epoch": 10.72, "learning_rate": 5.729010833648425e-06, "loss": 2.9863, "step": 1309000 }, { "epoch": 10.73, "learning_rate": 5.727366880928505e-06, "loss": 2.9899, "step": 1309500 }, { "epoch": 10.73, "learning_rate": 5.725722928208585e-06, "loss": 2.9845, "step": 1310000 }, { "epoch": 10.74, "learning_rate": 5.724078975488665e-06, "loss": 2.9834, "step": 1310500 }, { "epoch": 10.74, "learning_rate": 5.722435022768745e-06, "loss": 2.9827, "step": 1311000 }, { "epoch": 10.74, "learning_rate": 5.720791070048826e-06, "loss": 2.9816, "step": 1311500 }, { "epoch": 10.75, "learning_rate": 5.719147117328906e-06, "loss": 2.9773, "step": 1312000 }, { "epoch": 10.75, "learning_rate": 5.717503164608986e-06, "loss": 2.9882, "step": 1312500 }, { "epoch": 10.76, "learning_rate": 5.715859211889066e-06, "loss": 2.9822, "step": 1313000 }, { "epoch": 10.76, "learning_rate": 5.714215259169147e-06, "loss": 2.9863, "step": 1313500 }, { "epoch": 10.77, "learning_rate": 5.712571306449227e-06, "loss": 2.9802, "step": 1314000 }, { "epoch": 10.77, "learning_rate": 5.7109273537293075e-06, "loss": 2.9899, "step": 1314500 }, { "epoch": 10.77, "learning_rate": 5.709283401009388e-06, "loss": 2.9936, "step": 1315000 }, { "epoch": 10.78, "learning_rate": 5.707639448289468e-06, "loss": 2.9852, "step": 1315500 }, { "epoch": 10.78, "learning_rate": 5.705995495569547e-06, "loss": 2.9875, "step": 1316000 }, { "epoch": 10.79, "learning_rate": 5.704351542849628e-06, "loss": 2.9867, "step": 1316500 }, { "epoch": 10.79, "learning_rate": 5.702707590129708e-06, "loss": 2.982, "step": 1317000 }, { "epoch": 10.79, "learning_rate": 5.701063637409788e-06, "loss": 2.9805, "step": 1317500 }, { "epoch": 10.8, "learning_rate": 5.699419684689868e-06, "loss": 2.9825, "step": 1318000 }, { "epoch": 10.8, "learning_rate": 5.697775731969949e-06, "loss": 2.9785, "step": 1318500 }, { "epoch": 10.81, "learning_rate": 5.6961317792500295e-06, "loss": 2.9884, "step": 1319000 }, { "epoch": 10.81, "learning_rate": 5.69448782653011e-06, "loss": 2.9854, "step": 1319500 }, { "epoch": 10.81, "learning_rate": 5.69284387381019e-06, "loss": 2.9873, "step": 1320000 }, { "epoch": 10.81, "eval_accuracy": 0.4910966306213036, "eval_loss": 2.8484363555908203, "eval_runtime": 403.7174, "eval_samples_per_second": 763.792, "eval_steps_per_second": 15.915, "step": 1320000 }, { "epoch": 10.82, "learning_rate": 5.69119992109027e-06, "loss": 2.9893, "step": 1320500 }, { "epoch": 10.82, "learning_rate": 5.689555968370351e-06, "loss": 2.9849, "step": 1321000 }, { "epoch": 10.83, "learning_rate": 5.687912015650431e-06, "loss": 2.9878, "step": 1321500 }, { "epoch": 10.83, "learning_rate": 5.68626806293051e-06, "loss": 2.9807, "step": 1322000 }, { "epoch": 10.84, "learning_rate": 5.68462411021059e-06, "loss": 2.9778, "step": 1322500 }, { "epoch": 10.84, "learning_rate": 5.6829801574906705e-06, "loss": 2.9867, "step": 1323000 }, { "epoch": 10.84, "learning_rate": 5.681336204770751e-06, "loss": 2.9818, "step": 1323500 }, { "epoch": 10.85, "learning_rate": 5.679692252050832e-06, "loss": 2.9833, "step": 1324000 }, { "epoch": 10.85, "learning_rate": 5.678048299330912e-06, "loss": 2.982, "step": 1324500 }, { "epoch": 10.86, "learning_rate": 5.676404346610992e-06, "loss": 2.9867, "step": 1325000 }, { "epoch": 10.86, "learning_rate": 5.674760393891072e-06, "loss": 2.9878, "step": 1325500 }, { "epoch": 10.86, "learning_rate": 5.673116441171153e-06, "loss": 2.9814, "step": 1326000 }, { "epoch": 10.87, "learning_rate": 5.671472488451233e-06, "loss": 2.9849, "step": 1326500 }, { "epoch": 10.87, "learning_rate": 5.669828535731313e-06, "loss": 2.9777, "step": 1327000 }, { "epoch": 10.88, "learning_rate": 5.668184583011393e-06, "loss": 2.9794, "step": 1327500 }, { "epoch": 10.88, "learning_rate": 5.6665406302914734e-06, "loss": 2.983, "step": 1328000 }, { "epoch": 10.88, "learning_rate": 5.664896677571553e-06, "loss": 2.9901, "step": 1328500 }, { "epoch": 10.89, "learning_rate": 5.663252724851634e-06, "loss": 2.9791, "step": 1329000 }, { "epoch": 10.89, "learning_rate": 5.661608772131714e-06, "loss": 2.9856, "step": 1329500 }, { "epoch": 10.9, "learning_rate": 5.659964819411794e-06, "loss": 2.9838, "step": 1330000 }, { "epoch": 10.9, "learning_rate": 5.658320866691874e-06, "loss": 2.9832, "step": 1330500 }, { "epoch": 10.9, "learning_rate": 5.656676913971954e-06, "loss": 2.985, "step": 1331000 }, { "epoch": 10.91, "learning_rate": 5.655032961252035e-06, "loss": 2.9829, "step": 1331500 }, { "epoch": 10.91, "learning_rate": 5.653389008532115e-06, "loss": 2.9868, "step": 1332000 }, { "epoch": 10.92, "learning_rate": 5.6517450558121954e-06, "loss": 2.9833, "step": 1332500 }, { "epoch": 10.92, "learning_rate": 5.6501011030922756e-06, "loss": 2.9865, "step": 1333000 }, { "epoch": 10.93, "learning_rate": 5.6484571503723565e-06, "loss": 2.9829, "step": 1333500 }, { "epoch": 10.93, "learning_rate": 5.646813197652437e-06, "loss": 2.9747, "step": 1334000 }, { "epoch": 10.93, "learning_rate": 5.645169244932516e-06, "loss": 2.9909, "step": 1334500 }, { "epoch": 10.94, "learning_rate": 5.643525292212596e-06, "loss": 2.9836, "step": 1335000 }, { "epoch": 10.94, "learning_rate": 5.641881339492676e-06, "loss": 2.9859, "step": 1335500 }, { "epoch": 10.95, "learning_rate": 5.640237386772756e-06, "loss": 2.9849, "step": 1336000 }, { "epoch": 10.95, "learning_rate": 5.638593434052837e-06, "loss": 2.9826, "step": 1336500 }, { "epoch": 10.95, "learning_rate": 5.636949481332917e-06, "loss": 2.9834, "step": 1337000 }, { "epoch": 10.96, "learning_rate": 5.6353055286129975e-06, "loss": 2.9836, "step": 1337500 }, { "epoch": 10.96, "learning_rate": 5.633661575893078e-06, "loss": 2.9822, "step": 1338000 }, { "epoch": 10.97, "learning_rate": 5.632017623173159e-06, "loss": 2.9908, "step": 1338500 }, { "epoch": 10.97, "learning_rate": 5.630373670453239e-06, "loss": 2.9815, "step": 1339000 }, { "epoch": 10.97, "learning_rate": 5.628729717733319e-06, "loss": 2.9858, "step": 1339500 }, { "epoch": 10.98, "learning_rate": 5.627085765013399e-06, "loss": 2.9802, "step": 1340000 }, { "epoch": 10.98, "learning_rate": 5.625441812293478e-06, "loss": 2.9778, "step": 1340500 }, { "epoch": 10.99, "learning_rate": 5.623797859573558e-06, "loss": 2.9915, "step": 1341000 }, { "epoch": 10.99, "learning_rate": 5.622153906853639e-06, "loss": 2.9819, "step": 1341500 }, { "epoch": 10.99, "learning_rate": 5.6205099541337195e-06, "loss": 2.9803, "step": 1342000 }, { "epoch": 11.0, "learning_rate": 5.6188660014138e-06, "loss": 2.9769, "step": 1342500 }, { "epoch": 11.0, "learning_rate": 5.61722204869388e-06, "loss": 2.9809, "step": 1343000 }, { "epoch": 11.01, "learning_rate": 5.61557809597396e-06, "loss": 2.981, "step": 1343500 }, { "epoch": 11.01, "learning_rate": 5.613934143254041e-06, "loss": 2.9829, "step": 1344000 }, { "epoch": 11.02, "learning_rate": 5.612290190534121e-06, "loss": 2.9789, "step": 1344500 }, { "epoch": 11.02, "learning_rate": 5.610646237814201e-06, "loss": 2.9819, "step": 1345000 }, { "epoch": 11.02, "learning_rate": 5.609002285094281e-06, "loss": 2.978, "step": 1345500 }, { "epoch": 11.03, "learning_rate": 5.607358332374362e-06, "loss": 2.987, "step": 1346000 }, { "epoch": 11.03, "learning_rate": 5.6057143796544415e-06, "loss": 2.9808, "step": 1346500 }, { "epoch": 11.04, "learning_rate": 5.604070426934522e-06, "loss": 2.9786, "step": 1347000 }, { "epoch": 11.04, "learning_rate": 5.602426474214602e-06, "loss": 2.9847, "step": 1347500 }, { "epoch": 11.04, "learning_rate": 5.600782521494682e-06, "loss": 2.9829, "step": 1348000 }, { "epoch": 11.05, "learning_rate": 5.599138568774762e-06, "loss": 2.9851, "step": 1348500 }, { "epoch": 11.05, "learning_rate": 5.597494616054843e-06, "loss": 2.9856, "step": 1349000 }, { "epoch": 11.06, "learning_rate": 5.595850663334923e-06, "loss": 2.9804, "step": 1349500 }, { "epoch": 11.06, "learning_rate": 5.594206710615003e-06, "loss": 2.978, "step": 1350000 }, { "epoch": 11.06, "eval_accuracy": 0.4923695335880713, "eval_loss": 2.841365337371826, "eval_runtime": 412.3409, "eval_samples_per_second": 747.818, "eval_steps_per_second": 15.582, "step": 1350000 }, { "epoch": 11.06, "learning_rate": 5.592562757895083e-06, "loss": 2.9795, "step": 1350500 }, { "epoch": 11.07, "learning_rate": 5.590918805175164e-06, "loss": 2.979, "step": 1351000 }, { "epoch": 11.07, "learning_rate": 5.5892748524552445e-06, "loss": 2.9805, "step": 1351500 }, { "epoch": 11.08, "learning_rate": 5.587630899735325e-06, "loss": 2.9765, "step": 1352000 }, { "epoch": 11.08, "learning_rate": 5.585986947015404e-06, "loss": 2.984, "step": 1352500 }, { "epoch": 11.08, "learning_rate": 5.584342994295484e-06, "loss": 2.9842, "step": 1353000 }, { "epoch": 11.09, "learning_rate": 5.582699041575564e-06, "loss": 2.9697, "step": 1353500 }, { "epoch": 11.09, "learning_rate": 5.581055088855645e-06, "loss": 2.9756, "step": 1354000 }, { "epoch": 11.1, "learning_rate": 5.579411136135725e-06, "loss": 2.974, "step": 1354500 }, { "epoch": 11.1, "learning_rate": 5.577767183415805e-06, "loss": 2.9776, "step": 1355000 }, { "epoch": 11.11, "learning_rate": 5.5761232306958855e-06, "loss": 2.9763, "step": 1355500 }, { "epoch": 11.11, "learning_rate": 5.574479277975966e-06, "loss": 2.9764, "step": 1356000 }, { "epoch": 11.11, "learning_rate": 5.572835325256047e-06, "loss": 2.9828, "step": 1356500 }, { "epoch": 11.12, "learning_rate": 5.571191372536127e-06, "loss": 2.9758, "step": 1357000 }, { "epoch": 11.12, "learning_rate": 5.569547419816207e-06, "loss": 2.979, "step": 1357500 }, { "epoch": 11.13, "learning_rate": 5.567903467096287e-06, "loss": 2.9827, "step": 1358000 }, { "epoch": 11.13, "learning_rate": 5.566259514376366e-06, "loss": 2.9817, "step": 1358500 }, { "epoch": 11.13, "learning_rate": 5.564615561656446e-06, "loss": 2.9774, "step": 1359000 }, { "epoch": 11.14, "learning_rate": 5.562971608936527e-06, "loss": 2.9756, "step": 1359500 }, { "epoch": 11.14, "learning_rate": 5.5613276562166075e-06, "loss": 2.9787, "step": 1360000 }, { "epoch": 11.15, "learning_rate": 5.559683703496688e-06, "loss": 2.9761, "step": 1360500 }, { "epoch": 11.15, "learning_rate": 5.558039750776768e-06, "loss": 2.9789, "step": 1361000 }, { "epoch": 11.15, "learning_rate": 5.556395798056849e-06, "loss": 2.9803, "step": 1361500 }, { "epoch": 11.16, "learning_rate": 5.554751845336929e-06, "loss": 2.9784, "step": 1362000 }, { "epoch": 11.16, "learning_rate": 5.553107892617009e-06, "loss": 2.9759, "step": 1362500 }, { "epoch": 11.17, "learning_rate": 5.551463939897089e-06, "loss": 2.9823, "step": 1363000 }, { "epoch": 11.17, "learning_rate": 5.54981998717717e-06, "loss": 2.9767, "step": 1363500 }, { "epoch": 11.18, "learning_rate": 5.54817603445725e-06, "loss": 2.978, "step": 1364000 }, { "epoch": 11.18, "learning_rate": 5.5465320817373294e-06, "loss": 2.9843, "step": 1364500 }, { "epoch": 11.18, "learning_rate": 5.5448881290174096e-06, "loss": 2.9708, "step": 1365000 }, { "epoch": 11.19, "learning_rate": 5.54324417629749e-06, "loss": 2.9756, "step": 1365500 }, { "epoch": 11.19, "learning_rate": 5.54160022357757e-06, "loss": 2.973, "step": 1366000 }, { "epoch": 11.2, "learning_rate": 5.539956270857651e-06, "loss": 2.981, "step": 1366500 }, { "epoch": 11.2, "learning_rate": 5.538312318137731e-06, "loss": 2.971, "step": 1367000 }, { "epoch": 11.2, "learning_rate": 5.536668365417811e-06, "loss": 2.9819, "step": 1367500 }, { "epoch": 11.21, "learning_rate": 5.535024412697891e-06, "loss": 2.975, "step": 1368000 }, { "epoch": 11.21, "learning_rate": 5.533380459977971e-06, "loss": 2.9697, "step": 1368500 }, { "epoch": 11.22, "learning_rate": 5.531736507258052e-06, "loss": 2.9745, "step": 1369000 }, { "epoch": 11.22, "learning_rate": 5.530092554538132e-06, "loss": 2.9812, "step": 1369500 }, { "epoch": 11.22, "learning_rate": 5.5284486018182125e-06, "loss": 2.9816, "step": 1370000 }, { "epoch": 11.23, "learning_rate": 5.526804649098292e-06, "loss": 2.9712, "step": 1370500 }, { "epoch": 11.23, "learning_rate": 5.525160696378372e-06, "loss": 2.9771, "step": 1371000 }, { "epoch": 11.24, "learning_rate": 5.523516743658452e-06, "loss": 2.9747, "step": 1371500 }, { "epoch": 11.24, "learning_rate": 5.521872790938533e-06, "loss": 2.9702, "step": 1372000 }, { "epoch": 11.24, "learning_rate": 5.520228838218613e-06, "loss": 2.9757, "step": 1372500 }, { "epoch": 11.25, "learning_rate": 5.518584885498693e-06, "loss": 2.9752, "step": 1373000 }, { "epoch": 11.25, "learning_rate": 5.516940932778773e-06, "loss": 2.9699, "step": 1373500 }, { "epoch": 11.26, "learning_rate": 5.515296980058854e-06, "loss": 2.9753, "step": 1374000 }, { "epoch": 11.26, "learning_rate": 5.5136530273389345e-06, "loss": 2.9799, "step": 1374500 }, { "epoch": 11.27, "learning_rate": 5.512009074619015e-06, "loss": 2.9813, "step": 1375000 }, { "epoch": 11.27, "learning_rate": 5.510365121899095e-06, "loss": 2.9736, "step": 1375500 }, { "epoch": 11.27, "learning_rate": 5.508721169179175e-06, "loss": 2.9802, "step": 1376000 }, { "epoch": 11.28, "learning_rate": 5.507077216459256e-06, "loss": 2.9686, "step": 1376500 }, { "epoch": 11.28, "learning_rate": 5.505433263739335e-06, "loss": 2.9786, "step": 1377000 }, { "epoch": 11.29, "learning_rate": 5.503789311019415e-06, "loss": 2.9808, "step": 1377500 }, { "epoch": 11.29, "learning_rate": 5.502145358299495e-06, "loss": 2.9748, "step": 1378000 }, { "epoch": 11.29, "learning_rate": 5.5005014055795755e-06, "loss": 2.9771, "step": 1378500 }, { "epoch": 11.3, "learning_rate": 5.4988574528596565e-06, "loss": 2.9765, "step": 1379000 }, { "epoch": 11.3, "learning_rate": 5.497213500139737e-06, "loss": 2.9745, "step": 1379500 }, { "epoch": 11.31, "learning_rate": 5.495569547419817e-06, "loss": 2.9709, "step": 1380000 }, { "epoch": 11.31, "eval_accuracy": 0.4926664563281789, "eval_loss": 2.8374874591827393, "eval_runtime": 405.084, "eval_samples_per_second": 761.215, "eval_steps_per_second": 15.861, "step": 1380000 }, { "epoch": 11.31, "learning_rate": 5.493925594699897e-06, "loss": 2.9771, "step": 1380500 }, { "epoch": 11.31, "learning_rate": 5.492281641979977e-06, "loss": 2.9832, "step": 1381000 }, { "epoch": 11.32, "learning_rate": 5.490637689260058e-06, "loss": 2.9741, "step": 1381500 }, { "epoch": 11.32, "learning_rate": 5.488993736540138e-06, "loss": 2.9815, "step": 1382000 }, { "epoch": 11.33, "learning_rate": 5.487349783820218e-06, "loss": 2.9758, "step": 1382500 }, { "epoch": 11.33, "learning_rate": 5.4857058311002975e-06, "loss": 2.9791, "step": 1383000 }, { "epoch": 11.33, "learning_rate": 5.484061878380378e-06, "loss": 2.9798, "step": 1383500 }, { "epoch": 11.34, "learning_rate": 5.482417925660458e-06, "loss": 2.9725, "step": 1384000 }, { "epoch": 11.34, "learning_rate": 5.480773972940539e-06, "loss": 2.975, "step": 1384500 }, { "epoch": 11.35, "learning_rate": 5.479130020220619e-06, "loss": 2.9717, "step": 1385000 }, { "epoch": 11.35, "learning_rate": 5.477486067500699e-06, "loss": 2.9701, "step": 1385500 }, { "epoch": 11.36, "learning_rate": 5.475842114780779e-06, "loss": 2.9725, "step": 1386000 }, { "epoch": 11.36, "learning_rate": 5.47419816206086e-06, "loss": 2.9766, "step": 1386500 }, { "epoch": 11.36, "learning_rate": 5.47255420934094e-06, "loss": 2.9762, "step": 1387000 }, { "epoch": 11.37, "learning_rate": 5.47091025662102e-06, "loss": 2.968, "step": 1387500 }, { "epoch": 11.37, "learning_rate": 5.4692663039011005e-06, "loss": 2.9754, "step": 1388000 }, { "epoch": 11.38, "learning_rate": 5.467622351181181e-06, "loss": 2.9663, "step": 1388500 }, { "epoch": 11.38, "learning_rate": 5.46597839846126e-06, "loss": 2.973, "step": 1389000 }, { "epoch": 11.38, "learning_rate": 5.464334445741341e-06, "loss": 2.9739, "step": 1389500 }, { "epoch": 11.39, "learning_rate": 5.462690493021421e-06, "loss": 2.9662, "step": 1390000 }, { "epoch": 11.39, "learning_rate": 5.461046540301501e-06, "loss": 2.9719, "step": 1390500 }, { "epoch": 11.4, "learning_rate": 5.459402587581581e-06, "loss": 2.9817, "step": 1391000 }, { "epoch": 11.4, "learning_rate": 5.457758634861662e-06, "loss": 2.9718, "step": 1391500 }, { "epoch": 11.4, "learning_rate": 5.456114682141742e-06, "loss": 2.9796, "step": 1392000 }, { "epoch": 11.41, "learning_rate": 5.4544707294218225e-06, "loss": 2.972, "step": 1392500 }, { "epoch": 11.41, "learning_rate": 5.452826776701903e-06, "loss": 2.9755, "step": 1393000 }, { "epoch": 11.42, "learning_rate": 5.451182823981983e-06, "loss": 2.9731, "step": 1393500 }, { "epoch": 11.42, "learning_rate": 5.449538871262064e-06, "loss": 2.9719, "step": 1394000 }, { "epoch": 11.42, "learning_rate": 5.447894918542144e-06, "loss": 2.9669, "step": 1394500 }, { "epoch": 11.43, "learning_rate": 5.446250965822223e-06, "loss": 2.974, "step": 1395000 }, { "epoch": 11.43, "learning_rate": 5.444607013102303e-06, "loss": 2.9758, "step": 1395500 }, { "epoch": 11.44, "learning_rate": 5.442963060382383e-06, "loss": 2.9695, "step": 1396000 }, { "epoch": 11.44, "learning_rate": 5.4413191076624635e-06, "loss": 2.9685, "step": 1396500 }, { "epoch": 11.45, "learning_rate": 5.4396751549425444e-06, "loss": 2.9747, "step": 1397000 }, { "epoch": 11.45, "learning_rate": 5.4380312022226246e-06, "loss": 2.973, "step": 1397500 }, { "epoch": 11.45, "learning_rate": 5.436387249502705e-06, "loss": 2.9753, "step": 1398000 }, { "epoch": 11.46, "learning_rate": 5.434743296782785e-06, "loss": 2.9692, "step": 1398500 }, { "epoch": 11.46, "learning_rate": 5.433099344062866e-06, "loss": 2.9772, "step": 1399000 }, { "epoch": 11.47, "learning_rate": 5.431455391342946e-06, "loss": 2.9683, "step": 1399500 }, { "epoch": 11.47, "learning_rate": 5.429811438623026e-06, "loss": 2.9707, "step": 1400000 }, { "epoch": 11.47, "learning_rate": 5.428167485903106e-06, "loss": 2.9687, "step": 1400500 }, { "epoch": 11.48, "learning_rate": 5.4265235331831855e-06, "loss": 2.9696, "step": 1401000 }, { "epoch": 11.48, "learning_rate": 5.424879580463266e-06, "loss": 2.9797, "step": 1401500 }, { "epoch": 11.49, "learning_rate": 5.4232356277433466e-06, "loss": 2.9733, "step": 1402000 }, { "epoch": 11.49, "learning_rate": 5.421591675023427e-06, "loss": 2.9671, "step": 1402500 }, { "epoch": 11.49, "learning_rate": 5.419947722303507e-06, "loss": 2.978, "step": 1403000 }, { "epoch": 11.5, "learning_rate": 5.418303769583587e-06, "loss": 2.9727, "step": 1403500 }, { "epoch": 11.5, "learning_rate": 5.416659816863667e-06, "loss": 2.9745, "step": 1404000 }, { "epoch": 11.51, "learning_rate": 5.415015864143748e-06, "loss": 2.9711, "step": 1404500 }, { "epoch": 11.51, "learning_rate": 5.413371911423828e-06, "loss": 2.9749, "step": 1405000 }, { "epoch": 11.52, "learning_rate": 5.411727958703908e-06, "loss": 2.972, "step": 1405500 }, { "epoch": 11.52, "learning_rate": 5.410084005983988e-06, "loss": 2.9717, "step": 1406000 }, { "epoch": 11.52, "learning_rate": 5.408440053264069e-06, "loss": 2.9745, "step": 1406500 }, { "epoch": 11.53, "learning_rate": 5.406796100544149e-06, "loss": 2.9727, "step": 1407000 }, { "epoch": 11.53, "learning_rate": 5.405152147824229e-06, "loss": 2.9724, "step": 1407500 }, { "epoch": 11.54, "learning_rate": 5.403508195104309e-06, "loss": 2.9654, "step": 1408000 }, { "epoch": 11.54, "learning_rate": 5.401864242384389e-06, "loss": 2.9749, "step": 1408500 }, { "epoch": 11.54, "learning_rate": 5.400220289664469e-06, "loss": 2.971, "step": 1409000 }, { "epoch": 11.55, "learning_rate": 5.39857633694455e-06, "loss": 2.9723, "step": 1409500 }, { "epoch": 11.55, "learning_rate": 5.39693238422463e-06, "loss": 2.9695, "step": 1410000 }, { "epoch": 11.55, "eval_accuracy": 0.49317043725147325, "eval_loss": 2.835310459136963, "eval_runtime": 406.1684, "eval_samples_per_second": 759.183, "eval_steps_per_second": 15.819, "step": 1410000 }, { "epoch": 11.56, "learning_rate": 5.39528843150471e-06, "loss": 2.9687, "step": 1410500 }, { "epoch": 11.56, "learning_rate": 5.3936444787847905e-06, "loss": 2.9696, "step": 1411000 }, { "epoch": 11.56, "learning_rate": 5.3920005260648715e-06, "loss": 2.9737, "step": 1411500 }, { "epoch": 11.57, "learning_rate": 5.390356573344952e-06, "loss": 2.9715, "step": 1412000 }, { "epoch": 11.57, "learning_rate": 5.388712620625032e-06, "loss": 2.971, "step": 1412500 }, { "epoch": 11.58, "learning_rate": 5.387068667905111e-06, "loss": 2.9643, "step": 1413000 }, { "epoch": 11.58, "learning_rate": 5.385424715185191e-06, "loss": 2.9683, "step": 1413500 }, { "epoch": 11.58, "learning_rate": 5.383780762465271e-06, "loss": 2.9697, "step": 1414000 }, { "epoch": 11.59, "learning_rate": 5.382136809745352e-06, "loss": 2.9724, "step": 1414500 }, { "epoch": 11.59, "learning_rate": 5.380492857025432e-06, "loss": 2.9723, "step": 1415000 }, { "epoch": 11.6, "learning_rate": 5.3788489043055125e-06, "loss": 2.9665, "step": 1415500 }, { "epoch": 11.6, "learning_rate": 5.377204951585593e-06, "loss": 2.9792, "step": 1416000 }, { "epoch": 11.61, "learning_rate": 5.375560998865673e-06, "loss": 2.9728, "step": 1416500 }, { "epoch": 11.61, "learning_rate": 5.373917046145754e-06, "loss": 2.9705, "step": 1417000 }, { "epoch": 11.61, "learning_rate": 5.372273093425834e-06, "loss": 2.9694, "step": 1417500 }, { "epoch": 11.62, "learning_rate": 5.370629140705914e-06, "loss": 2.9662, "step": 1418000 }, { "epoch": 11.62, "learning_rate": 5.368985187985994e-06, "loss": 2.9696, "step": 1418500 }, { "epoch": 11.63, "learning_rate": 5.367341235266073e-06, "loss": 2.9668, "step": 1419000 }, { "epoch": 11.63, "learning_rate": 5.365697282546154e-06, "loss": 2.9754, "step": 1419500 }, { "epoch": 11.63, "learning_rate": 5.3640533298262345e-06, "loss": 2.9687, "step": 1420000 }, { "epoch": 11.64, "learning_rate": 5.362409377106315e-06, "loss": 2.9657, "step": 1420500 }, { "epoch": 11.64, "learning_rate": 5.360765424386395e-06, "loss": 2.9687, "step": 1421000 }, { "epoch": 11.65, "learning_rate": 5.359121471666475e-06, "loss": 2.963, "step": 1421500 }, { "epoch": 11.65, "learning_rate": 5.357477518946556e-06, "loss": 2.9688, "step": 1422000 }, { "epoch": 11.65, "learning_rate": 5.355833566226636e-06, "loss": 2.9654, "step": 1422500 }, { "epoch": 11.66, "learning_rate": 5.354189613506716e-06, "loss": 2.973, "step": 1423000 }, { "epoch": 11.66, "learning_rate": 5.352545660786796e-06, "loss": 2.9717, "step": 1423500 }, { "epoch": 11.67, "learning_rate": 5.350901708066877e-06, "loss": 2.9654, "step": 1424000 }, { "epoch": 11.67, "learning_rate": 5.349257755346957e-06, "loss": 2.96, "step": 1424500 }, { "epoch": 11.67, "learning_rate": 5.347613802627037e-06, "loss": 2.9653, "step": 1425000 }, { "epoch": 11.68, "learning_rate": 5.345969849907117e-06, "loss": 2.9667, "step": 1425500 }, { "epoch": 11.68, "learning_rate": 5.344325897187197e-06, "loss": 2.9697, "step": 1426000 }, { "epoch": 11.69, "learning_rate": 5.342681944467277e-06, "loss": 2.9725, "step": 1426500 }, { "epoch": 11.69, "learning_rate": 5.341037991747358e-06, "loss": 2.9673, "step": 1427000 }, { "epoch": 11.7, "learning_rate": 5.339394039027438e-06, "loss": 2.9679, "step": 1427500 }, { "epoch": 11.7, "learning_rate": 5.337750086307518e-06, "loss": 2.9735, "step": 1428000 }, { "epoch": 11.7, "learning_rate": 5.336106133587598e-06, "loss": 2.9692, "step": 1428500 }, { "epoch": 11.71, "learning_rate": 5.3344621808676785e-06, "loss": 2.9707, "step": 1429000 }, { "epoch": 11.71, "learning_rate": 5.3328182281477594e-06, "loss": 2.9742, "step": 1429500 }, { "epoch": 11.72, "learning_rate": 5.3311742754278396e-06, "loss": 2.9593, "step": 1430000 }, { "epoch": 11.72, "learning_rate": 5.32953032270792e-06, "loss": 2.9698, "step": 1430500 }, { "epoch": 11.72, "learning_rate": 5.327886369988e-06, "loss": 2.9685, "step": 1431000 }, { "epoch": 11.73, "learning_rate": 5.326242417268079e-06, "loss": 2.9763, "step": 1431500 }, { "epoch": 11.73, "learning_rate": 5.324598464548159e-06, "loss": 2.9721, "step": 1432000 }, { "epoch": 11.74, "learning_rate": 5.32295451182824e-06, "loss": 2.9659, "step": 1432500 }, { "epoch": 11.74, "learning_rate": 5.32131055910832e-06, "loss": 2.9669, "step": 1433000 }, { "epoch": 11.74, "learning_rate": 5.3196666063884005e-06, "loss": 2.9627, "step": 1433500 }, { "epoch": 11.75, "learning_rate": 5.318022653668481e-06, "loss": 2.9696, "step": 1434000 }, { "epoch": 11.75, "learning_rate": 5.3163787009485616e-06, "loss": 2.972, "step": 1434500 }, { "epoch": 11.76, "learning_rate": 5.314734748228642e-06, "loss": 2.9714, "step": 1435000 }, { "epoch": 11.76, "learning_rate": 5.313090795508722e-06, "loss": 2.9683, "step": 1435500 }, { "epoch": 11.76, "learning_rate": 5.311446842788802e-06, "loss": 2.9655, "step": 1436000 }, { "epoch": 11.77, "learning_rate": 5.309802890068883e-06, "loss": 2.9673, "step": 1436500 }, { "epoch": 11.77, "learning_rate": 5.308158937348963e-06, "loss": 2.9678, "step": 1437000 }, { "epoch": 11.78, "learning_rate": 5.306514984629042e-06, "loss": 2.969, "step": 1437500 }, { "epoch": 11.78, "learning_rate": 5.3048710319091224e-06, "loss": 2.9631, "step": 1438000 }, { "epoch": 11.79, "learning_rate": 5.3032270791892026e-06, "loss": 2.9681, "step": 1438500 }, { "epoch": 11.79, "learning_rate": 5.301583126469283e-06, "loss": 2.9658, "step": 1439000 }, { "epoch": 11.79, "learning_rate": 5.299939173749364e-06, "loss": 2.9621, "step": 1439500 }, { "epoch": 11.8, "learning_rate": 5.298295221029444e-06, "loss": 2.9607, "step": 1440000 }, { "epoch": 11.8, "eval_accuracy": 0.4940959315942787, "eval_loss": 2.829009771347046, "eval_runtime": 405.0153, "eval_samples_per_second": 761.344, "eval_steps_per_second": 15.864, "step": 1440000 }, { "epoch": 11.8, "learning_rate": 5.296651268309524e-06, "loss": 2.9657, "step": 1440500 }, { "epoch": 11.81, "learning_rate": 5.295007315589604e-06, "loss": 2.9663, "step": 1441000 }, { "epoch": 11.81, "learning_rate": 5.293363362869684e-06, "loss": 2.9679, "step": 1441500 }, { "epoch": 11.81, "learning_rate": 5.291719410149765e-06, "loss": 2.9643, "step": 1442000 }, { "epoch": 11.82, "learning_rate": 5.290075457429845e-06, "loss": 2.975, "step": 1442500 }, { "epoch": 11.82, "learning_rate": 5.288431504709925e-06, "loss": 2.963, "step": 1443000 }, { "epoch": 11.83, "learning_rate": 5.286787551990005e-06, "loss": 2.9608, "step": 1443500 }, { "epoch": 11.83, "learning_rate": 5.285143599270085e-06, "loss": 2.962, "step": 1444000 }, { "epoch": 11.83, "learning_rate": 5.283499646550165e-06, "loss": 2.9687, "step": 1444500 }, { "epoch": 11.84, "learning_rate": 5.281855693830246e-06, "loss": 2.9767, "step": 1445000 }, { "epoch": 11.84, "learning_rate": 5.280211741110326e-06, "loss": 2.9664, "step": 1445500 }, { "epoch": 11.85, "learning_rate": 5.278567788390406e-06, "loss": 2.9695, "step": 1446000 }, { "epoch": 11.85, "learning_rate": 5.276923835670486e-06, "loss": 2.9689, "step": 1446500 }, { "epoch": 11.86, "learning_rate": 5.275279882950567e-06, "loss": 2.968, "step": 1447000 }, { "epoch": 11.86, "learning_rate": 5.273635930230647e-06, "loss": 2.9666, "step": 1447500 }, { "epoch": 11.86, "learning_rate": 5.2719919775107275e-06, "loss": 2.9656, "step": 1448000 }, { "epoch": 11.87, "learning_rate": 5.270348024790808e-06, "loss": 2.9618, "step": 1448500 }, { "epoch": 11.87, "learning_rate": 5.268704072070888e-06, "loss": 2.9627, "step": 1449000 }, { "epoch": 11.88, "learning_rate": 5.267060119350967e-06, "loss": 2.9654, "step": 1449500 }, { "epoch": 11.88, "learning_rate": 5.265416166631048e-06, "loss": 2.974, "step": 1450000 }, { "epoch": 11.88, "learning_rate": 5.263772213911128e-06, "loss": 2.9723, "step": 1450500 }, { "epoch": 11.89, "learning_rate": 5.262128261191208e-06, "loss": 2.9629, "step": 1451000 }, { "epoch": 11.89, "learning_rate": 5.260484308471288e-06, "loss": 2.9679, "step": 1451500 }, { "epoch": 11.9, "learning_rate": 5.258840355751369e-06, "loss": 2.9637, "step": 1452000 }, { "epoch": 11.9, "learning_rate": 5.2571964030314495e-06, "loss": 2.9677, "step": 1452500 }, { "epoch": 11.9, "learning_rate": 5.25555245031153e-06, "loss": 2.9707, "step": 1453000 }, { "epoch": 11.91, "learning_rate": 5.25390849759161e-06, "loss": 2.9706, "step": 1453500 }, { "epoch": 11.91, "learning_rate": 5.25226454487169e-06, "loss": 2.9627, "step": 1454000 }, { "epoch": 11.92, "learning_rate": 5.250620592151771e-06, "loss": 2.9631, "step": 1454500 }, { "epoch": 11.92, "learning_rate": 5.248976639431851e-06, "loss": 2.9652, "step": 1455000 }, { "epoch": 11.92, "learning_rate": 5.24733268671193e-06, "loss": 2.9613, "step": 1455500 }, { "epoch": 11.93, "learning_rate": 5.24568873399201e-06, "loss": 2.9683, "step": 1456000 }, { "epoch": 11.93, "learning_rate": 5.2440447812720905e-06, "loss": 2.954, "step": 1456500 }, { "epoch": 11.94, "learning_rate": 5.242400828552171e-06, "loss": 2.9646, "step": 1457000 }, { "epoch": 11.94, "learning_rate": 5.240756875832252e-06, "loss": 2.9694, "step": 1457500 }, { "epoch": 11.95, "learning_rate": 5.239112923112332e-06, "loss": 2.9651, "step": 1458000 }, { "epoch": 11.95, "learning_rate": 5.237468970392412e-06, "loss": 2.9675, "step": 1458500 }, { "epoch": 11.95, "learning_rate": 5.235825017672492e-06, "loss": 2.967, "step": 1459000 }, { "epoch": 11.96, "learning_rate": 5.234181064952573e-06, "loss": 2.9687, "step": 1459500 }, { "epoch": 11.96, "learning_rate": 5.232537112232653e-06, "loss": 2.9624, "step": 1460000 }, { "epoch": 11.97, "learning_rate": 5.230893159512733e-06, "loss": 2.9649, "step": 1460500 }, { "epoch": 11.97, "learning_rate": 5.229249206792813e-06, "loss": 2.9656, "step": 1461000 }, { "epoch": 11.97, "learning_rate": 5.227605254072893e-06, "loss": 2.9581, "step": 1461500 }, { "epoch": 11.98, "learning_rate": 5.225961301352973e-06, "loss": 2.964, "step": 1462000 }, { "epoch": 11.98, "learning_rate": 5.224317348633054e-06, "loss": 2.958, "step": 1462500 }, { "epoch": 11.99, "learning_rate": 5.222673395913134e-06, "loss": 2.9638, "step": 1463000 }, { "epoch": 11.99, "learning_rate": 5.221029443193214e-06, "loss": 2.9679, "step": 1463500 }, { "epoch": 11.99, "learning_rate": 5.219385490473294e-06, "loss": 2.9585, "step": 1464000 }, { "epoch": 12.0, "learning_rate": 5.217741537753375e-06, "loss": 2.9664, "step": 1464500 }, { "epoch": 12.0, "learning_rate": 5.216097585033455e-06, "loss": 2.9604, "step": 1465000 }, { "epoch": 12.01, "learning_rate": 5.214453632313535e-06, "loss": 2.9489, "step": 1465500 }, { "epoch": 12.01, "learning_rate": 5.2128096795936155e-06, "loss": 2.9678, "step": 1466000 }, { "epoch": 12.01, "learning_rate": 5.211165726873696e-06, "loss": 2.9626, "step": 1466500 }, { "epoch": 12.02, "learning_rate": 5.2095217741537766e-06, "loss": 2.9567, "step": 1467000 }, { "epoch": 12.02, "learning_rate": 5.207877821433856e-06, "loss": 2.9636, "step": 1467500 }, { "epoch": 12.03, "learning_rate": 5.206233868713936e-06, "loss": 2.9654, "step": 1468000 }, { "epoch": 12.03, "learning_rate": 5.204589915994016e-06, "loss": 2.9617, "step": 1468500 }, { "epoch": 12.04, "learning_rate": 5.202945963274096e-06, "loss": 2.959, "step": 1469000 }, { "epoch": 12.04, "learning_rate": 5.201302010554176e-06, "loss": 2.9599, "step": 1469500 }, { "epoch": 12.04, "learning_rate": 5.199658057834257e-06, "loss": 2.9636, "step": 1470000 }, { "epoch": 12.04, "eval_accuracy": 0.494423367210693, "eval_loss": 2.8266873359680176, "eval_runtime": 407.8041, "eval_samples_per_second": 756.138, "eval_steps_per_second": 15.755, "step": 1470000 }, { "epoch": 12.05, "learning_rate": 5.1980141051143374e-06, "loss": 2.9612, "step": 1470500 }, { "epoch": 12.05, "learning_rate": 5.1963701523944176e-06, "loss": 2.9556, "step": 1471000 }, { "epoch": 12.06, "learning_rate": 5.194726199674498e-06, "loss": 2.9642, "step": 1471500 }, { "epoch": 12.06, "learning_rate": 5.193082246954579e-06, "loss": 2.9593, "step": 1472000 }, { "epoch": 12.06, "learning_rate": 5.191438294234659e-06, "loss": 2.9578, "step": 1472500 }, { "epoch": 12.07, "learning_rate": 5.189794341514739e-06, "loss": 2.962, "step": 1473000 }, { "epoch": 12.07, "learning_rate": 5.188150388794818e-06, "loss": 2.9613, "step": 1473500 }, { "epoch": 12.08, "learning_rate": 5.186506436074898e-06, "loss": 2.96, "step": 1474000 }, { "epoch": 12.08, "learning_rate": 5.1848624833549785e-06, "loss": 2.9622, "step": 1474500 }, { "epoch": 12.08, "learning_rate": 5.1832185306350594e-06, "loss": 2.9623, "step": 1475000 }, { "epoch": 12.09, "learning_rate": 5.1815745779151396e-06, "loss": 2.9627, "step": 1475500 }, { "epoch": 12.09, "learning_rate": 5.17993062519522e-06, "loss": 2.9649, "step": 1476000 }, { "epoch": 12.1, "learning_rate": 5.1782866724753e-06, "loss": 2.9676, "step": 1476500 }, { "epoch": 12.1, "learning_rate": 5.17664271975538e-06, "loss": 2.9661, "step": 1477000 }, { "epoch": 12.1, "learning_rate": 5.174998767035461e-06, "loss": 2.9663, "step": 1477500 }, { "epoch": 12.11, "learning_rate": 5.173354814315541e-06, "loss": 2.9647, "step": 1478000 }, { "epoch": 12.11, "learning_rate": 5.171710861595621e-06, "loss": 2.9625, "step": 1478500 }, { "epoch": 12.12, "learning_rate": 5.170066908875701e-06, "loss": 2.9613, "step": 1479000 }, { "epoch": 12.12, "learning_rate": 5.168422956155782e-06, "loss": 2.9616, "step": 1479500 }, { "epoch": 12.13, "learning_rate": 5.1667790034358615e-06, "loss": 2.9624, "step": 1480000 }, { "epoch": 12.13, "learning_rate": 5.165135050715942e-06, "loss": 2.9611, "step": 1480500 }, { "epoch": 12.13, "learning_rate": 5.163491097996022e-06, "loss": 2.9657, "step": 1481000 }, { "epoch": 12.14, "learning_rate": 5.161847145276102e-06, "loss": 2.9649, "step": 1481500 }, { "epoch": 12.14, "learning_rate": 5.160203192556182e-06, "loss": 2.9588, "step": 1482000 }, { "epoch": 12.15, "learning_rate": 5.158559239836263e-06, "loss": 2.9646, "step": 1482500 }, { "epoch": 12.15, "learning_rate": 5.156915287116343e-06, "loss": 2.968, "step": 1483000 }, { "epoch": 12.15, "learning_rate": 5.155271334396423e-06, "loss": 2.9627, "step": 1483500 }, { "epoch": 12.16, "learning_rate": 5.153627381676503e-06, "loss": 2.9611, "step": 1484000 }, { "epoch": 12.16, "learning_rate": 5.151983428956584e-06, "loss": 2.9668, "step": 1484500 }, { "epoch": 12.17, "learning_rate": 5.1503394762366645e-06, "loss": 2.954, "step": 1485000 }, { "epoch": 12.17, "learning_rate": 5.148695523516745e-06, "loss": 2.963, "step": 1485500 }, { "epoch": 12.17, "learning_rate": 5.147051570796824e-06, "loss": 2.9599, "step": 1486000 }, { "epoch": 12.18, "learning_rate": 5.145407618076904e-06, "loss": 2.9564, "step": 1486500 }, { "epoch": 12.18, "learning_rate": 5.143763665356984e-06, "loss": 2.9578, "step": 1487000 }, { "epoch": 12.19, "learning_rate": 5.142119712637065e-06, "loss": 2.9657, "step": 1487500 }, { "epoch": 12.19, "learning_rate": 5.140475759917145e-06, "loss": 2.9635, "step": 1488000 }, { "epoch": 12.2, "learning_rate": 5.138831807197225e-06, "loss": 2.966, "step": 1488500 }, { "epoch": 12.2, "learning_rate": 5.1371878544773055e-06, "loss": 2.96, "step": 1489000 }, { "epoch": 12.2, "learning_rate": 5.135543901757386e-06, "loss": 2.9557, "step": 1489500 }, { "epoch": 12.21, "learning_rate": 5.133899949037467e-06, "loss": 2.96, "step": 1490000 }, { "epoch": 12.21, "learning_rate": 5.132255996317547e-06, "loss": 2.967, "step": 1490500 }, { "epoch": 12.22, "learning_rate": 5.130612043597627e-06, "loss": 2.9576, "step": 1491000 }, { "epoch": 12.22, "learning_rate": 5.128968090877707e-06, "loss": 2.9589, "step": 1491500 }, { "epoch": 12.22, "learning_rate": 5.127324138157786e-06, "loss": 2.9561, "step": 1492000 }, { "epoch": 12.23, "learning_rate": 5.125680185437867e-06, "loss": 2.9592, "step": 1492500 }, { "epoch": 12.23, "learning_rate": 5.124036232717947e-06, "loss": 2.9611, "step": 1493000 }, { "epoch": 12.24, "learning_rate": 5.1223922799980275e-06, "loss": 2.9568, "step": 1493500 }, { "epoch": 12.24, "learning_rate": 5.120748327278108e-06, "loss": 2.96, "step": 1494000 }, { "epoch": 12.24, "learning_rate": 5.119104374558188e-06, "loss": 2.961, "step": 1494500 }, { "epoch": 12.25, "learning_rate": 5.117460421838269e-06, "loss": 2.9604, "step": 1495000 }, { "epoch": 12.25, "learning_rate": 5.115816469118349e-06, "loss": 2.9559, "step": 1495500 }, { "epoch": 12.26, "learning_rate": 5.114172516398429e-06, "loss": 2.9636, "step": 1496000 }, { "epoch": 12.26, "learning_rate": 5.112528563678509e-06, "loss": 2.9589, "step": 1496500 }, { "epoch": 12.26, "learning_rate": 5.11088461095859e-06, "loss": 2.9592, "step": 1497000 }, { "epoch": 12.27, "learning_rate": 5.10924065823867e-06, "loss": 2.9643, "step": 1497500 }, { "epoch": 12.27, "learning_rate": 5.1075967055187495e-06, "loss": 2.9587, "step": 1498000 }, { "epoch": 12.28, "learning_rate": 5.10595275279883e-06, "loss": 2.9601, "step": 1498500 }, { "epoch": 12.28, "learning_rate": 5.10430880007891e-06, "loss": 2.9574, "step": 1499000 }, { "epoch": 12.29, "learning_rate": 5.10266484735899e-06, "loss": 2.9626, "step": 1499500 }, { "epoch": 12.29, "learning_rate": 5.101020894639071e-06, "loss": 2.9584, "step": 1500000 }, { "epoch": 12.29, "eval_accuracy": 0.49460218852402815, "eval_loss": 2.8247358798980713, "eval_runtime": 404.5149, "eval_samples_per_second": 762.286, "eval_steps_per_second": 15.883, "step": 1500000 }, { "epoch": 12.29, "learning_rate": 5.099376941919151e-06, "loss": 2.9567, "step": 1500500 }, { "epoch": 12.3, "learning_rate": 5.097732989199231e-06, "loss": 2.9534, "step": 1501000 }, { "epoch": 12.3, "learning_rate": 5.096089036479311e-06, "loss": 2.9594, "step": 1501500 }, { "epoch": 12.31, "learning_rate": 5.094445083759391e-06, "loss": 2.9505, "step": 1502000 }, { "epoch": 12.31, "learning_rate": 5.092801131039472e-06, "loss": 2.9665, "step": 1502500 }, { "epoch": 12.31, "learning_rate": 5.0911571783195524e-06, "loss": 2.9554, "step": 1503000 }, { "epoch": 12.32, "learning_rate": 5.0895132255996326e-06, "loss": 2.9596, "step": 1503500 }, { "epoch": 12.32, "learning_rate": 5.087869272879712e-06, "loss": 2.9574, "step": 1504000 }, { "epoch": 12.33, "learning_rate": 5.086225320159792e-06, "loss": 2.9596, "step": 1504500 }, { "epoch": 12.33, "learning_rate": 5.084581367439872e-06, "loss": 2.9548, "step": 1505000 }, { "epoch": 12.33, "learning_rate": 5.082937414719953e-06, "loss": 2.9619, "step": 1505500 }, { "epoch": 12.34, "learning_rate": 5.081293462000033e-06, "loss": 2.9556, "step": 1506000 }, { "epoch": 12.34, "learning_rate": 5.079649509280113e-06, "loss": 2.9447, "step": 1506500 }, { "epoch": 12.35, "learning_rate": 5.0780055565601935e-06, "loss": 2.951, "step": 1507000 }, { "epoch": 12.35, "learning_rate": 5.0763616038402744e-06, "loss": 2.9613, "step": 1507500 }, { "epoch": 12.35, "learning_rate": 5.0747176511203546e-06, "loss": 2.9594, "step": 1508000 }, { "epoch": 12.36, "learning_rate": 5.073073698400435e-06, "loss": 2.9587, "step": 1508500 }, { "epoch": 12.36, "learning_rate": 5.071429745680515e-06, "loss": 2.9508, "step": 1509000 }, { "epoch": 12.37, "learning_rate": 5.069785792960596e-06, "loss": 2.9505, "step": 1509500 }, { "epoch": 12.37, "learning_rate": 5.068141840240674e-06, "loss": 2.9547, "step": 1510000 }, { "epoch": 12.38, "learning_rate": 5.066497887520755e-06, "loss": 2.9588, "step": 1510500 }, { "epoch": 12.38, "learning_rate": 5.064853934800835e-06, "loss": 2.9564, "step": 1511000 }, { "epoch": 12.38, "learning_rate": 5.0632099820809154e-06, "loss": 2.953, "step": 1511500 }, { "epoch": 12.39, "learning_rate": 5.0615660293609956e-06, "loss": 2.964, "step": 1512000 }, { "epoch": 12.39, "learning_rate": 5.0599220766410765e-06, "loss": 2.9576, "step": 1512500 }, { "epoch": 12.4, "learning_rate": 5.058278123921157e-06, "loss": 2.9596, "step": 1513000 }, { "epoch": 12.4, "learning_rate": 5.056634171201237e-06, "loss": 2.9625, "step": 1513500 }, { "epoch": 12.4, "learning_rate": 5.054990218481317e-06, "loss": 2.9526, "step": 1514000 }, { "epoch": 12.41, "learning_rate": 5.053346265761397e-06, "loss": 2.957, "step": 1514500 }, { "epoch": 12.41, "learning_rate": 5.051702313041478e-06, "loss": 2.9643, "step": 1515000 }, { "epoch": 12.42, "learning_rate": 5.050058360321558e-06, "loss": 2.9526, "step": 1515500 }, { "epoch": 12.42, "learning_rate": 5.048414407601637e-06, "loss": 2.9535, "step": 1516000 }, { "epoch": 12.42, "learning_rate": 5.0467704548817175e-06, "loss": 2.9598, "step": 1516500 }, { "epoch": 12.43, "learning_rate": 5.045126502161798e-06, "loss": 2.953, "step": 1517000 }, { "epoch": 12.43, "learning_rate": 5.043482549441878e-06, "loss": 2.9571, "step": 1517500 }, { "epoch": 12.44, "learning_rate": 5.041838596721959e-06, "loss": 2.9583, "step": 1518000 }, { "epoch": 12.44, "learning_rate": 5.040194644002039e-06, "loss": 2.9615, "step": 1518500 }, { "epoch": 12.44, "learning_rate": 5.038550691282119e-06, "loss": 2.9541, "step": 1519000 }, { "epoch": 12.45, "learning_rate": 5.036906738562199e-06, "loss": 2.9595, "step": 1519500 }, { "epoch": 12.45, "learning_rate": 5.03526278584228e-06, "loss": 2.9549, "step": 1520000 }, { "epoch": 12.46, "learning_rate": 5.03361883312236e-06, "loss": 2.958, "step": 1520500 }, { "epoch": 12.46, "learning_rate": 5.03197488040244e-06, "loss": 2.9547, "step": 1521000 }, { "epoch": 12.47, "learning_rate": 5.0303309276825205e-06, "loss": 2.9544, "step": 1521500 }, { "epoch": 12.47, "learning_rate": 5.0286869749626e-06, "loss": 2.9553, "step": 1522000 }, { "epoch": 12.47, "learning_rate": 5.02704302224268e-06, "loss": 2.9533, "step": 1522500 }, { "epoch": 12.48, "learning_rate": 5.025399069522761e-06, "loss": 2.9578, "step": 1523000 }, { "epoch": 12.48, "learning_rate": 5.023755116802841e-06, "loss": 2.9628, "step": 1523500 }, { "epoch": 12.49, "learning_rate": 5.022111164082921e-06, "loss": 2.9515, "step": 1524000 }, { "epoch": 12.49, "learning_rate": 5.020467211363001e-06, "loss": 2.9511, "step": 1524500 }, { "epoch": 12.49, "learning_rate": 5.018823258643082e-06, "loss": 2.9623, "step": 1525000 }, { "epoch": 12.5, "learning_rate": 5.017179305923162e-06, "loss": 2.9648, "step": 1525500 }, { "epoch": 12.5, "learning_rate": 5.0155353532032425e-06, "loss": 2.9571, "step": 1526000 }, { "epoch": 12.51, "learning_rate": 5.013891400483323e-06, "loss": 2.9543, "step": 1526500 }, { "epoch": 12.51, "learning_rate": 5.012247447763403e-06, "loss": 2.9567, "step": 1527000 }, { "epoch": 12.51, "learning_rate": 5.010603495043484e-06, "loss": 2.9549, "step": 1527500 }, { "epoch": 12.52, "learning_rate": 5.008959542323563e-06, "loss": 2.9604, "step": 1528000 }, { "epoch": 12.52, "learning_rate": 5.007315589603643e-06, "loss": 2.9591, "step": 1528500 }, { "epoch": 12.53, "learning_rate": 5.005671636883723e-06, "loss": 2.9592, "step": 1529000 }, { "epoch": 12.53, "learning_rate": 5.004027684163803e-06, "loss": 2.9529, "step": 1529500 }, { "epoch": 12.54, "learning_rate": 5.0023837314438835e-06, "loss": 2.9546, "step": 1530000 }, { "epoch": 12.54, "eval_accuracy": 0.4951152598295225, "eval_loss": 2.8195910453796387, "eval_runtime": 406.5071, "eval_samples_per_second": 758.55, "eval_steps_per_second": 15.805, "step": 1530000 }, { "epoch": 12.54, "learning_rate": 5.0007397787239645e-06, "loss": 2.9556, "step": 1530500 }, { "epoch": 12.54, "learning_rate": 4.999095826004045e-06, "loss": 2.9566, "step": 1531000 }, { "epoch": 12.55, "learning_rate": 4.997451873284125e-06, "loss": 2.9579, "step": 1531500 }, { "epoch": 12.55, "learning_rate": 4.995807920564205e-06, "loss": 2.9552, "step": 1532000 }, { "epoch": 12.56, "learning_rate": 4.994163967844286e-06, "loss": 2.9573, "step": 1532500 }, { "epoch": 12.56, "learning_rate": 4.992520015124365e-06, "loss": 2.9501, "step": 1533000 }, { "epoch": 12.56, "learning_rate": 4.990876062404445e-06, "loss": 2.9542, "step": 1533500 }, { "epoch": 12.57, "learning_rate": 4.989232109684526e-06, "loss": 2.9543, "step": 1534000 }, { "epoch": 12.57, "learning_rate": 4.987588156964606e-06, "loss": 2.9581, "step": 1534500 }, { "epoch": 12.58, "learning_rate": 4.9859442042446865e-06, "loss": 2.9519, "step": 1535000 }, { "epoch": 12.58, "learning_rate": 4.984300251524767e-06, "loss": 2.9525, "step": 1535500 }, { "epoch": 12.58, "learning_rate": 4.982656298804847e-06, "loss": 2.9575, "step": 1536000 }, { "epoch": 12.59, "learning_rate": 4.981012346084927e-06, "loss": 2.951, "step": 1536500 }, { "epoch": 12.59, "learning_rate": 4.979368393365007e-06, "loss": 2.9554, "step": 1537000 }, { "epoch": 12.6, "learning_rate": 4.977724440645088e-06, "loss": 2.948, "step": 1537500 }, { "epoch": 12.6, "learning_rate": 4.976080487925168e-06, "loss": 2.9571, "step": 1538000 }, { "epoch": 12.6, "learning_rate": 4.974436535205248e-06, "loss": 2.9573, "step": 1538500 }, { "epoch": 12.61, "learning_rate": 4.972792582485328e-06, "loss": 2.9505, "step": 1539000 }, { "epoch": 12.61, "learning_rate": 4.9711486297654085e-06, "loss": 2.9538, "step": 1539500 }, { "epoch": 12.62, "learning_rate": 4.969504677045489e-06, "loss": 2.953, "step": 1540000 }, { "epoch": 12.62, "learning_rate": 4.967860724325569e-06, "loss": 2.9508, "step": 1540500 }, { "epoch": 12.63, "learning_rate": 4.966216771605649e-06, "loss": 2.9517, "step": 1541000 }, { "epoch": 12.63, "learning_rate": 4.96457281888573e-06, "loss": 2.9515, "step": 1541500 }, { "epoch": 12.63, "learning_rate": 4.962928866165809e-06, "loss": 2.9529, "step": 1542000 }, { "epoch": 12.64, "learning_rate": 4.961284913445889e-06, "loss": 2.9492, "step": 1542500 }, { "epoch": 12.64, "learning_rate": 4.95964096072597e-06, "loss": 2.9554, "step": 1543000 }, { "epoch": 12.65, "learning_rate": 4.95799700800605e-06, "loss": 2.9546, "step": 1543500 }, { "epoch": 12.65, "learning_rate": 4.9563530552861304e-06, "loss": 2.9517, "step": 1544000 }, { "epoch": 12.65, "learning_rate": 4.9547091025662106e-06, "loss": 2.9527, "step": 1544500 }, { "epoch": 12.66, "learning_rate": 4.953065149846291e-06, "loss": 2.9507, "step": 1545000 }, { "epoch": 12.66, "learning_rate": 4.951421197126371e-06, "loss": 2.9389, "step": 1545500 }, { "epoch": 12.67, "learning_rate": 4.949777244406451e-06, "loss": 2.9514, "step": 1546000 }, { "epoch": 12.67, "learning_rate": 4.948133291686532e-06, "loss": 2.9557, "step": 1546500 }, { "epoch": 12.67, "learning_rate": 4.946489338966612e-06, "loss": 2.9524, "step": 1547000 }, { "epoch": 12.68, "learning_rate": 4.944845386246692e-06, "loss": 2.9492, "step": 1547500 }, { "epoch": 12.68, "learning_rate": 4.943201433526772e-06, "loss": 2.9557, "step": 1548000 }, { "epoch": 12.69, "learning_rate": 4.941557480806852e-06, "loss": 2.9515, "step": 1548500 }, { "epoch": 12.69, "learning_rate": 4.9399135280869325e-06, "loss": 2.9559, "step": 1549000 }, { "epoch": 12.69, "learning_rate": 4.938269575367013e-06, "loss": 2.9511, "step": 1549500 }, { "epoch": 12.7, "learning_rate": 4.936625622647093e-06, "loss": 2.9542, "step": 1550000 }, { "epoch": 12.7, "learning_rate": 4.934981669927174e-06, "loss": 2.9592, "step": 1550500 }, { "epoch": 12.71, "learning_rate": 4.933337717207253e-06, "loss": 2.95, "step": 1551000 }, { "epoch": 12.71, "learning_rate": 4.931693764487334e-06, "loss": 2.9537, "step": 1551500 }, { "epoch": 12.72, "learning_rate": 4.930049811767414e-06, "loss": 2.9526, "step": 1552000 }, { "epoch": 12.72, "learning_rate": 4.928405859047494e-06, "loss": 2.9549, "step": 1552500 }, { "epoch": 12.72, "learning_rate": 4.926761906327574e-06, "loss": 2.9513, "step": 1553000 }, { "epoch": 12.73, "learning_rate": 4.9251179536076545e-06, "loss": 2.9561, "step": 1553500 }, { "epoch": 12.73, "learning_rate": 4.923474000887735e-06, "loss": 2.9589, "step": 1554000 }, { "epoch": 12.74, "learning_rate": 4.921830048167815e-06, "loss": 2.9527, "step": 1554500 }, { "epoch": 12.74, "learning_rate": 4.920186095447895e-06, "loss": 2.9527, "step": 1555000 }, { "epoch": 12.74, "learning_rate": 4.918542142727976e-06, "loss": 2.954, "step": 1555500 }, { "epoch": 12.75, "learning_rate": 4.916898190008056e-06, "loss": 2.943, "step": 1556000 }, { "epoch": 12.75, "learning_rate": 4.915254237288136e-06, "loss": 2.9428, "step": 1556500 }, { "epoch": 12.76, "learning_rate": 4.913610284568216e-06, "loss": 2.9548, "step": 1557000 }, { "epoch": 12.76, "learning_rate": 4.911966331848296e-06, "loss": 2.9565, "step": 1557500 }, { "epoch": 12.76, "learning_rate": 4.9103223791283765e-06, "loss": 2.9475, "step": 1558000 }, { "epoch": 12.77, "learning_rate": 4.908678426408457e-06, "loss": 2.9534, "step": 1558500 }, { "epoch": 12.77, "learning_rate": 4.907034473688538e-06, "loss": 2.9567, "step": 1559000 }, { "epoch": 12.78, "learning_rate": 4.905390520968618e-06, "loss": 2.9496, "step": 1559500 }, { "epoch": 12.78, "learning_rate": 4.903746568248697e-06, "loss": 2.9544, "step": 1560000 }, { "epoch": 12.78, "eval_accuracy": 0.4959430255891887, "eval_loss": 2.8146307468414307, "eval_runtime": 404.6908, "eval_samples_per_second": 761.955, "eval_steps_per_second": 15.876, "step": 1560000 }, { "epoch": 12.78, "learning_rate": 4.902102615528778e-06, "loss": 2.9512, "step": 1560500 }, { "epoch": 12.79, "learning_rate": 4.900458662808858e-06, "loss": 2.9506, "step": 1561000 }, { "epoch": 12.79, "learning_rate": 4.898814710088938e-06, "loss": 2.9507, "step": 1561500 }, { "epoch": 12.8, "learning_rate": 4.897170757369018e-06, "loss": 2.9485, "step": 1562000 }, { "epoch": 12.8, "learning_rate": 4.8955268046490985e-06, "loss": 2.9472, "step": 1562500 }, { "epoch": 12.81, "learning_rate": 4.893882851929179e-06, "loss": 2.9466, "step": 1563000 }, { "epoch": 12.81, "learning_rate": 4.892238899209259e-06, "loss": 2.9461, "step": 1563500 }, { "epoch": 12.81, "learning_rate": 4.890594946489339e-06, "loss": 2.9519, "step": 1564000 }, { "epoch": 12.82, "learning_rate": 4.88895099376942e-06, "loss": 2.9548, "step": 1564500 }, { "epoch": 12.82, "learning_rate": 4.8873070410495e-06, "loss": 2.9514, "step": 1565000 }, { "epoch": 12.83, "learning_rate": 4.88566308832958e-06, "loss": 2.9515, "step": 1565500 }, { "epoch": 12.83, "learning_rate": 4.88401913560966e-06, "loss": 2.9448, "step": 1566000 }, { "epoch": 12.83, "learning_rate": 4.88237518288974e-06, "loss": 2.9534, "step": 1566500 }, { "epoch": 12.84, "learning_rate": 4.8807312301698205e-06, "loss": 2.9453, "step": 1567000 }, { "epoch": 12.84, "learning_rate": 4.879087277449901e-06, "loss": 2.95, "step": 1567500 }, { "epoch": 12.85, "learning_rate": 4.877443324729982e-06, "loss": 2.9518, "step": 1568000 }, { "epoch": 12.85, "learning_rate": 4.875799372010062e-06, "loss": 2.9582, "step": 1568500 }, { "epoch": 12.85, "learning_rate": 4.874155419290141e-06, "loss": 2.9535, "step": 1569000 }, { "epoch": 12.86, "learning_rate": 4.872511466570222e-06, "loss": 2.9523, "step": 1569500 }, { "epoch": 12.86, "learning_rate": 4.870867513850302e-06, "loss": 2.9578, "step": 1570000 }, { "epoch": 12.87, "learning_rate": 4.869223561130382e-06, "loss": 2.95, "step": 1570500 }, { "epoch": 12.87, "learning_rate": 4.867579608410462e-06, "loss": 2.9472, "step": 1571000 }, { "epoch": 12.88, "learning_rate": 4.865935655690543e-06, "loss": 2.9505, "step": 1571500 }, { "epoch": 12.88, "learning_rate": 4.864291702970623e-06, "loss": 2.9519, "step": 1572000 }, { "epoch": 12.88, "learning_rate": 4.862647750250703e-06, "loss": 2.939, "step": 1572500 }, { "epoch": 12.89, "learning_rate": 4.861003797530784e-06, "loss": 2.9494, "step": 1573000 }, { "epoch": 12.89, "learning_rate": 4.859359844810864e-06, "loss": 2.9491, "step": 1573500 }, { "epoch": 12.9, "learning_rate": 4.857715892090944e-06, "loss": 2.9524, "step": 1574000 }, { "epoch": 12.9, "learning_rate": 4.856071939371024e-06, "loss": 2.9501, "step": 1574500 }, { "epoch": 12.9, "learning_rate": 4.854427986651104e-06, "loss": 2.9494, "step": 1575000 }, { "epoch": 12.91, "learning_rate": 4.852784033931184e-06, "loss": 2.949, "step": 1575500 }, { "epoch": 12.91, "learning_rate": 4.8511400812112645e-06, "loss": 2.9445, "step": 1576000 }, { "epoch": 12.92, "learning_rate": 4.849496128491345e-06, "loss": 2.9457, "step": 1576500 }, { "epoch": 12.92, "learning_rate": 4.8478521757714256e-06, "loss": 2.9505, "step": 1577000 }, { "epoch": 12.92, "learning_rate": 4.846208223051506e-06, "loss": 2.9521, "step": 1577500 }, { "epoch": 12.93, "learning_rate": 4.844564270331585e-06, "loss": 2.9494, "step": 1578000 }, { "epoch": 12.93, "learning_rate": 4.842920317611666e-06, "loss": 2.9524, "step": 1578500 }, { "epoch": 12.94, "learning_rate": 4.841276364891746e-06, "loss": 2.9477, "step": 1579000 }, { "epoch": 12.94, "learning_rate": 4.839632412171826e-06, "loss": 2.947, "step": 1579500 }, { "epoch": 12.94, "learning_rate": 4.837988459451906e-06, "loss": 2.9519, "step": 1580000 }, { "epoch": 12.95, "learning_rate": 4.836344506731987e-06, "loss": 2.95, "step": 1580500 }, { "epoch": 12.95, "learning_rate": 4.834700554012067e-06, "loss": 2.9561, "step": 1581000 }, { "epoch": 12.96, "learning_rate": 4.833056601292147e-06, "loss": 2.9502, "step": 1581500 }, { "epoch": 12.96, "learning_rate": 4.831412648572228e-06, "loss": 2.9511, "step": 1582000 }, { "epoch": 12.97, "learning_rate": 4.829768695852308e-06, "loss": 2.9545, "step": 1582500 }, { "epoch": 12.97, "learning_rate": 4.828124743132388e-06, "loss": 2.9486, "step": 1583000 }, { "epoch": 12.97, "learning_rate": 4.826480790412468e-06, "loss": 2.9494, "step": 1583500 }, { "epoch": 12.98, "learning_rate": 4.824836837692549e-06, "loss": 2.9427, "step": 1584000 }, { "epoch": 12.98, "learning_rate": 4.823192884972628e-06, "loss": 2.9539, "step": 1584500 }, { "epoch": 12.99, "learning_rate": 4.8215489322527084e-06, "loss": 2.9488, "step": 1585000 }, { "epoch": 12.99, "learning_rate": 4.819904979532789e-06, "loss": 2.9474, "step": 1585500 }, { "epoch": 12.99, "learning_rate": 4.8182610268128695e-06, "loss": 2.9436, "step": 1586000 }, { "epoch": 13.0, "learning_rate": 4.81661707409295e-06, "loss": 2.9426, "step": 1586500 }, { "epoch": 13.0, "learning_rate": 4.81497312137303e-06, "loss": 2.9475, "step": 1587000 }, { "epoch": 13.01, "learning_rate": 4.81332916865311e-06, "loss": 2.9566, "step": 1587500 }, { "epoch": 13.01, "learning_rate": 4.81168521593319e-06, "loss": 2.9528, "step": 1588000 }, { "epoch": 13.01, "learning_rate": 4.81004126321327e-06, "loss": 2.9459, "step": 1588500 }, { "epoch": 13.02, "learning_rate": 4.80839731049335e-06, "loss": 2.9478, "step": 1589000 }, { "epoch": 13.02, "learning_rate": 4.806753357773431e-06, "loss": 2.9457, "step": 1589500 }, { "epoch": 13.03, "learning_rate": 4.805109405053511e-06, "loss": 2.9486, "step": 1590000 }, { "epoch": 13.03, "eval_accuracy": 0.4963652221640762, "eval_loss": 2.8131563663482666, "eval_runtime": 407.4078, "eval_samples_per_second": 756.873, "eval_steps_per_second": 15.77, "step": 1590000 }, { "epoch": 13.03, "learning_rate": 4.803465452333591e-06, "loss": 2.9509, "step": 1590500 }, { "epoch": 13.03, "learning_rate": 4.801821499613672e-06, "loss": 2.9485, "step": 1591000 }, { "epoch": 13.04, "learning_rate": 4.800177546893752e-06, "loss": 2.9573, "step": 1591500 }, { "epoch": 13.04, "learning_rate": 4.798533594173832e-06, "loss": 2.9446, "step": 1592000 }, { "epoch": 13.05, "learning_rate": 4.796889641453912e-06, "loss": 2.9451, "step": 1592500 }, { "epoch": 13.05, "learning_rate": 4.795245688733993e-06, "loss": 2.9395, "step": 1593000 }, { "epoch": 13.06, "learning_rate": 4.793601736014072e-06, "loss": 2.9443, "step": 1593500 }, { "epoch": 13.06, "learning_rate": 4.791957783294152e-06, "loss": 2.9481, "step": 1594000 }, { "epoch": 13.06, "learning_rate": 4.790313830574233e-06, "loss": 2.9436, "step": 1594500 }, { "epoch": 13.07, "learning_rate": 4.7886698778543135e-06, "loss": 2.95, "step": 1595000 }, { "epoch": 13.07, "learning_rate": 4.787025925134394e-06, "loss": 2.9522, "step": 1595500 }, { "epoch": 13.08, "learning_rate": 4.785381972414474e-06, "loss": 2.9437, "step": 1596000 }, { "epoch": 13.08, "learning_rate": 4.783738019694554e-06, "loss": 2.9467, "step": 1596500 }, { "epoch": 13.08, "learning_rate": 4.782094066974634e-06, "loss": 2.9474, "step": 1597000 }, { "epoch": 13.09, "learning_rate": 4.780450114254714e-06, "loss": 2.945, "step": 1597500 }, { "epoch": 13.09, "learning_rate": 4.778806161534795e-06, "loss": 2.9562, "step": 1598000 }, { "epoch": 13.1, "learning_rate": 4.777162208814875e-06, "loss": 2.9497, "step": 1598500 }, { "epoch": 13.1, "learning_rate": 4.775518256094955e-06, "loss": 2.9503, "step": 1599000 }, { "epoch": 13.1, "learning_rate": 4.7738743033750355e-06, "loss": 2.9489, "step": 1599500 }, { "epoch": 13.11, "learning_rate": 4.772230350655116e-06, "loss": 2.9467, "step": 1600000 }, { "epoch": 13.11, "learning_rate": 4.770586397935196e-06, "loss": 2.9436, "step": 1600500 }, { "epoch": 13.12, "learning_rate": 4.768942445215276e-06, "loss": 2.9546, "step": 1601000 }, { "epoch": 13.12, "learning_rate": 4.767298492495356e-06, "loss": 2.9411, "step": 1601500 }, { "epoch": 13.12, "learning_rate": 4.765654539775437e-06, "loss": 2.9413, "step": 1602000 }, { "epoch": 13.13, "learning_rate": 4.764010587055516e-06, "loss": 2.9436, "step": 1602500 }, { "epoch": 13.13, "learning_rate": 4.762366634335596e-06, "loss": 2.9467, "step": 1603000 }, { "epoch": 13.14, "learning_rate": 4.760722681615677e-06, "loss": 2.9432, "step": 1603500 }, { "epoch": 13.14, "learning_rate": 4.7590787288957575e-06, "loss": 2.9429, "step": 1604000 }, { "epoch": 13.15, "learning_rate": 4.757434776175838e-06, "loss": 2.9474, "step": 1604500 }, { "epoch": 13.15, "learning_rate": 4.755790823455918e-06, "loss": 2.9524, "step": 1605000 }, { "epoch": 13.15, "learning_rate": 4.754146870735998e-06, "loss": 2.9471, "step": 1605500 }, { "epoch": 13.16, "learning_rate": 4.752502918016078e-06, "loss": 2.9486, "step": 1606000 }, { "epoch": 13.16, "learning_rate": 4.750858965296158e-06, "loss": 2.9405, "step": 1606500 }, { "epoch": 13.17, "learning_rate": 4.749215012576239e-06, "loss": 2.9477, "step": 1607000 }, { "epoch": 13.17, "learning_rate": 4.747571059856319e-06, "loss": 2.9465, "step": 1607500 }, { "epoch": 13.17, "learning_rate": 4.745927107136399e-06, "loss": 2.9527, "step": 1608000 }, { "epoch": 13.18, "learning_rate": 4.7442831544164795e-06, "loss": 2.947, "step": 1608500 }, { "epoch": 13.18, "learning_rate": 4.74263920169656e-06, "loss": 2.9366, "step": 1609000 }, { "epoch": 13.19, "learning_rate": 4.74099524897664e-06, "loss": 2.9444, "step": 1609500 }, { "epoch": 13.19, "learning_rate": 4.73935129625672e-06, "loss": 2.9487, "step": 1610000 }, { "epoch": 13.19, "learning_rate": 4.737707343536801e-06, "loss": 2.9506, "step": 1610500 }, { "epoch": 13.2, "learning_rate": 4.736063390816881e-06, "loss": 2.9445, "step": 1611000 }, { "epoch": 13.2, "learning_rate": 4.73441943809696e-06, "loss": 2.9486, "step": 1611500 }, { "epoch": 13.21, "learning_rate": 4.732775485377041e-06, "loss": 2.9435, "step": 1612000 }, { "epoch": 13.21, "learning_rate": 4.731131532657121e-06, "loss": 2.9416, "step": 1612500 }, { "epoch": 13.22, "learning_rate": 4.7294875799372014e-06, "loss": 2.9449, "step": 1613000 }, { "epoch": 13.22, "learning_rate": 4.7278436272172816e-06, "loss": 2.95, "step": 1613500 }, { "epoch": 13.22, "learning_rate": 4.726199674497362e-06, "loss": 2.9506, "step": 1614000 }, { "epoch": 13.23, "learning_rate": 4.724555721777442e-06, "loss": 2.9452, "step": 1614500 }, { "epoch": 13.23, "learning_rate": 4.722911769057522e-06, "loss": 2.9432, "step": 1615000 }, { "epoch": 13.24, "learning_rate": 4.721267816337602e-06, "loss": 2.9425, "step": 1615500 }, { "epoch": 13.24, "learning_rate": 4.719623863617683e-06, "loss": 2.9451, "step": 1616000 }, { "epoch": 13.24, "learning_rate": 4.717979910897763e-06, "loss": 2.9468, "step": 1616500 }, { "epoch": 13.25, "learning_rate": 4.716335958177843e-06, "loss": 2.9504, "step": 1617000 }, { "epoch": 13.25, "learning_rate": 4.7146920054579234e-06, "loss": 2.9464, "step": 1617500 }, { "epoch": 13.26, "learning_rate": 4.7130480527380036e-06, "loss": 2.9482, "step": 1618000 }, { "epoch": 13.26, "learning_rate": 4.711404100018084e-06, "loss": 2.9461, "step": 1618500 }, { "epoch": 13.26, "learning_rate": 4.709760147298164e-06, "loss": 2.9471, "step": 1619000 }, { "epoch": 13.27, "learning_rate": 4.708116194578245e-06, "loss": 2.9439, "step": 1619500 }, { "epoch": 13.27, "learning_rate": 4.706472241858325e-06, "loss": 2.9413, "step": 1620000 }, { "epoch": 13.27, "eval_accuracy": 0.49669974826700375, "eval_loss": 2.809884786605835, "eval_runtime": 410.3222, "eval_samples_per_second": 751.497, "eval_steps_per_second": 15.658, "step": 1620000 }, { "epoch": 13.28, "learning_rate": 4.704828289138404e-06, "loss": 2.9414, "step": 1620500 }, { "epoch": 13.28, "learning_rate": 4.703184336418485e-06, "loss": 2.9411, "step": 1621000 }, { "epoch": 13.28, "learning_rate": 4.701540383698565e-06, "loss": 2.9508, "step": 1621500 }, { "epoch": 13.29, "learning_rate": 4.699896430978645e-06, "loss": 2.9504, "step": 1622000 }, { "epoch": 13.29, "learning_rate": 4.6982524782587255e-06, "loss": 2.9523, "step": 1622500 }, { "epoch": 13.3, "learning_rate": 4.696608525538806e-06, "loss": 2.9378, "step": 1623000 }, { "epoch": 13.3, "learning_rate": 4.694964572818886e-06, "loss": 2.9436, "step": 1623500 }, { "epoch": 13.31, "learning_rate": 4.693320620098966e-06, "loss": 2.9432, "step": 1624000 }, { "epoch": 13.31, "learning_rate": 4.691676667379047e-06, "loss": 2.9492, "step": 1624500 }, { "epoch": 13.31, "learning_rate": 4.690032714659127e-06, "loss": 2.9463, "step": 1625000 }, { "epoch": 13.32, "learning_rate": 4.688388761939207e-06, "loss": 2.9491, "step": 1625500 }, { "epoch": 13.32, "learning_rate": 4.686744809219287e-06, "loss": 2.9386, "step": 1626000 }, { "epoch": 13.33, "learning_rate": 4.685100856499367e-06, "loss": 2.9441, "step": 1626500 }, { "epoch": 13.33, "learning_rate": 4.6834569037794475e-06, "loss": 2.9448, "step": 1627000 }, { "epoch": 13.33, "learning_rate": 4.681812951059528e-06, "loss": 2.9372, "step": 1627500 }, { "epoch": 13.34, "learning_rate": 4.680168998339608e-06, "loss": 2.9463, "step": 1628000 }, { "epoch": 13.34, "learning_rate": 4.678525045619689e-06, "loss": 2.9438, "step": 1628500 }, { "epoch": 13.35, "learning_rate": 4.676881092899769e-06, "loss": 2.9344, "step": 1629000 }, { "epoch": 13.35, "learning_rate": 4.675237140179849e-06, "loss": 2.9421, "step": 1629500 }, { "epoch": 13.35, "learning_rate": 4.673593187459929e-06, "loss": 2.9386, "step": 1630000 }, { "epoch": 13.36, "learning_rate": 4.671949234740009e-06, "loss": 2.9458, "step": 1630500 }, { "epoch": 13.36, "learning_rate": 4.670305282020089e-06, "loss": 2.9415, "step": 1631000 }, { "epoch": 13.37, "learning_rate": 4.6686613293001695e-06, "loss": 2.9457, "step": 1631500 }, { "epoch": 13.37, "learning_rate": 4.6670173765802505e-06, "loss": 2.9435, "step": 1632000 }, { "epoch": 13.37, "learning_rate": 4.665373423860331e-06, "loss": 2.9474, "step": 1632500 }, { "epoch": 13.38, "learning_rate": 4.66372947114041e-06, "loss": 2.9461, "step": 1633000 }, { "epoch": 13.38, "learning_rate": 4.662085518420491e-06, "loss": 2.9435, "step": 1633500 }, { "epoch": 13.39, "learning_rate": 4.660441565700571e-06, "loss": 2.9385, "step": 1634000 }, { "epoch": 13.39, "learning_rate": 4.658797612980651e-06, "loss": 2.9458, "step": 1634500 }, { "epoch": 13.4, "learning_rate": 4.657153660260731e-06, "loss": 2.9402, "step": 1635000 }, { "epoch": 13.4, "learning_rate": 4.655509707540811e-06, "loss": 2.9385, "step": 1635500 }, { "epoch": 13.4, "learning_rate": 4.6538657548208915e-06, "loss": 2.9452, "step": 1636000 }, { "epoch": 13.41, "learning_rate": 4.652221802100972e-06, "loss": 2.9495, "step": 1636500 }, { "epoch": 13.41, "learning_rate": 4.650577849381052e-06, "loss": 2.9466, "step": 1637000 }, { "epoch": 13.42, "learning_rate": 4.648933896661133e-06, "loss": 2.9438, "step": 1637500 }, { "epoch": 13.42, "learning_rate": 4.647289943941213e-06, "loss": 2.9452, "step": 1638000 }, { "epoch": 13.42, "learning_rate": 4.645645991221293e-06, "loss": 2.9386, "step": 1638500 }, { "epoch": 13.43, "learning_rate": 4.644002038501373e-06, "loss": 2.9393, "step": 1639000 }, { "epoch": 13.43, "learning_rate": 4.642358085781453e-06, "loss": 2.9346, "step": 1639500 }, { "epoch": 13.44, "learning_rate": 4.640714133061533e-06, "loss": 2.9463, "step": 1640000 }, { "epoch": 13.44, "learning_rate": 4.6390701803416135e-06, "loss": 2.9433, "step": 1640500 }, { "epoch": 13.44, "learning_rate": 4.6374262276216945e-06, "loss": 2.9408, "step": 1641000 }, { "epoch": 13.45, "learning_rate": 4.635782274901775e-06, "loss": 2.9425, "step": 1641500 }, { "epoch": 13.45, "learning_rate": 4.634138322181854e-06, "loss": 2.9493, "step": 1642000 }, { "epoch": 13.46, "learning_rate": 4.632494369461935e-06, "loss": 2.9481, "step": 1642500 }, { "epoch": 13.46, "learning_rate": 4.630850416742015e-06, "loss": 2.9438, "step": 1643000 }, { "epoch": 13.46, "learning_rate": 4.629206464022095e-06, "loss": 2.9436, "step": 1643500 }, { "epoch": 13.47, "learning_rate": 4.627562511302175e-06, "loss": 2.9404, "step": 1644000 }, { "epoch": 13.47, "learning_rate": 4.625918558582256e-06, "loss": 2.9467, "step": 1644500 }, { "epoch": 13.48, "learning_rate": 4.6242746058623355e-06, "loss": 2.9504, "step": 1645000 }, { "epoch": 13.48, "learning_rate": 4.622630653142416e-06, "loss": 2.947, "step": 1645500 }, { "epoch": 13.49, "learning_rate": 4.6209867004224966e-06, "loss": 2.9411, "step": 1646000 }, { "epoch": 13.49, "learning_rate": 4.619342747702577e-06, "loss": 2.9402, "step": 1646500 }, { "epoch": 13.49, "learning_rate": 4.617698794982657e-06, "loss": 2.9397, "step": 1647000 }, { "epoch": 13.5, "learning_rate": 4.616054842262737e-06, "loss": 2.9406, "step": 1647500 }, { "epoch": 13.5, "learning_rate": 4.614410889542817e-06, "loss": 2.9432, "step": 1648000 }, { "epoch": 13.51, "learning_rate": 4.612766936822897e-06, "loss": 2.9428, "step": 1648500 }, { "epoch": 13.51, "learning_rate": 4.611122984102977e-06, "loss": 2.9471, "step": 1649000 }, { "epoch": 13.51, "learning_rate": 4.6094790313830575e-06, "loss": 2.944, "step": 1649500 }, { "epoch": 13.52, "learning_rate": 4.6078350786631384e-06, "loss": 2.9381, "step": 1650000 }, { "epoch": 13.52, "eval_accuracy": 0.49680267143539775, "eval_loss": 2.8081107139587402, "eval_runtime": 403.5601, "eval_samples_per_second": 764.089, "eval_steps_per_second": 15.921, "step": 1650000 }, { "epoch": 13.52, "learning_rate": 4.6061911259432186e-06, "loss": 2.9407, "step": 1650500 }, { "epoch": 13.53, "learning_rate": 4.604547173223298e-06, "loss": 2.9429, "step": 1651000 }, { "epoch": 13.53, "learning_rate": 4.602903220503379e-06, "loss": 2.9429, "step": 1651500 }, { "epoch": 13.53, "learning_rate": 4.601259267783459e-06, "loss": 2.9433, "step": 1652000 }, { "epoch": 13.54, "learning_rate": 4.599615315063539e-06, "loss": 2.9313, "step": 1652500 }, { "epoch": 13.54, "learning_rate": 4.597971362343619e-06, "loss": 2.942, "step": 1653000 }, { "epoch": 13.55, "learning_rate": 4.5963274096237e-06, "loss": 2.9498, "step": 1653500 }, { "epoch": 13.55, "learning_rate": 4.5946834569037794e-06, "loss": 2.9326, "step": 1654000 }, { "epoch": 13.56, "learning_rate": 4.5930395041838596e-06, "loss": 2.9415, "step": 1654500 }, { "epoch": 13.56, "learning_rate": 4.5913955514639405e-06, "loss": 2.9337, "step": 1655000 }, { "epoch": 13.56, "learning_rate": 4.589751598744021e-06, "loss": 2.9363, "step": 1655500 }, { "epoch": 13.57, "learning_rate": 4.588107646024101e-06, "loss": 2.943, "step": 1656000 }, { "epoch": 13.57, "learning_rate": 4.586463693304181e-06, "loss": 2.9443, "step": 1656500 }, { "epoch": 13.58, "learning_rate": 4.584819740584261e-06, "loss": 2.9424, "step": 1657000 }, { "epoch": 13.58, "learning_rate": 4.583175787864341e-06, "loss": 2.9449, "step": 1657500 }, { "epoch": 13.58, "learning_rate": 4.581531835144421e-06, "loss": 2.9351, "step": 1658000 }, { "epoch": 13.59, "learning_rate": 4.579887882424502e-06, "loss": 2.9457, "step": 1658500 }, { "epoch": 13.59, "learning_rate": 4.578243929704582e-06, "loss": 2.9411, "step": 1659000 }, { "epoch": 13.6, "learning_rate": 4.5765999769846625e-06, "loss": 2.9469, "step": 1659500 }, { "epoch": 13.6, "learning_rate": 4.574956024264743e-06, "loss": 2.9427, "step": 1660000 }, { "epoch": 13.6, "learning_rate": 4.573312071544823e-06, "loss": 2.9381, "step": 1660500 }, { "epoch": 13.61, "learning_rate": 4.571668118824903e-06, "loss": 2.9367, "step": 1661000 }, { "epoch": 13.61, "learning_rate": 4.570024166104983e-06, "loss": 2.9354, "step": 1661500 }, { "epoch": 13.62, "learning_rate": 4.568380213385063e-06, "loss": 2.9368, "step": 1662000 }, { "epoch": 13.62, "learning_rate": 4.566736260665144e-06, "loss": 2.948, "step": 1662500 }, { "epoch": 13.62, "learning_rate": 4.565092307945223e-06, "loss": 2.9395, "step": 1663000 }, { "epoch": 13.63, "learning_rate": 4.5634483552253035e-06, "loss": 2.942, "step": 1663500 }, { "epoch": 13.63, "learning_rate": 4.5618044025053845e-06, "loss": 2.933, "step": 1664000 }, { "epoch": 13.64, "learning_rate": 4.560160449785465e-06, "loss": 2.9397, "step": 1664500 }, { "epoch": 13.64, "learning_rate": 4.558516497065545e-06, "loss": 2.9435, "step": 1665000 }, { "epoch": 13.65, "learning_rate": 4.556872544345625e-06, "loss": 2.9398, "step": 1665500 }, { "epoch": 13.65, "learning_rate": 4.555228591625705e-06, "loss": 2.9433, "step": 1666000 }, { "epoch": 13.65, "learning_rate": 4.553584638905785e-06, "loss": 2.9367, "step": 1666500 }, { "epoch": 13.66, "learning_rate": 4.551940686185865e-06, "loss": 2.9471, "step": 1667000 }, { "epoch": 13.66, "learning_rate": 4.550296733465946e-06, "loss": 2.9446, "step": 1667500 }, { "epoch": 13.67, "learning_rate": 4.548652780746026e-06, "loss": 2.9373, "step": 1668000 }, { "epoch": 13.67, "learning_rate": 4.5470088280261065e-06, "loss": 2.9354, "step": 1668500 }, { "epoch": 13.67, "learning_rate": 4.545364875306187e-06, "loss": 2.9431, "step": 1669000 }, { "epoch": 13.68, "learning_rate": 4.543720922586267e-06, "loss": 2.9444, "step": 1669500 }, { "epoch": 13.68, "learning_rate": 4.542076969866347e-06, "loss": 2.9413, "step": 1670000 }, { "epoch": 13.69, "learning_rate": 4.540433017146427e-06, "loss": 2.9407, "step": 1670500 }, { "epoch": 13.69, "learning_rate": 4.538789064426508e-06, "loss": 2.9372, "step": 1671000 }, { "epoch": 13.69, "learning_rate": 4.537145111706588e-06, "loss": 2.9418, "step": 1671500 }, { "epoch": 13.7, "learning_rate": 4.535501158986667e-06, "loss": 2.9401, "step": 1672000 }, { "epoch": 13.7, "learning_rate": 4.533857206266748e-06, "loss": 2.9391, "step": 1672500 }, { "epoch": 13.71, "learning_rate": 4.5322132535468285e-06, "loss": 2.9417, "step": 1673000 }, { "epoch": 13.71, "learning_rate": 4.530569300826909e-06, "loss": 2.9413, "step": 1673500 }, { "epoch": 13.71, "learning_rate": 4.528925348106989e-06, "loss": 2.9382, "step": 1674000 }, { "epoch": 13.72, "learning_rate": 4.527281395387069e-06, "loss": 2.942, "step": 1674500 }, { "epoch": 13.72, "learning_rate": 4.525637442667149e-06, "loss": 2.9443, "step": 1675000 }, { "epoch": 13.73, "learning_rate": 4.523993489947229e-06, "loss": 2.9417, "step": 1675500 }, { "epoch": 13.73, "learning_rate": 4.522349537227309e-06, "loss": 2.9398, "step": 1676000 }, { "epoch": 13.74, "learning_rate": 4.52070558450739e-06, "loss": 2.943, "step": 1676500 }, { "epoch": 13.74, "learning_rate": 4.51906163178747e-06, "loss": 2.9367, "step": 1677000 }, { "epoch": 13.74, "learning_rate": 4.5174176790675505e-06, "loss": 2.9447, "step": 1677500 }, { "epoch": 13.75, "learning_rate": 4.515773726347631e-06, "loss": 2.9327, "step": 1678000 }, { "epoch": 13.75, "learning_rate": 4.514129773627711e-06, "loss": 2.9379, "step": 1678500 }, { "epoch": 13.76, "learning_rate": 4.512485820907791e-06, "loss": 2.9412, "step": 1679000 }, { "epoch": 13.76, "learning_rate": 4.510841868187871e-06, "loss": 2.9383, "step": 1679500 }, { "epoch": 13.76, "learning_rate": 4.509197915467952e-06, "loss": 2.9389, "step": 1680000 }, { "epoch": 13.76, "eval_accuracy": 0.4972946717627968, "eval_loss": 2.80572772026062, "eval_runtime": 409.9534, "eval_samples_per_second": 752.173, "eval_steps_per_second": 15.673, "step": 1680000 }, { "epoch": 13.77, "learning_rate": 4.507553962748032e-06, "loss": 2.9444, "step": 1680500 }, { "epoch": 13.77, "learning_rate": 4.505910010028112e-06, "loss": 2.9413, "step": 1681000 }, { "epoch": 13.78, "learning_rate": 4.504266057308192e-06, "loss": 2.949, "step": 1681500 }, { "epoch": 13.78, "learning_rate": 4.5026221045882725e-06, "loss": 2.9386, "step": 1682000 }, { "epoch": 13.78, "learning_rate": 4.500978151868353e-06, "loss": 2.9448, "step": 1682500 }, { "epoch": 13.79, "learning_rate": 4.499334199148433e-06, "loss": 2.934, "step": 1683000 }, { "epoch": 13.79, "learning_rate": 4.497690246428514e-06, "loss": 2.9403, "step": 1683500 }, { "epoch": 13.8, "learning_rate": 4.496046293708594e-06, "loss": 2.9464, "step": 1684000 }, { "epoch": 13.8, "learning_rate": 4.494402340988673e-06, "loss": 2.9362, "step": 1684500 }, { "epoch": 13.8, "learning_rate": 4.492758388268754e-06, "loss": 2.9427, "step": 1685000 }, { "epoch": 13.81, "learning_rate": 4.491114435548834e-06, "loss": 2.939, "step": 1685500 }, { "epoch": 13.81, "learning_rate": 4.489470482828914e-06, "loss": 2.9373, "step": 1686000 }, { "epoch": 13.82, "learning_rate": 4.4878265301089944e-06, "loss": 2.941, "step": 1686500 }, { "epoch": 13.82, "learning_rate": 4.4861825773890746e-06, "loss": 2.9358, "step": 1687000 }, { "epoch": 13.83, "learning_rate": 4.484538624669155e-06, "loss": 2.9371, "step": 1687500 }, { "epoch": 13.83, "learning_rate": 4.482894671949235e-06, "loss": 2.9382, "step": 1688000 }, { "epoch": 13.83, "learning_rate": 4.481250719229315e-06, "loss": 2.931, "step": 1688500 }, { "epoch": 13.84, "learning_rate": 4.479606766509396e-06, "loss": 2.9357, "step": 1689000 }, { "epoch": 13.84, "learning_rate": 4.477962813789476e-06, "loss": 2.9389, "step": 1689500 }, { "epoch": 13.85, "learning_rate": 4.476318861069556e-06, "loss": 2.9362, "step": 1690000 }, { "epoch": 13.85, "learning_rate": 4.474674908349636e-06, "loss": 2.9395, "step": 1690500 }, { "epoch": 13.85, "learning_rate": 4.473030955629716e-06, "loss": 2.9368, "step": 1691000 }, { "epoch": 13.86, "learning_rate": 4.4713870029097965e-06, "loss": 2.9447, "step": 1691500 }, { "epoch": 13.86, "learning_rate": 4.469743050189877e-06, "loss": 2.9401, "step": 1692000 }, { "epoch": 13.87, "learning_rate": 4.468099097469958e-06, "loss": 2.9357, "step": 1692500 }, { "epoch": 13.87, "learning_rate": 4.466455144750038e-06, "loss": 2.9433, "step": 1693000 }, { "epoch": 13.87, "learning_rate": 4.464811192030117e-06, "loss": 2.9427, "step": 1693500 }, { "epoch": 13.88, "learning_rate": 4.463167239310198e-06, "loss": 2.9336, "step": 1694000 }, { "epoch": 13.88, "learning_rate": 4.461523286590278e-06, "loss": 2.9388, "step": 1694500 }, { "epoch": 13.89, "learning_rate": 4.459879333870358e-06, "loss": 2.9386, "step": 1695000 }, { "epoch": 13.89, "learning_rate": 4.458235381150438e-06, "loss": 2.9384, "step": 1695500 }, { "epoch": 13.9, "learning_rate": 4.4565914284305185e-06, "loss": 2.9361, "step": 1696000 }, { "epoch": 13.9, "learning_rate": 4.454947475710599e-06, "loss": 2.9363, "step": 1696500 }, { "epoch": 13.9, "learning_rate": 4.453303522990679e-06, "loss": 2.9361, "step": 1697000 }, { "epoch": 13.91, "learning_rate": 4.45165957027076e-06, "loss": 2.9355, "step": 1697500 }, { "epoch": 13.91, "learning_rate": 4.45001561755084e-06, "loss": 2.9356, "step": 1698000 }, { "epoch": 13.92, "learning_rate": 4.44837166483092e-06, "loss": 2.9391, "step": 1698500 }, { "epoch": 13.92, "learning_rate": 4.446727712111e-06, "loss": 2.9384, "step": 1699000 }, { "epoch": 13.92, "learning_rate": 4.44508375939108e-06, "loss": 2.9367, "step": 1699500 }, { "epoch": 13.93, "learning_rate": 4.44343980667116e-06, "loss": 2.9446, "step": 1700000 }, { "epoch": 13.93, "learning_rate": 4.4417958539512405e-06, "loss": 2.9439, "step": 1700500 }, { "epoch": 13.94, "learning_rate": 4.440151901231321e-06, "loss": 2.9381, "step": 1701000 }, { "epoch": 13.94, "learning_rate": 4.438507948511402e-06, "loss": 2.9345, "step": 1701500 }, { "epoch": 13.94, "learning_rate": 4.436863995791482e-06, "loss": 2.9391, "step": 1702000 }, { "epoch": 13.95, "learning_rate": 4.435220043071561e-06, "loss": 2.9316, "step": 1702500 }, { "epoch": 13.95, "learning_rate": 4.433576090351642e-06, "loss": 2.9351, "step": 1703000 }, { "epoch": 13.96, "learning_rate": 4.431932137631722e-06, "loss": 2.932, "step": 1703500 }, { "epoch": 13.96, "learning_rate": 4.430288184911802e-06, "loss": 2.938, "step": 1704000 }, { "epoch": 13.96, "learning_rate": 4.428644232191882e-06, "loss": 2.9325, "step": 1704500 }, { "epoch": 13.97, "learning_rate": 4.427000279471963e-06, "loss": 2.9388, "step": 1705000 }, { "epoch": 13.97, "learning_rate": 4.425356326752043e-06, "loss": 2.9338, "step": 1705500 }, { "epoch": 13.98, "learning_rate": 4.423712374032123e-06, "loss": 2.9372, "step": 1706000 }, { "epoch": 13.98, "learning_rate": 4.422068421312204e-06, "loss": 2.9421, "step": 1706500 }, { "epoch": 13.99, "learning_rate": 4.420424468592284e-06, "loss": 2.9381, "step": 1707000 }, { "epoch": 13.99, "learning_rate": 4.418780515872364e-06, "loss": 2.9352, "step": 1707500 }, { "epoch": 13.99, "learning_rate": 4.417136563152444e-06, "loss": 2.9376, "step": 1708000 }, { "epoch": 14.0, "learning_rate": 4.415492610432524e-06, "loss": 2.9373, "step": 1708500 }, { "epoch": 14.0, "learning_rate": 4.413848657712604e-06, "loss": 2.9386, "step": 1709000 }, { "epoch": 14.01, "learning_rate": 4.4122047049926845e-06, "loss": 2.9335, "step": 1709500 }, { "epoch": 14.01, "learning_rate": 4.410560752272765e-06, "loss": 2.9374, "step": 1710000 }, { "epoch": 14.01, "eval_accuracy": 0.4976944853269889, "eval_loss": 2.8028008937835693, "eval_runtime": 411.9878, "eval_samples_per_second": 748.459, "eval_steps_per_second": 15.595, "step": 1710000 }, { "epoch": 14.01, "learning_rate": 4.408916799552846e-06, "loss": 2.9313, "step": 1710500 }, { "epoch": 14.02, "learning_rate": 4.407272846832926e-06, "loss": 2.9383, "step": 1711000 }, { "epoch": 14.02, "learning_rate": 4.405628894113006e-06, "loss": 2.939, "step": 1711500 }, { "epoch": 14.03, "learning_rate": 4.403984941393086e-06, "loss": 2.9374, "step": 1712000 }, { "epoch": 14.03, "learning_rate": 4.402340988673166e-06, "loss": 2.9351, "step": 1712500 }, { "epoch": 14.03, "learning_rate": 4.400697035953246e-06, "loss": 2.9335, "step": 1713000 }, { "epoch": 14.04, "learning_rate": 4.399053083233326e-06, "loss": 2.9393, "step": 1713500 }, { "epoch": 14.04, "learning_rate": 4.397409130513407e-06, "loss": 2.9374, "step": 1714000 }, { "epoch": 14.05, "learning_rate": 4.395765177793487e-06, "loss": 2.9355, "step": 1714500 }, { "epoch": 14.05, "learning_rate": 4.394121225073567e-06, "loss": 2.9253, "step": 1715000 }, { "epoch": 14.05, "learning_rate": 4.392477272353648e-06, "loss": 2.9334, "step": 1715500 }, { "epoch": 14.06, "learning_rate": 4.390833319633728e-06, "loss": 2.9287, "step": 1716000 }, { "epoch": 14.06, "learning_rate": 4.389189366913808e-06, "loss": 2.9363, "step": 1716500 }, { "epoch": 14.07, "learning_rate": 4.387545414193888e-06, "loss": 2.9334, "step": 1717000 }, { "epoch": 14.07, "learning_rate": 4.385901461473968e-06, "loss": 2.9396, "step": 1717500 }, { "epoch": 14.08, "learning_rate": 4.384257508754048e-06, "loss": 2.9314, "step": 1718000 }, { "epoch": 14.08, "learning_rate": 4.3826135560341285e-06, "loss": 2.9284, "step": 1718500 }, { "epoch": 14.08, "learning_rate": 4.3809696033142094e-06, "loss": 2.9397, "step": 1719000 }, { "epoch": 14.09, "learning_rate": 4.3793256505942896e-06, "loss": 2.9268, "step": 1719500 }, { "epoch": 14.09, "learning_rate": 4.37768169787437e-06, "loss": 2.9344, "step": 1720000 }, { "epoch": 14.1, "learning_rate": 4.37603774515445e-06, "loss": 2.9336, "step": 1720500 }, { "epoch": 14.1, "learning_rate": 4.37439379243453e-06, "loss": 2.9399, "step": 1721000 }, { "epoch": 14.1, "learning_rate": 4.37274983971461e-06, "loss": 2.9335, "step": 1721500 }, { "epoch": 14.11, "learning_rate": 4.37110588699469e-06, "loss": 2.9299, "step": 1722000 }, { "epoch": 14.11, "learning_rate": 4.36946193427477e-06, "loss": 2.9353, "step": 1722500 }, { "epoch": 14.12, "learning_rate": 4.367817981554851e-06, "loss": 2.9282, "step": 1723000 }, { "epoch": 14.12, "learning_rate": 4.3661740288349306e-06, "loss": 2.9343, "step": 1723500 }, { "epoch": 14.12, "learning_rate": 4.364530076115011e-06, "loss": 2.9299, "step": 1724000 }, { "epoch": 14.13, "learning_rate": 4.362886123395092e-06, "loss": 2.9317, "step": 1724500 }, { "epoch": 14.13, "learning_rate": 4.361242170675172e-06, "loss": 2.9287, "step": 1725000 }, { "epoch": 14.14, "learning_rate": 4.359598217955252e-06, "loss": 2.9372, "step": 1725500 }, { "epoch": 14.14, "learning_rate": 4.357954265235332e-06, "loss": 2.9415, "step": 1726000 }, { "epoch": 14.14, "learning_rate": 4.356310312515412e-06, "loss": 2.9302, "step": 1726500 }, { "epoch": 14.15, "learning_rate": 4.354666359795492e-06, "loss": 2.9365, "step": 1727000 }, { "epoch": 14.15, "learning_rate": 4.3530224070755724e-06, "loss": 2.9399, "step": 1727500 }, { "epoch": 14.16, "learning_rate": 4.351378454355653e-06, "loss": 2.934, "step": 1728000 }, { "epoch": 14.16, "learning_rate": 4.3497345016357335e-06, "loss": 2.9384, "step": 1728500 }, { "epoch": 14.17, "learning_rate": 4.348090548915814e-06, "loss": 2.9384, "step": 1729000 }, { "epoch": 14.17, "learning_rate": 4.346446596195894e-06, "loss": 2.9356, "step": 1729500 }, { "epoch": 14.17, "learning_rate": 4.344802643475974e-06, "loss": 2.9365, "step": 1730000 }, { "epoch": 14.18, "learning_rate": 4.343158690756054e-06, "loss": 2.941, "step": 1730500 }, { "epoch": 14.18, "learning_rate": 4.341514738036134e-06, "loss": 2.9355, "step": 1731000 }, { "epoch": 14.19, "learning_rate": 4.339870785316215e-06, "loss": 2.9348, "step": 1731500 }, { "epoch": 14.19, "learning_rate": 4.338226832596295e-06, "loss": 2.9331, "step": 1732000 }, { "epoch": 14.19, "learning_rate": 4.336582879876375e-06, "loss": 2.9319, "step": 1732500 }, { "epoch": 14.2, "learning_rate": 4.3349389271564555e-06, "loss": 2.9369, "step": 1733000 }, { "epoch": 14.2, "learning_rate": 4.333294974436536e-06, "loss": 2.9361, "step": 1733500 }, { "epoch": 14.21, "learning_rate": 4.331651021716616e-06, "loss": 2.9339, "step": 1734000 }, { "epoch": 14.21, "learning_rate": 4.330007068996696e-06, "loss": 2.9307, "step": 1734500 }, { "epoch": 14.21, "learning_rate": 4.328363116276776e-06, "loss": 2.9329, "step": 1735000 }, { "epoch": 14.22, "learning_rate": 4.326719163556857e-06, "loss": 2.9387, "step": 1735500 }, { "epoch": 14.22, "learning_rate": 4.325075210836936e-06, "loss": 2.9352, "step": 1736000 }, { "epoch": 14.23, "learning_rate": 4.323431258117016e-06, "loss": 2.9308, "step": 1736500 }, { "epoch": 14.23, "learning_rate": 4.321787305397097e-06, "loss": 2.9319, "step": 1737000 }, { "epoch": 14.24, "learning_rate": 4.3201433526771775e-06, "loss": 2.9321, "step": 1737500 }, { "epoch": 14.24, "learning_rate": 4.318499399957258e-06, "loss": 2.9276, "step": 1738000 }, { "epoch": 14.24, "learning_rate": 4.316855447237338e-06, "loss": 2.933, "step": 1738500 }, { "epoch": 14.25, "learning_rate": 4.315211494517418e-06, "loss": 2.936, "step": 1739000 }, { "epoch": 14.25, "learning_rate": 4.313567541797498e-06, "loss": 2.9336, "step": 1739500 }, { "epoch": 14.26, "learning_rate": 4.311923589077578e-06, "loss": 2.9341, "step": 1740000 }, { "epoch": 14.26, "eval_accuracy": 0.4978347321882909, "eval_loss": 2.8000051975250244, "eval_runtime": 409.1394, "eval_samples_per_second": 753.67, "eval_steps_per_second": 15.704, "step": 1740000 }, { "epoch": 14.26, "learning_rate": 4.310279636357659e-06, "loss": 2.9279, "step": 1740500 }, { "epoch": 14.26, "learning_rate": 4.308635683637739e-06, "loss": 2.9396, "step": 1741000 }, { "epoch": 14.27, "learning_rate": 4.306991730917819e-06, "loss": 2.9339, "step": 1741500 }, { "epoch": 14.27, "learning_rate": 4.3053477781978995e-06, "loss": 2.9332, "step": 1742000 }, { "epoch": 14.28, "learning_rate": 4.30370382547798e-06, "loss": 2.9357, "step": 1742500 }, { "epoch": 14.28, "learning_rate": 4.30205987275806e-06, "loss": 2.9298, "step": 1743000 }, { "epoch": 14.28, "learning_rate": 4.30041592003814e-06, "loss": 2.936, "step": 1743500 }, { "epoch": 14.29, "learning_rate": 4.298771967318221e-06, "loss": 2.9274, "step": 1744000 }, { "epoch": 14.29, "learning_rate": 4.297128014598301e-06, "loss": 2.9341, "step": 1744500 }, { "epoch": 14.3, "learning_rate": 4.29548406187838e-06, "loss": 2.9329, "step": 1745000 }, { "epoch": 14.3, "learning_rate": 4.293840109158461e-06, "loss": 2.9282, "step": 1745500 }, { "epoch": 14.3, "learning_rate": 4.292196156438541e-06, "loss": 2.9359, "step": 1746000 }, { "epoch": 14.31, "learning_rate": 4.2905522037186215e-06, "loss": 2.9311, "step": 1746500 }, { "epoch": 14.31, "learning_rate": 4.288908250998702e-06, "loss": 2.9301, "step": 1747000 }, { "epoch": 14.32, "learning_rate": 4.287264298278782e-06, "loss": 2.9277, "step": 1747500 }, { "epoch": 14.32, "learning_rate": 4.285620345558862e-06, "loss": 2.9333, "step": 1748000 }, { "epoch": 14.33, "learning_rate": 4.283976392838942e-06, "loss": 2.9305, "step": 1748500 }, { "epoch": 14.33, "learning_rate": 4.282332440119022e-06, "loss": 2.9342, "step": 1749000 }, { "epoch": 14.33, "learning_rate": 4.280688487399103e-06, "loss": 2.9321, "step": 1749500 }, { "epoch": 14.34, "learning_rate": 4.279044534679183e-06, "loss": 2.9341, "step": 1750000 }, { "epoch": 14.34, "learning_rate": 4.277400581959263e-06, "loss": 2.9312, "step": 1750500 }, { "epoch": 14.35, "learning_rate": 4.2757566292393435e-06, "loss": 2.9252, "step": 1751000 }, { "epoch": 14.35, "learning_rate": 4.274112676519424e-06, "loss": 2.9335, "step": 1751500 }, { "epoch": 14.35, "learning_rate": 4.272468723799504e-06, "loss": 2.9248, "step": 1752000 }, { "epoch": 14.36, "learning_rate": 4.270824771079584e-06, "loss": 2.9322, "step": 1752500 }, { "epoch": 14.36, "learning_rate": 4.269180818359665e-06, "loss": 2.9343, "step": 1753000 }, { "epoch": 14.37, "learning_rate": 4.267536865639745e-06, "loss": 2.9346, "step": 1753500 }, { "epoch": 14.37, "learning_rate": 4.265892912919824e-06, "loss": 2.9312, "step": 1754000 }, { "epoch": 14.37, "learning_rate": 4.264248960199905e-06, "loss": 2.9274, "step": 1754500 }, { "epoch": 14.38, "learning_rate": 4.262605007479985e-06, "loss": 2.9308, "step": 1755000 }, { "epoch": 14.38, "learning_rate": 4.2609610547600654e-06, "loss": 2.9315, "step": 1755500 }, { "epoch": 14.39, "learning_rate": 4.2593171020401456e-06, "loss": 2.929, "step": 1756000 }, { "epoch": 14.39, "learning_rate": 4.2576731493202265e-06, "loss": 2.9362, "step": 1756500 }, { "epoch": 14.39, "learning_rate": 4.256029196600306e-06, "loss": 2.9238, "step": 1757000 }, { "epoch": 14.4, "learning_rate": 4.254385243880386e-06, "loss": 2.9318, "step": 1757500 }, { "epoch": 14.4, "learning_rate": 4.252741291160467e-06, "loss": 2.935, "step": 1758000 }, { "epoch": 14.41, "learning_rate": 4.251097338440547e-06, "loss": 2.9325, "step": 1758500 }, { "epoch": 14.41, "learning_rate": 4.249453385720627e-06, "loss": 2.9388, "step": 1759000 }, { "epoch": 14.42, "learning_rate": 4.247809433000707e-06, "loss": 2.9329, "step": 1759500 }, { "epoch": 14.42, "learning_rate": 4.2461654802807874e-06, "loss": 2.9317, "step": 1760000 }, { "epoch": 14.42, "learning_rate": 4.2445215275608676e-06, "loss": 2.9316, "step": 1760500 }, { "epoch": 14.43, "learning_rate": 4.242877574840948e-06, "loss": 2.9311, "step": 1761000 }, { "epoch": 14.43, "learning_rate": 4.241233622121028e-06, "loss": 2.932, "step": 1761500 }, { "epoch": 14.44, "learning_rate": 4.239589669401109e-06, "loss": 2.9319, "step": 1762000 }, { "epoch": 14.44, "learning_rate": 4.237945716681189e-06, "loss": 2.9314, "step": 1762500 }, { "epoch": 14.44, "learning_rate": 4.236301763961268e-06, "loss": 2.934, "step": 1763000 }, { "epoch": 14.45, "learning_rate": 4.234657811241349e-06, "loss": 2.9321, "step": 1763500 }, { "epoch": 14.45, "learning_rate": 4.233013858521429e-06, "loss": 2.936, "step": 1764000 }, { "epoch": 14.46, "learning_rate": 4.231369905801509e-06, "loss": 2.9388, "step": 1764500 }, { "epoch": 14.46, "learning_rate": 4.2297259530815895e-06, "loss": 2.925, "step": 1765000 }, { "epoch": 14.46, "learning_rate": 4.2280820003616705e-06, "loss": 2.9331, "step": 1765500 }, { "epoch": 14.47, "learning_rate": 4.22643804764175e-06, "loss": 2.9265, "step": 1766000 }, { "epoch": 14.47, "learning_rate": 4.22479409492183e-06, "loss": 2.9322, "step": 1766500 }, { "epoch": 14.48, "learning_rate": 4.223150142201911e-06, "loss": 2.9266, "step": 1767000 }, { "epoch": 14.48, "learning_rate": 4.221506189481991e-06, "loss": 2.9338, "step": 1767500 }, { "epoch": 14.48, "learning_rate": 4.219862236762071e-06, "loss": 2.9362, "step": 1768000 }, { "epoch": 14.49, "learning_rate": 4.218218284042151e-06, "loss": 2.9338, "step": 1768500 }, { "epoch": 14.49, "learning_rate": 4.216574331322231e-06, "loss": 2.9352, "step": 1769000 }, { "epoch": 14.5, "learning_rate": 4.2149303786023115e-06, "loss": 2.9298, "step": 1769500 }, { "epoch": 14.5, "learning_rate": 4.213286425882392e-06, "loss": 2.9275, "step": 1770000 }, { "epoch": 14.5, "eval_accuracy": 0.49835039139690634, "eval_loss": 2.7977969646453857, "eval_runtime": 403.9508, "eval_samples_per_second": 763.35, "eval_steps_per_second": 15.905, "step": 1770000 }, { "epoch": 14.51, "learning_rate": 4.211642473162473e-06, "loss": 2.9349, "step": 1770500 }, { "epoch": 14.51, "learning_rate": 4.209998520442553e-06, "loss": 2.9281, "step": 1771000 }, { "epoch": 14.51, "learning_rate": 4.208354567722633e-06, "loss": 2.9367, "step": 1771500 }, { "epoch": 14.52, "learning_rate": 4.206710615002713e-06, "loss": 2.9272, "step": 1772000 }, { "epoch": 14.52, "learning_rate": 4.205066662282793e-06, "loss": 2.9336, "step": 1772500 }, { "epoch": 14.53, "learning_rate": 4.203422709562873e-06, "loss": 2.9265, "step": 1773000 }, { "epoch": 14.53, "learning_rate": 4.201778756842953e-06, "loss": 2.9294, "step": 1773500 }, { "epoch": 14.53, "learning_rate": 4.2001348041230335e-06, "loss": 2.9236, "step": 1774000 }, { "epoch": 14.54, "learning_rate": 4.1984908514031145e-06, "loss": 2.9314, "step": 1774500 }, { "epoch": 14.54, "learning_rate": 4.196846898683194e-06, "loss": 2.9274, "step": 1775000 }, { "epoch": 14.55, "learning_rate": 4.195202945963274e-06, "loss": 2.9199, "step": 1775500 }, { "epoch": 14.55, "learning_rate": 4.193558993243355e-06, "loss": 2.9313, "step": 1776000 }, { "epoch": 14.55, "learning_rate": 4.191915040523435e-06, "loss": 2.9253, "step": 1776500 }, { "epoch": 14.56, "learning_rate": 4.190271087803515e-06, "loss": 2.9304, "step": 1777000 }, { "epoch": 14.56, "learning_rate": 4.188627135083595e-06, "loss": 2.9333, "step": 1777500 }, { "epoch": 14.57, "learning_rate": 4.186983182363675e-06, "loss": 2.9317, "step": 1778000 }, { "epoch": 14.57, "learning_rate": 4.1853392296437555e-06, "loss": 2.9276, "step": 1778500 }, { "epoch": 14.58, "learning_rate": 4.183695276923836e-06, "loss": 2.9319, "step": 1779000 }, { "epoch": 14.58, "learning_rate": 4.182051324203917e-06, "loss": 2.933, "step": 1779500 }, { "epoch": 14.58, "learning_rate": 4.180407371483997e-06, "loss": 2.9379, "step": 1780000 }, { "epoch": 14.59, "learning_rate": 4.178763418764077e-06, "loss": 2.9306, "step": 1780500 }, { "epoch": 14.59, "learning_rate": 4.177119466044157e-06, "loss": 2.9328, "step": 1781000 }, { "epoch": 14.6, "learning_rate": 4.175475513324237e-06, "loss": 2.9255, "step": 1781500 }, { "epoch": 14.6, "learning_rate": 4.173831560604317e-06, "loss": 2.9313, "step": 1782000 }, { "epoch": 14.6, "learning_rate": 4.172187607884397e-06, "loss": 2.9252, "step": 1782500 }, { "epoch": 14.61, "learning_rate": 4.1705436551644775e-06, "loss": 2.9281, "step": 1783000 }, { "epoch": 14.61, "learning_rate": 4.1688997024445585e-06, "loss": 2.9268, "step": 1783500 }, { "epoch": 14.62, "learning_rate": 4.167255749724639e-06, "loss": 2.9269, "step": 1784000 }, { "epoch": 14.62, "learning_rate": 4.165611797004719e-06, "loss": 2.9347, "step": 1784500 }, { "epoch": 14.62, "learning_rate": 4.163967844284799e-06, "loss": 2.9234, "step": 1785000 }, { "epoch": 14.63, "learning_rate": 4.162323891564879e-06, "loss": 2.9271, "step": 1785500 }, { "epoch": 14.63, "learning_rate": 4.160679938844959e-06, "loss": 2.9335, "step": 1786000 }, { "epoch": 14.64, "learning_rate": 4.159035986125039e-06, "loss": 2.9314, "step": 1786500 }, { "epoch": 14.64, "learning_rate": 4.15739203340512e-06, "loss": 2.9327, "step": 1787000 }, { "epoch": 14.64, "learning_rate": 4.1557480806851995e-06, "loss": 2.9259, "step": 1787500 }, { "epoch": 14.65, "learning_rate": 4.15410412796528e-06, "loss": 2.9288, "step": 1788000 }, { "epoch": 14.65, "learning_rate": 4.1524601752453606e-06, "loss": 2.9282, "step": 1788500 }, { "epoch": 14.66, "learning_rate": 4.150816222525441e-06, "loss": 2.9276, "step": 1789000 }, { "epoch": 14.66, "learning_rate": 4.149172269805521e-06, "loss": 2.9337, "step": 1789500 }, { "epoch": 14.67, "learning_rate": 4.147528317085601e-06, "loss": 2.9284, "step": 1790000 }, { "epoch": 14.67, "learning_rate": 4.145884364365681e-06, "loss": 2.9208, "step": 1790500 }, { "epoch": 14.67, "learning_rate": 4.144240411645761e-06, "loss": 2.9408, "step": 1791000 }, { "epoch": 14.68, "learning_rate": 4.142596458925841e-06, "loss": 2.9293, "step": 1791500 }, { "epoch": 14.68, "learning_rate": 4.140952506205922e-06, "loss": 2.93, "step": 1792000 }, { "epoch": 14.69, "learning_rate": 4.1393085534860024e-06, "loss": 2.9313, "step": 1792500 }, { "epoch": 14.69, "learning_rate": 4.1376646007660826e-06, "loss": 2.9321, "step": 1793000 }, { "epoch": 14.69, "learning_rate": 4.136020648046163e-06, "loss": 2.9249, "step": 1793500 }, { "epoch": 14.7, "learning_rate": 4.134376695326243e-06, "loss": 2.9286, "step": 1794000 }, { "epoch": 14.7, "learning_rate": 4.132732742606323e-06, "loss": 2.9354, "step": 1794500 }, { "epoch": 14.71, "learning_rate": 4.131088789886403e-06, "loss": 2.9287, "step": 1795000 }, { "epoch": 14.71, "learning_rate": 4.129444837166483e-06, "loss": 2.9271, "step": 1795500 }, { "epoch": 14.71, "learning_rate": 4.127800884446564e-06, "loss": 2.9335, "step": 1796000 }, { "epoch": 14.72, "learning_rate": 4.1261569317266434e-06, "loss": 2.9235, "step": 1796500 }, { "epoch": 14.72, "learning_rate": 4.1245129790067236e-06, "loss": 2.9245, "step": 1797000 }, { "epoch": 14.73, "learning_rate": 4.1228690262868045e-06, "loss": 2.925, "step": 1797500 }, { "epoch": 14.73, "learning_rate": 4.121225073566885e-06, "loss": 2.9244, "step": 1798000 }, { "epoch": 14.73, "learning_rate": 4.119581120846965e-06, "loss": 2.9306, "step": 1798500 }, { "epoch": 14.74, "learning_rate": 4.117937168127045e-06, "loss": 2.9243, "step": 1799000 }, { "epoch": 14.74, "learning_rate": 4.116293215407125e-06, "loss": 2.9272, "step": 1799500 }, { "epoch": 14.75, "learning_rate": 4.114649262687205e-06, "loss": 2.9319, "step": 1800000 }, { "epoch": 14.75, "eval_accuracy": 0.4989067207393835, "eval_loss": 2.794658899307251, "eval_runtime": 404.1884, "eval_samples_per_second": 762.902, "eval_steps_per_second": 15.896, "step": 1800000 }, { "epoch": 14.75, "learning_rate": 4.113005309967285e-06, "loss": 2.9225, "step": 1800500 }, { "epoch": 14.76, "learning_rate": 4.111361357247366e-06, "loss": 2.9304, "step": 1801000 }, { "epoch": 14.76, "learning_rate": 4.109717404527446e-06, "loss": 2.9258, "step": 1801500 }, { "epoch": 14.76, "learning_rate": 4.1080734518075265e-06, "loss": 2.9214, "step": 1802000 }, { "epoch": 14.77, "learning_rate": 4.106429499087607e-06, "loss": 2.9272, "step": 1802500 }, { "epoch": 14.77, "learning_rate": 4.104785546367687e-06, "loss": 2.9293, "step": 1803000 }, { "epoch": 14.78, "learning_rate": 4.103141593647767e-06, "loss": 2.9307, "step": 1803500 }, { "epoch": 14.78, "learning_rate": 4.101497640927847e-06, "loss": 2.9316, "step": 1804000 }, { "epoch": 14.78, "learning_rate": 4.099853688207928e-06, "loss": 2.9265, "step": 1804500 }, { "epoch": 14.79, "learning_rate": 4.098209735488008e-06, "loss": 2.9225, "step": 1805000 }, { "epoch": 14.79, "learning_rate": 4.096565782768087e-06, "loss": 2.9219, "step": 1805500 }, { "epoch": 14.8, "learning_rate": 4.094921830048168e-06, "loss": 2.9265, "step": 1806000 }, { "epoch": 14.8, "learning_rate": 4.0932778773282485e-06, "loss": 2.9258, "step": 1806500 }, { "epoch": 14.8, "learning_rate": 4.091633924608329e-06, "loss": 2.9324, "step": 1807000 }, { "epoch": 14.81, "learning_rate": 4.089989971888409e-06, "loss": 2.9335, "step": 1807500 }, { "epoch": 14.81, "learning_rate": 4.088346019168489e-06, "loss": 2.9219, "step": 1808000 }, { "epoch": 14.82, "learning_rate": 4.086702066448569e-06, "loss": 2.9355, "step": 1808500 }, { "epoch": 14.82, "learning_rate": 4.085058113728649e-06, "loss": 2.9275, "step": 1809000 }, { "epoch": 14.82, "learning_rate": 4.083414161008729e-06, "loss": 2.9191, "step": 1809500 }, { "epoch": 14.83, "learning_rate": 4.08177020828881e-06, "loss": 2.9248, "step": 1810000 }, { "epoch": 14.83, "learning_rate": 4.08012625556889e-06, "loss": 2.9263, "step": 1810500 }, { "epoch": 14.84, "learning_rate": 4.0784823028489705e-06, "loss": 2.9324, "step": 1811000 }, { "epoch": 14.84, "learning_rate": 4.076838350129051e-06, "loss": 2.9256, "step": 1811500 }, { "epoch": 14.85, "learning_rate": 4.075194397409131e-06, "loss": 2.9276, "step": 1812000 }, { "epoch": 14.85, "learning_rate": 4.073550444689211e-06, "loss": 2.9319, "step": 1812500 }, { "epoch": 14.85, "learning_rate": 4.071906491969291e-06, "loss": 2.9265, "step": 1813000 }, { "epoch": 14.86, "learning_rate": 4.070262539249372e-06, "loss": 2.9284, "step": 1813500 }, { "epoch": 14.86, "learning_rate": 4.068618586529452e-06, "loss": 2.9313, "step": 1814000 }, { "epoch": 14.87, "learning_rate": 4.066974633809531e-06, "loss": 2.9201, "step": 1814500 }, { "epoch": 14.87, "learning_rate": 4.065330681089612e-06, "loss": 2.9272, "step": 1815000 }, { "epoch": 14.87, "learning_rate": 4.0636867283696925e-06, "loss": 2.929, "step": 1815500 }, { "epoch": 14.88, "learning_rate": 4.062042775649773e-06, "loss": 2.9221, "step": 1816000 }, { "epoch": 14.88, "learning_rate": 4.060398822929853e-06, "loss": 2.9246, "step": 1816500 }, { "epoch": 14.89, "learning_rate": 4.058754870209934e-06, "loss": 2.9316, "step": 1817000 }, { "epoch": 14.89, "learning_rate": 4.057110917490013e-06, "loss": 2.9285, "step": 1817500 }, { "epoch": 14.89, "learning_rate": 4.055466964770093e-06, "loss": 2.9272, "step": 1818000 }, { "epoch": 14.9, "learning_rate": 4.053823012050174e-06, "loss": 2.928, "step": 1818500 }, { "epoch": 14.9, "learning_rate": 4.052179059330254e-06, "loss": 2.9296, "step": 1819000 }, { "epoch": 14.91, "learning_rate": 4.050535106610334e-06, "loss": 2.9252, "step": 1819500 }, { "epoch": 14.91, "learning_rate": 4.0488911538904145e-06, "loss": 2.9287, "step": 1820000 }, { "epoch": 14.92, "learning_rate": 4.047247201170495e-06, "loss": 2.9318, "step": 1820500 }, { "epoch": 14.92, "learning_rate": 4.045603248450575e-06, "loss": 2.927, "step": 1821000 }, { "epoch": 14.92, "learning_rate": 4.043959295730655e-06, "loss": 2.9241, "step": 1821500 }, { "epoch": 14.93, "learning_rate": 4.042315343010735e-06, "loss": 2.9234, "step": 1822000 }, { "epoch": 14.93, "learning_rate": 4.040671390290816e-06, "loss": 2.9218, "step": 1822500 }, { "epoch": 14.94, "learning_rate": 4.039027437570896e-06, "loss": 2.9306, "step": 1823000 }, { "epoch": 14.94, "learning_rate": 4.037383484850975e-06, "loss": 2.9271, "step": 1823500 }, { "epoch": 14.94, "learning_rate": 4.035739532131056e-06, "loss": 2.9231, "step": 1824000 }, { "epoch": 14.95, "learning_rate": 4.0340955794111365e-06, "loss": 2.9276, "step": 1824500 }, { "epoch": 14.95, "learning_rate": 4.032451626691217e-06, "loss": 2.9269, "step": 1825000 }, { "epoch": 14.96, "learning_rate": 4.030807673971297e-06, "loss": 2.9248, "step": 1825500 }, { "epoch": 14.96, "learning_rate": 4.029163721251378e-06, "loss": 2.927, "step": 1826000 }, { "epoch": 14.96, "learning_rate": 4.027519768531457e-06, "loss": 2.932, "step": 1826500 }, { "epoch": 14.97, "learning_rate": 4.025875815811537e-06, "loss": 2.9253, "step": 1827000 }, { "epoch": 14.97, "learning_rate": 4.024231863091618e-06, "loss": 2.9283, "step": 1827500 }, { "epoch": 14.98, "learning_rate": 4.022587910371698e-06, "loss": 2.9251, "step": 1828000 }, { "epoch": 14.98, "learning_rate": 4.020943957651778e-06, "loss": 2.9207, "step": 1828500 }, { "epoch": 14.98, "learning_rate": 4.0193000049318584e-06, "loss": 2.931, "step": 1829000 }, { "epoch": 14.99, "learning_rate": 4.0176560522119386e-06, "loss": 2.922, "step": 1829500 }, { "epoch": 14.99, "learning_rate": 4.016012099492019e-06, "loss": 2.9304, "step": 1830000 }, { "epoch": 14.99, "eval_accuracy": 0.49915539316259305, "eval_loss": 2.7920210361480713, "eval_runtime": 408.9888, "eval_samples_per_second": 753.947, "eval_steps_per_second": 15.709, "step": 1830000 }, { "epoch": 15.0, "learning_rate": 4.014368146772099e-06, "loss": 2.924, "step": 1830500 }, { "epoch": 15.0, "learning_rate": 4.01272419405218e-06, "loss": 2.9216, "step": 1831000 }, { "epoch": 15.01, "learning_rate": 4.01108024133226e-06, "loss": 2.9301, "step": 1831500 }, { "epoch": 15.01, "learning_rate": 4.00943628861234e-06, "loss": 2.9275, "step": 1832000 }, { "epoch": 15.01, "learning_rate": 4.00779233589242e-06, "loss": 2.9231, "step": 1832500 }, { "epoch": 15.02, "learning_rate": 4.0061483831725e-06, "loss": 2.9206, "step": 1833000 }, { "epoch": 15.02, "learning_rate": 4.0045044304525804e-06, "loss": 2.9231, "step": 1833500 }, { "epoch": 15.03, "learning_rate": 4.0028604777326606e-06, "loss": 2.9246, "step": 1834000 }, { "epoch": 15.03, "learning_rate": 4.001216525012741e-06, "loss": 2.9253, "step": 1834500 }, { "epoch": 15.03, "learning_rate": 3.999572572292822e-06, "loss": 2.9233, "step": 1835000 }, { "epoch": 15.04, "learning_rate": 3.997928619572902e-06, "loss": 2.9254, "step": 1835500 }, { "epoch": 15.04, "learning_rate": 3.996284666852981e-06, "loss": 2.9231, "step": 1836000 }, { "epoch": 15.05, "learning_rate": 3.994640714133062e-06, "loss": 2.9289, "step": 1836500 }, { "epoch": 15.05, "learning_rate": 3.992996761413142e-06, "loss": 2.9245, "step": 1837000 }, { "epoch": 15.05, "learning_rate": 3.991352808693222e-06, "loss": 2.9221, "step": 1837500 }, { "epoch": 15.06, "learning_rate": 3.989708855973302e-06, "loss": 2.9205, "step": 1838000 }, { "epoch": 15.06, "learning_rate": 3.988064903253383e-06, "loss": 2.9306, "step": 1838500 }, { "epoch": 15.07, "learning_rate": 3.986420950533463e-06, "loss": 2.9248, "step": 1839000 }, { "epoch": 15.07, "learning_rate": 3.984776997813543e-06, "loss": 2.9229, "step": 1839500 }, { "epoch": 15.07, "learning_rate": 3.983133045093624e-06, "loss": 2.9208, "step": 1840000 }, { "epoch": 15.08, "learning_rate": 3.981489092373704e-06, "loss": 2.9246, "step": 1840500 }, { "epoch": 15.08, "learning_rate": 3.979845139653784e-06, "loss": 2.9267, "step": 1841000 }, { "epoch": 15.09, "learning_rate": 3.978201186933864e-06, "loss": 2.9254, "step": 1841500 }, { "epoch": 15.09, "step": 1841687, "total_flos": 9.934758834172068e+17, "train_loss": 3.300056499910879, "train_runtime": 215976.5267, "train_samples_per_second": 678.171, "train_steps_per_second": 14.129 } ], "max_steps": 3051450, "num_train_epochs": 25, "total_flos": 9.934758834172068e+17, "trial_name": null, "trial_params": null }