diff --git "a/trainer_state.json" "b/trainer_state.json" --- "a/trainer_state.json" +++ "b/trainer_state.json" @@ -3,14055 +3,13152 @@ "best_model_checkpoint": null, "epoch": 3.0, "eval_steps": 500, - "global_step": 2004, + "global_step": 1875, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { - "epoch": 0.0014970059880239522, - "grad_norm": 0.4412629429939014, - "learning_rate": 3.278688524590164e-07, - "loss": 1.0214, + "epoch": 0.0016, + "grad_norm": 0.7358543184137598, + "learning_rate": 3.5087719298245616e-07, + "loss": 1.6113, "step": 1 }, { - "epoch": 0.0029940119760479044, - "grad_norm": 0.44857961209660113, - "learning_rate": 6.557377049180328e-07, - "loss": 1.0321, + "epoch": 0.0032, + "grad_norm": 0.7911699409831793, + "learning_rate": 7.017543859649123e-07, + "loss": 1.6029, "step": 2 }, { - "epoch": 0.004491017964071856, - "grad_norm": 0.4453725376646987, - "learning_rate": 9.836065573770493e-07, - "loss": 1.0352, + "epoch": 0.0048, + "grad_norm": 0.7514423561996403, + "learning_rate": 1.0526315789473685e-06, + "loss": 1.6231, "step": 3 }, { - "epoch": 0.005988023952095809, - "grad_norm": 0.4051365182123174, - "learning_rate": 1.3114754098360657e-06, - "loss": 0.959, + "epoch": 0.0064, + "grad_norm": 0.7653628324142866, + "learning_rate": 1.4035087719298246e-06, + "loss": 1.6389, "step": 4 }, { - "epoch": 0.0074850299401197605, - "grad_norm": 0.3685112349775458, - "learning_rate": 1.6393442622950819e-06, - "loss": 0.9052, + "epoch": 0.008, + "grad_norm": 0.751467157371489, + "learning_rate": 1.7543859649122807e-06, + "loss": 1.6246, "step": 5 }, { - "epoch": 0.008982035928143712, - "grad_norm": 0.46048753426928685, - "learning_rate": 1.9672131147540985e-06, - "loss": 0.9808, + "epoch": 0.0096, + "grad_norm": 0.7645075600822779, + "learning_rate": 2.105263157894737e-06, + "loss": 1.6364, "step": 6 }, { - "epoch": 0.010479041916167664, - "grad_norm": 0.4364944016404722, - "learning_rate": 2.295081967213115e-06, - "loss": 1.0203, + "epoch": 0.0112, + "grad_norm": 0.8084898707473633, + "learning_rate": 2.456140350877193e-06, + "loss": 1.6179, "step": 7 }, { - "epoch": 0.011976047904191617, - "grad_norm": 0.4197052350095643, - "learning_rate": 2.6229508196721314e-06, - "loss": 0.936, + "epoch": 0.0128, + "grad_norm": 0.7300397658242517, + "learning_rate": 2.8070175438596493e-06, + "loss": 1.6075, "step": 8 }, { - "epoch": 0.01347305389221557, - "grad_norm": 0.3935251532018477, - "learning_rate": 2.9508196721311478e-06, - "loss": 0.931, + "epoch": 0.0144, + "grad_norm": 0.7553157416489561, + "learning_rate": 3.157894736842105e-06, + "loss": 1.6126, "step": 9 }, { - "epoch": 0.014970059880239521, - "grad_norm": 0.40165315264572143, - "learning_rate": 3.2786885245901638e-06, - "loss": 0.9519, + "epoch": 0.016, + "grad_norm": 0.772508985596036, + "learning_rate": 3.5087719298245615e-06, + "loss": 1.6097, "step": 10 }, { - "epoch": 0.016467065868263474, - "grad_norm": 0.4327103578358591, - "learning_rate": 3.6065573770491806e-06, - "loss": 0.9975, + "epoch": 0.0176, + "grad_norm": 0.7416306158167456, + "learning_rate": 3.859649122807018e-06, + "loss": 1.5971, "step": 11 }, { - "epoch": 0.017964071856287425, - "grad_norm": 0.4829902386185703, - "learning_rate": 3.934426229508197e-06, - "loss": 1.0322, + "epoch": 0.0192, + "grad_norm": 0.7046433490415748, + "learning_rate": 4.210526315789474e-06, + "loss": 1.5399, "step": 12 }, { - "epoch": 0.019461077844311378, - "grad_norm": 0.43745126656959527, - "learning_rate": 4.2622950819672135e-06, - "loss": 0.9339, + "epoch": 0.0208, + "grad_norm": 0.7702175304329298, + "learning_rate": 4.56140350877193e-06, + "loss": 1.5664, "step": 13 }, { - "epoch": 0.020958083832335328, - "grad_norm": 0.40770435507966485, - "learning_rate": 4.59016393442623e-06, - "loss": 0.9558, + "epoch": 0.0224, + "grad_norm": 0.7298779175731793, + "learning_rate": 4.912280701754386e-06, + "loss": 1.5476, "step": 14 }, { - "epoch": 0.02245508982035928, - "grad_norm": 0.3880526820488479, - "learning_rate": 4.918032786885246e-06, - "loss": 0.9233, + "epoch": 0.024, + "grad_norm": 0.7559518088328111, + "learning_rate": 5.263157894736842e-06, + "loss": 1.5376, "step": 15 }, { - "epoch": 0.023952095808383235, - "grad_norm": 0.41942806957859513, - "learning_rate": 5.245901639344263e-06, - "loss": 0.97, + "epoch": 0.0256, + "grad_norm": 0.7096582083433334, + "learning_rate": 5.6140350877192985e-06, + "loss": 1.5267, "step": 16 }, { - "epoch": 0.025449101796407185, - "grad_norm": 0.38973129771254217, - "learning_rate": 5.573770491803278e-06, - "loss": 0.8961, + "epoch": 0.0272, + "grad_norm": 0.7914446668594594, + "learning_rate": 5.964912280701755e-06, + "loss": 1.5569, "step": 17 }, { - "epoch": 0.02694610778443114, - "grad_norm": 0.4152019760637627, - "learning_rate": 5.9016393442622956e-06, - "loss": 0.9692, + "epoch": 0.0288, + "grad_norm": 0.6806304091778055, + "learning_rate": 6.31578947368421e-06, + "loss": 1.4443, "step": 18 }, { - "epoch": 0.02844311377245509, - "grad_norm": 0.46572881246006315, - "learning_rate": 6.229508196721312e-06, - "loss": 0.9336, + "epoch": 0.0304, + "grad_norm": 0.7077907993401488, + "learning_rate": 6.666666666666667e-06, + "loss": 1.477, "step": 19 }, { - "epoch": 0.029940119760479042, - "grad_norm": 0.3928467539453223, - "learning_rate": 6.5573770491803276e-06, - "loss": 0.9064, + "epoch": 0.032, + "grad_norm": 0.7169470880561969, + "learning_rate": 7.017543859649123e-06, + "loss": 1.4694, "step": 20 }, { - "epoch": 0.03143712574850299, - "grad_norm": 0.3955023428706724, - "learning_rate": 6.885245901639345e-06, - "loss": 0.8755, + "epoch": 0.0336, + "grad_norm": 0.7194881877089658, + "learning_rate": 7.368421052631579e-06, + "loss": 1.4203, "step": 21 }, { - "epoch": 0.03293413173652695, - "grad_norm": 0.3655402984227431, - "learning_rate": 7.213114754098361e-06, - "loss": 0.8226, + "epoch": 0.0352, + "grad_norm": 0.7512994062273585, + "learning_rate": 7.719298245614036e-06, + "loss": 1.4298, "step": 22 }, { - "epoch": 0.0344311377245509, - "grad_norm": 0.38433031729962397, - "learning_rate": 7.540983606557377e-06, - "loss": 0.8473, + "epoch": 0.0368, + "grad_norm": 0.708482808548663, + "learning_rate": 8.070175438596492e-06, + "loss": 1.484, "step": 23 }, { - "epoch": 0.03592814371257485, - "grad_norm": 0.3573419875384817, - "learning_rate": 7.868852459016394e-06, - "loss": 0.8282, + "epoch": 0.0384, + "grad_norm": 0.6736120028530779, + "learning_rate": 8.421052631578948e-06, + "loss": 1.4359, "step": 24 }, { - "epoch": 0.0374251497005988, - "grad_norm": 0.4093172699149451, - "learning_rate": 8.19672131147541e-06, - "loss": 0.8879, + "epoch": 0.04, + "grad_norm": 0.6387588689424757, + "learning_rate": 8.771929824561405e-06, + "loss": 1.3827, "step": 25 }, { - "epoch": 0.038922155688622756, - "grad_norm": 0.3820938437320782, - "learning_rate": 8.524590163934427e-06, - "loss": 0.864, + "epoch": 0.0416, + "grad_norm": 0.6528462520370978, + "learning_rate": 9.12280701754386e-06, + "loss": 1.3671, "step": 26 }, { - "epoch": 0.040419161676646706, - "grad_norm": 0.39326220628777386, - "learning_rate": 8.852459016393443e-06, - "loss": 0.8862, + "epoch": 0.0432, + "grad_norm": 0.6747978566054349, + "learning_rate": 9.473684210526315e-06, + "loss": 1.3764, "step": 27 }, { - "epoch": 0.041916167664670656, - "grad_norm": 0.36807756007538617, - "learning_rate": 9.18032786885246e-06, - "loss": 0.8257, + "epoch": 0.0448, + "grad_norm": 0.651327249996694, + "learning_rate": 9.824561403508772e-06, + "loss": 1.3101, "step": 28 }, { - "epoch": 0.04341317365269461, - "grad_norm": 0.35588261728997433, - "learning_rate": 9.508196721311476e-06, - "loss": 0.8128, + "epoch": 0.0464, + "grad_norm": 0.6255836750532637, + "learning_rate": 1.017543859649123e-05, + "loss": 1.3146, "step": 29 }, { - "epoch": 0.04491017964071856, - "grad_norm": 0.3715077903083305, - "learning_rate": 9.836065573770493e-06, - "loss": 0.8099, + "epoch": 0.048, + "grad_norm": 0.6407677657370118, + "learning_rate": 1.0526315789473684e-05, + "loss": 1.2396, "step": 30 }, { - "epoch": 0.04640718562874251, - "grad_norm": 0.3596909054144244, - "learning_rate": 1.0163934426229509e-05, - "loss": 0.7715, + "epoch": 0.0496, + "grad_norm": 0.6507716198039026, + "learning_rate": 1.0877192982456142e-05, + "loss": 1.259, "step": 31 }, { - "epoch": 0.04790419161676647, - "grad_norm": 0.384702217147705, - "learning_rate": 1.0491803278688525e-05, - "loss": 0.833, + "epoch": 0.0512, + "grad_norm": 0.6092921472155218, + "learning_rate": 1.1228070175438597e-05, + "loss": 1.1989, "step": 32 }, { - "epoch": 0.04940119760479042, - "grad_norm": 0.36512927940401047, - "learning_rate": 1.0819672131147544e-05, - "loss": 0.7903, + "epoch": 0.0528, + "grad_norm": 0.6583503598059806, + "learning_rate": 1.1578947368421053e-05, + "loss": 1.2636, "step": 33 }, { - "epoch": 0.05089820359281437, - "grad_norm": 0.39841041594176824, - "learning_rate": 1.1147540983606557e-05, - "loss": 0.7641, + "epoch": 0.0544, + "grad_norm": 0.6306048826535348, + "learning_rate": 1.192982456140351e-05, + "loss": 1.1936, "step": 34 }, { - "epoch": 0.05239520958083832, - "grad_norm": 0.41324753544403525, - "learning_rate": 1.1475409836065575e-05, - "loss": 0.7774, + "epoch": 0.056, + "grad_norm": 0.6093528650503758, + "learning_rate": 1.2280701754385966e-05, + "loss": 1.161, "step": 35 }, { - "epoch": 0.05389221556886228, - "grad_norm": 0.3805767081935722, - "learning_rate": 1.1803278688524591e-05, - "loss": 0.7746, + "epoch": 0.0576, + "grad_norm": 0.6110544645977457, + "learning_rate": 1.263157894736842e-05, + "loss": 1.1311, "step": 36 }, { - "epoch": 0.05538922155688623, - "grad_norm": 0.3640692670562054, - "learning_rate": 1.2131147540983608e-05, - "loss": 0.741, + "epoch": 0.0592, + "grad_norm": 0.6113710211776816, + "learning_rate": 1.2982456140350879e-05, + "loss": 1.1236, "step": 37 }, { - "epoch": 0.05688622754491018, - "grad_norm": 0.3727384829726358, - "learning_rate": 1.2459016393442624e-05, - "loss": 0.7203, + "epoch": 0.0608, + "grad_norm": 0.6219287023974935, + "learning_rate": 1.3333333333333333e-05, + "loss": 1.1245, "step": 38 }, { - "epoch": 0.058383233532934134, - "grad_norm": 0.3695211453830474, - "learning_rate": 1.2786885245901642e-05, - "loss": 0.6895, + "epoch": 0.0624, + "grad_norm": 0.6058903001550351, + "learning_rate": 1.3684210526315791e-05, + "loss": 1.0604, "step": 39 }, { - "epoch": 0.059880239520958084, - "grad_norm": 0.41877844793582925, - "learning_rate": 1.3114754098360655e-05, - "loss": 0.6759, + "epoch": 0.064, + "grad_norm": 0.5995392327986184, + "learning_rate": 1.4035087719298246e-05, + "loss": 0.9938, "step": 40 }, { - "epoch": 0.061377245508982034, - "grad_norm": 0.33931923326947677, - "learning_rate": 1.3442622950819673e-05, - "loss": 0.6196, + "epoch": 0.0656, + "grad_norm": 0.5966528699606367, + "learning_rate": 1.4385964912280704e-05, + "loss": 0.9924, "step": 41 }, { - "epoch": 0.06287425149700598, - "grad_norm": 0.3684580530246632, - "learning_rate": 1.377049180327869e-05, - "loss": 0.6655, + "epoch": 0.0672, + "grad_norm": 0.6114423840267016, + "learning_rate": 1.4736842105263159e-05, + "loss": 0.9598, "step": 42 }, { - "epoch": 0.06437125748502993, - "grad_norm": 0.32458089242640203, - "learning_rate": 1.4098360655737706e-05, - "loss": 0.5865, + "epoch": 0.0688, + "grad_norm": 0.6128015950363562, + "learning_rate": 1.5087719298245615e-05, + "loss": 0.9558, "step": 43 }, { - "epoch": 0.0658682634730539, - "grad_norm": 0.3895546564318695, - "learning_rate": 1.4426229508196722e-05, - "loss": 0.6705, + "epoch": 0.0704, + "grad_norm": 0.6315271862733165, + "learning_rate": 1.543859649122807e-05, + "loss": 0.9046, "step": 44 }, { - "epoch": 0.06736526946107785, - "grad_norm": 0.3483102383245026, - "learning_rate": 1.4754098360655739e-05, - "loss": 0.5904, + "epoch": 0.072, + "grad_norm": 0.6309262858556801, + "learning_rate": 1.578947368421053e-05, + "loss": 0.8911, "step": 45 }, { - "epoch": 0.0688622754491018, - "grad_norm": 0.3789944257657541, - "learning_rate": 1.5081967213114754e-05, - "loss": 0.6053, + "epoch": 0.0736, + "grad_norm": 0.636954612998268, + "learning_rate": 1.6140350877192984e-05, + "loss": 0.823, "step": 46 }, { - "epoch": 0.07035928143712575, - "grad_norm": 0.37752386838956564, - "learning_rate": 1.5409836065573772e-05, - "loss": 0.5844, + "epoch": 0.0752, + "grad_norm": 0.6668276280248587, + "learning_rate": 1.649122807017544e-05, + "loss": 0.8139, "step": 47 }, { - "epoch": 0.0718562874251497, - "grad_norm": 0.3630635575321055, - "learning_rate": 1.5737704918032788e-05, - "loss": 0.5291, + "epoch": 0.0768, + "grad_norm": 0.834907844936787, + "learning_rate": 1.6842105263157896e-05, + "loss": 0.7885, "step": 48 }, { - "epoch": 0.07335329341317365, - "grad_norm": 0.3914577758366373, - "learning_rate": 1.6065573770491805e-05, - "loss": 0.5326, + "epoch": 0.0784, + "grad_norm": 0.7151347787252198, + "learning_rate": 1.719298245614035e-05, + "loss": 0.7322, "step": 49 }, { - "epoch": 0.0748502994011976, - "grad_norm": 0.40325362044689916, - "learning_rate": 1.639344262295082e-05, - "loss": 0.5288, + "epoch": 0.08, + "grad_norm": 0.6745090416732078, + "learning_rate": 1.754385964912281e-05, + "loss": 0.7137, "step": 50 }, { - "epoch": 0.07634730538922156, - "grad_norm": 0.4137060583375249, - "learning_rate": 1.6721311475409837e-05, - "loss": 0.4758, + "epoch": 0.0816, + "grad_norm": 0.6569944474302377, + "learning_rate": 1.7894736842105264e-05, + "loss": 0.6544, "step": 51 }, { - "epoch": 0.07784431137724551, - "grad_norm": 0.37431549278431586, - "learning_rate": 1.7049180327868854e-05, - "loss": 0.4133, + "epoch": 0.0832, + "grad_norm": 0.6571117644599914, + "learning_rate": 1.824561403508772e-05, + "loss": 0.6047, "step": 52 }, { - "epoch": 0.07934131736526946, - "grad_norm": 0.4434060283126579, - "learning_rate": 1.737704918032787e-05, - "loss": 0.4636, + "epoch": 0.0848, + "grad_norm": 0.6906720063383268, + "learning_rate": 1.8596491228070176e-05, + "loss": 0.5888, "step": 53 }, { - "epoch": 0.08083832335329341, - "grad_norm": 0.44913599309601093, - "learning_rate": 1.7704918032786887e-05, - "loss": 0.4453, + "epoch": 0.0864, + "grad_norm": 0.7155015831811764, + "learning_rate": 1.894736842105263e-05, + "loss": 0.556, "step": 54 }, { - "epoch": 0.08233532934131736, - "grad_norm": 0.4208028028608017, - "learning_rate": 1.8032786885245903e-05, - "loss": 0.3823, + "epoch": 0.088, + "grad_norm": 0.7439398825020056, + "learning_rate": 1.929824561403509e-05, + "loss": 0.4999, "step": 55 }, { - "epoch": 0.08383233532934131, - "grad_norm": 0.4340233256708269, - "learning_rate": 1.836065573770492e-05, - "loss": 0.3575, + "epoch": 0.0896, + "grad_norm": 0.73454273481856, + "learning_rate": 1.9649122807017544e-05, + "loss": 0.4582, "step": 56 }, { - "epoch": 0.08532934131736528, - "grad_norm": 0.4231169164061207, - "learning_rate": 1.8688524590163936e-05, - "loss": 0.3157, + "epoch": 0.0912, + "grad_norm": 0.8031998382360424, + "learning_rate": 2e-05, + "loss": 0.4203, "step": 57 }, { - "epoch": 0.08682634730538923, - "grad_norm": 0.4174421584158068, - "learning_rate": 1.9016393442622952e-05, - "loss": 0.2966, + "epoch": 0.0928, + "grad_norm": 0.7472148501527976, + "learning_rate": 1.9999985069241058e-05, + "loss": 0.3486, "step": 58 }, { - "epoch": 0.08832335329341318, - "grad_norm": 0.4611062696725442, - "learning_rate": 1.934426229508197e-05, - "loss": 0.2824, + "epoch": 0.0944, + "grad_norm": 0.7894792956649227, + "learning_rate": 1.9999940277008807e-05, + "loss": 0.3621, "step": 59 }, { - "epoch": 0.08982035928143713, - "grad_norm": 0.5834947622453618, - "learning_rate": 1.9672131147540985e-05, - "loss": 0.2742, + "epoch": 0.096, + "grad_norm": 0.8963297396723989, + "learning_rate": 1.9999865623437014e-05, + "loss": 0.3129, "step": 60 }, { - "epoch": 0.09131736526946108, - "grad_norm": 0.4356716457731799, - "learning_rate": 2e-05, - "loss": 0.232, + "epoch": 0.0976, + "grad_norm": 0.884139403384448, + "learning_rate": 1.99997611087486e-05, + "loss": 0.2671, "step": 61 }, { - "epoch": 0.09281437125748503, - "grad_norm": 0.5040703847152936, - "learning_rate": 1.9999986928541335e-05, - "loss": 0.215, + "epoch": 0.0992, + "grad_norm": 0.9041012483682248, + "learning_rate": 1.9999626733255662e-05, + "loss": 0.2702, "step": 62 }, { - "epoch": 0.09431137724550898, - "grad_norm": 0.6246157723635114, - "learning_rate": 1.9999947714199512e-05, - "loss": 0.2105, + "epoch": 0.1008, + "grad_norm": 0.9511493826946734, + "learning_rate": 1.9999462497359468e-05, + "loss": 0.258, "step": 63 }, { - "epoch": 0.09580838323353294, - "grad_norm": 0.620783787770869, - "learning_rate": 1.999988235707705e-05, - "loss": 0.1869, + "epoch": 0.1024, + "grad_norm": 0.7270450177964887, + "learning_rate": 1.9999268401550445e-05, + "loss": 0.2139, "step": 64 }, { - "epoch": 0.09730538922155689, - "grad_norm": 0.4844829922546262, - "learning_rate": 1.9999790857344808e-05, - "loss": 0.1693, + "epoch": 0.104, + "grad_norm": 0.6243505426704236, + "learning_rate": 1.9999044446408203e-05, + "loss": 0.2011, "step": 65 }, { - "epoch": 0.09880239520958084, - "grad_norm": 0.39690065232341765, - "learning_rate": 1.9999673215241996e-05, - "loss": 0.1559, + "epoch": 0.1056, + "grad_norm": 0.5771396526385555, + "learning_rate": 1.9998790632601496e-05, + "loss": 0.1764, "step": 66 }, { - "epoch": 0.10029940119760479, - "grad_norm": 0.39690065232341765, - "learning_rate": 1.9999673215241996e-05, - "loss": 0.1393, + "epoch": 0.1072, + "grad_norm": 0.4953614734628554, + "learning_rate": 1.9998506960888258e-05, + "loss": 0.1622, "step": 67 }, { - "epoch": 0.10179640718562874, - "grad_norm": 0.3744306931664112, - "learning_rate": 1.9999529431076165e-05, - "loss": 0.1444, + "epoch": 0.1088, + "grad_norm": 0.5099091603110442, + "learning_rate": 1.999819343211557e-05, + "loss": 0.1426, "step": 68 }, { - "epoch": 0.10329341317365269, - "grad_norm": 0.3555775828949075, - "learning_rate": 1.9999359505223208e-05, - "loss": 0.131, + "epoch": 0.1104, + "grad_norm": 0.4708736887102006, + "learning_rate": 1.999785004721968e-05, + "loss": 0.1339, "step": 69 }, { - "epoch": 0.10479041916167664, - "grad_norm": 0.446887895690596, - "learning_rate": 1.999916343812736e-05, - "loss": 0.1295, + "epoch": 0.112, + "grad_norm": 0.5728193517444005, + "learning_rate": 1.9997476807225987e-05, + "loss": 0.1354, "step": 70 }, { - "epoch": 0.1062874251497006, - "grad_norm": 0.4182793503525473, - "learning_rate": 1.99989412303012e-05, - "loss": 0.1129, + "epoch": 0.1136, + "grad_norm": 0.5007571963655035, + "learning_rate": 1.999707371324904e-05, + "loss": 0.1202, "step": 71 }, { - "epoch": 0.10778443113772455, - "grad_norm": 0.3512235169414461, - "learning_rate": 1.999869288232564e-05, - "loss": 0.1115, + "epoch": 0.1152, + "grad_norm": 0.4183656230559708, + "learning_rate": 1.9996640766492542e-05, + "loss": 0.1092, "step": 72 }, { - "epoch": 0.1092814371257485, - "grad_norm": 0.39358701307726796, - "learning_rate": 1.9998418394849935e-05, - "loss": 0.1059, + "epoch": 0.1168, + "grad_norm": 0.4786625534172537, + "learning_rate": 1.9996177968249336e-05, + "loss": 0.107, "step": 73 }, { - "epoch": 0.11077844311377245, - "grad_norm": 0.37079524723615265, - "learning_rate": 1.999811776859168e-05, - "loss": 0.1063, + "epoch": 0.1184, + "grad_norm": 0.4599603227151936, + "learning_rate": 1.999568531990141e-05, + "loss": 0.096, "step": 74 }, { - "epoch": 0.1122754491017964, - "grad_norm": 0.35175920574697694, - "learning_rate": 1.9997791004336794e-05, - "loss": 0.0978, + "epoch": 0.12, + "grad_norm": 0.5951574041051128, + "learning_rate": 1.999516282291988e-05, + "loss": 0.0976, "step": 75 }, { - "epoch": 0.11377245508982035, - "grad_norm": 0.44421104255896743, - "learning_rate": 1.9997438102939538e-05, - "loss": 0.0972, + "epoch": 0.1216, + "grad_norm": 0.44660464536078237, + "learning_rate": 1.9994610478865012e-05, + "loss": 0.0936, "step": 76 }, { - "epoch": 0.11526946107784432, - "grad_norm": 0.2935792679601046, - "learning_rate": 1.99970590653225e-05, - "loss": 0.0865, + "epoch": 0.1232, + "grad_norm": 0.43513595090472557, + "learning_rate": 1.999402828938618e-05, + "loss": 0.0819, "step": 77 }, { - "epoch": 0.11676646706586827, - "grad_norm": 0.32149928982255116, - "learning_rate": 1.999665389247659e-05, - "loss": 0.0848, + "epoch": 0.1248, + "grad_norm": 0.48756807602594426, + "learning_rate": 1.9993416256221894e-05, + "loss": 0.0839, "step": 78 }, { - "epoch": 0.11826347305389222, - "grad_norm": 0.3346624453038523, - "learning_rate": 1.999622258546105e-05, - "loss": 0.0841, + "epoch": 0.1264, + "grad_norm": 0.40464585472402786, + "learning_rate": 1.999277438119978e-05, + "loss": 0.0764, "step": 79 }, { - "epoch": 0.11976047904191617, - "grad_norm": 0.2307198733032352, - "learning_rate": 1.9995765145403444e-05, - "loss": 0.078, + "epoch": 0.128, + "grad_norm": 0.40431395080239424, + "learning_rate": 1.9992102666236567e-05, + "loss": 0.0727, "step": 80 }, { - "epoch": 0.12125748502994012, - "grad_norm": 0.2501179187728697, - "learning_rate": 1.9995281573499652e-05, - "loss": 0.0789, + "epoch": 0.1296, + "grad_norm": 0.41569038798416647, + "learning_rate": 1.9991401113338103e-05, + "loss": 0.0734, "step": 81 }, { - "epoch": 0.12275449101796407, - "grad_norm": 0.3027539443814983, - "learning_rate": 1.9994771871013874e-05, - "loss": 0.0773, + "epoch": 0.1312, + "grad_norm": 0.4136415893195504, + "learning_rate": 1.9990669724599336e-05, + "loss": 0.067, "step": 82 }, { - "epoch": 0.12425149700598802, - "grad_norm": 0.4231966349076508, - "learning_rate": 1.999423603927862e-05, - "loss": 0.0789, + "epoch": 0.1328, + "grad_norm": 0.37595574061292625, + "learning_rate": 1.9989908502204295e-05, + "loss": 0.0634, "step": 83 }, { - "epoch": 0.12574850299401197, - "grad_norm": 0.2635202834499579, - "learning_rate": 1.9993674079694708e-05, - "loss": 0.0675, + "epoch": 0.1344, + "grad_norm": 0.4355120929608111, + "learning_rate": 1.998911744842611e-05, + "loss": 0.0643, "step": 84 }, { - "epoch": 0.12724550898203593, - "grad_norm": 0.43234380467645195, - "learning_rate": 1.999308599373127e-05, - "loss": 0.0787, + "epoch": 0.136, + "grad_norm": 0.4273191979578173, + "learning_rate": 1.9988296565626988e-05, + "loss": 0.0663, "step": 85 }, { - "epoch": 0.12874251497005987, - "grad_norm": 0.1825788301606033, - "learning_rate": 1.999247178292573e-05, - "loss": 0.0721, + "epoch": 0.1376, + "grad_norm": 0.33212691264888766, + "learning_rate": 1.9987445856258208e-05, + "loss": 0.0621, "step": 86 }, { - "epoch": 0.13023952095808383, - "grad_norm": 0.2078262527726928, - "learning_rate": 1.9991831448883815e-05, - "loss": 0.072, + "epoch": 0.1392, + "grad_norm": 0.26988113038751194, + "learning_rate": 1.9986565322860117e-05, + "loss": 0.0573, "step": 87 }, { - "epoch": 0.1317365269461078, - "grad_norm": 0.20140626332699332, - "learning_rate": 1.9991164993279543e-05, - "loss": 0.0692, + "epoch": 0.1408, + "grad_norm": 0.3583810712871837, + "learning_rate": 1.9985654968062122e-05, + "loss": 0.0625, "step": 88 }, { - "epoch": 0.13323353293413173, - "grad_norm": 0.27122788318338725, - "learning_rate": 1.999047241785523e-05, - "loss": 0.0728, + "epoch": 0.1424, + "grad_norm": 0.2856551450347671, + "learning_rate": 1.9984714794582682e-05, + "loss": 0.0518, "step": 89 }, { - "epoch": 0.1347305389221557, - "grad_norm": 0.2420008393498157, - "learning_rate": 1.998975372442146e-05, - "loss": 0.074, + "epoch": 0.144, + "grad_norm": 0.357713767536613, + "learning_rate": 1.9983744805229296e-05, + "loss": 0.0589, "step": 90 }, { - "epoch": 0.13622754491017963, - "grad_norm": 0.29824238890853016, - "learning_rate": 1.9989008914857115e-05, - "loss": 0.0676, + "epoch": 0.1456, + "grad_norm": 0.3142942652181447, + "learning_rate": 1.99827450028985e-05, + "loss": 0.0581, "step": 91 }, { - "epoch": 0.1377245508982036, - "grad_norm": 0.2912749464147097, - "learning_rate": 1.9988237991109342e-05, - "loss": 0.0683, + "epoch": 0.1472, + "grad_norm": 0.38657154986006614, + "learning_rate": 1.998171539057586e-05, + "loss": 0.0564, "step": 92 }, { - "epoch": 0.13922155688622753, - "grad_norm": 0.22500666240199066, - "learning_rate": 1.9987440955193563e-05, - "loss": 0.0695, + "epoch": 0.1488, + "grad_norm": 0.30210821869356325, + "learning_rate": 1.9980655971335944e-05, + "loss": 0.0579, "step": 93 }, { - "epoch": 0.1407185628742515, - "grad_norm": 0.4333093969766628, - "learning_rate": 1.998661780919346e-05, - "loss": 0.0709, + "epoch": 0.1504, + "grad_norm": 0.24458499620779606, + "learning_rate": 1.9979566748342348e-05, + "loss": 0.0459, "step": 94 }, { - "epoch": 0.14221556886227546, - "grad_norm": 0.1895065089384485, - "learning_rate": 1.9985768555260976e-05, - "loss": 0.0665, + "epoch": 0.152, + "grad_norm": 0.24019855385023173, + "learning_rate": 1.9978447724847655e-05, + "loss": 0.0476, "step": 95 }, { - "epoch": 0.1437125748502994, - "grad_norm": 0.27388821802849495, - "learning_rate": 1.9984893195616313e-05, - "loss": 0.0668, + "epoch": 0.1536, + "grad_norm": 0.3262894382467836, + "learning_rate": 1.9977298904193438e-05, + "loss": 0.0486, "step": 96 }, { - "epoch": 0.14520958083832336, - "grad_norm": 0.2133616131184573, - "learning_rate": 1.9983991732547912e-05, - "loss": 0.0644, + "epoch": 0.1552, + "grad_norm": 0.2632264800201802, + "learning_rate": 1.9976120289810247e-05, + "loss": 0.0506, "step": 97 }, { - "epoch": 0.1467065868263473, - "grad_norm": 0.23231083558556673, - "learning_rate": 1.998306416841246e-05, - "loss": 0.0659, + "epoch": 0.1568, + "grad_norm": 0.27260952881040074, + "learning_rate": 1.997491188521761e-05, + "loss": 0.0455, "step": 98 }, { - "epoch": 0.14820359281437126, - "grad_norm": 0.3238616061638123, - "learning_rate": 1.998211050563488e-05, - "loss": 0.0676, + "epoch": 0.1584, + "grad_norm": 0.23211417179873192, + "learning_rate": 1.9973673694024002e-05, + "loss": 0.045, "step": 99 }, { - "epoch": 0.1497005988023952, - "grad_norm": 0.19406634289412697, - "learning_rate": 1.998113074670833e-05, - "loss": 0.0651, + "epoch": 0.16, + "grad_norm": 0.2770948659117598, + "learning_rate": 1.997240571992685e-05, + "loss": 0.0459, "step": 100 }, { - "epoch": 0.15119760479041916, - "grad_norm": 0.17418625172312732, - "learning_rate": 1.9980124894194184e-05, - "loss": 0.0641, + "epoch": 0.1616, + "grad_norm": 0.22461047265833722, + "learning_rate": 1.9971107966712518e-05, + "loss": 0.0442, "step": 101 }, { - "epoch": 0.15269461077844312, - "grad_norm": 0.21916068973423491, - "learning_rate": 1.9979092950722033e-05, - "loss": 0.0624, + "epoch": 0.1632, + "grad_norm": 0.2420647199541069, + "learning_rate": 1.9969780438256295e-05, + "loss": 0.0398, "step": 102 }, { - "epoch": 0.15419161676646706, - "grad_norm": 0.23896804696125767, - "learning_rate": 1.9978034918989674e-05, - "loss": 0.0644, + "epoch": 0.1648, + "grad_norm": 0.2899178643865089, + "learning_rate": 1.9968423138522382e-05, + "loss": 0.0446, "step": 103 }, { - "epoch": 0.15568862275449102, - "grad_norm": 0.19653741457543372, - "learning_rate": 1.9976950801763114e-05, - "loss": 0.0638, + "epoch": 0.1664, + "grad_norm": 0.32269167988242264, + "learning_rate": 1.9967036071563878e-05, + "loss": 0.0386, "step": 104 }, { - "epoch": 0.15718562874251496, - "grad_norm": 0.33352908376943624, - "learning_rate": 1.9975840601876553e-05, - "loss": 0.063, + "epoch": 0.168, + "grad_norm": 0.2875789284847917, + "learning_rate": 1.996561924152278e-05, + "loss": 0.0412, "step": 105 }, { - "epoch": 0.15868263473053892, - "grad_norm": 0.19174789139029536, - "learning_rate": 1.9974704322232375e-05, - "loss": 0.0619, + "epoch": 0.1696, + "grad_norm": 0.3161150343172469, + "learning_rate": 1.996417265262996e-05, + "loss": 0.0402, "step": 106 }, { - "epoch": 0.1601796407185629, - "grad_norm": 0.31382911722883733, - "learning_rate": 1.997354196580115e-05, - "loss": 0.063, + "epoch": 0.1712, + "grad_norm": 0.36239858352621107, + "learning_rate": 1.9962696309205146e-05, + "loss": 0.04, "step": 107 }, { - "epoch": 0.16167664670658682, - "grad_norm": 0.19646342349922372, - "learning_rate": 1.9972353535621613e-05, - "loss": 0.0614, + "epoch": 0.1728, + "grad_norm": 0.35590689541990983, + "learning_rate": 1.996119021565693e-05, + "loss": 0.0427, "step": 108 }, { - "epoch": 0.1631736526946108, - "grad_norm": 0.36904190804708314, - "learning_rate": 1.9971139034800666e-05, - "loss": 0.0626, + "epoch": 0.1744, + "grad_norm": 0.2827310515458967, + "learning_rate": 1.995965437648273e-05, + "loss": 0.0359, "step": 109 }, { - "epoch": 0.16467065868263472, - "grad_norm": 0.2720984468904841, - "learning_rate": 1.9969898466513375e-05, - "loss": 0.0624, + "epoch": 0.176, + "grad_norm": 0.29640131231314887, + "learning_rate": 1.9958088796268794e-05, + "loss": 0.0386, "step": 110 }, { - "epoch": 0.1661676646706587, - "grad_norm": 0.20744167748661432, - "learning_rate": 1.9968631834002947e-05, - "loss": 0.0595, + "epoch": 0.1776, + "grad_norm": 0.4028138274584707, + "learning_rate": 1.995649347969019e-05, + "loss": 0.0416, "step": 111 }, { - "epoch": 0.16766467065868262, - "grad_norm": 0.494627859147702, - "learning_rate": 1.9967339140580718e-05, - "loss": 0.0629, + "epoch": 0.1792, + "grad_norm": 0.30852447529803145, + "learning_rate": 1.9954868431510764e-05, + "loss": 0.0365, "step": 112 }, { - "epoch": 0.1691616766467066, - "grad_norm": 0.21122512164165028, - "learning_rate": 1.9966020389626173e-05, - "loss": 0.0611, + "epoch": 0.1808, + "grad_norm": 0.5489679840744338, + "learning_rate": 1.995321365658317e-05, + "loss": 0.0379, "step": 113 }, { - "epoch": 0.17065868263473055, - "grad_norm": 0.22452536057932843, - "learning_rate": 1.9964675584586918e-05, - "loss": 0.0613, + "epoch": 0.1824, + "grad_norm": 0.2677070683435972, + "learning_rate": 1.9951529159848805e-05, + "loss": 0.0415, "step": 114 }, { - "epoch": 0.1721556886227545, - "grad_norm": 0.1937038289344653, - "learning_rate": 1.996330472897866e-05, - "loss": 0.0568, + "epoch": 0.184, + "grad_norm": 0.22638499773534226, + "learning_rate": 1.994981494633784e-05, + "loss": 0.0319, "step": 115 }, { - "epoch": 0.17365269461077845, - "grad_norm": 0.21370437758625413, - "learning_rate": 1.996190782638521e-05, - "loss": 0.0623, + "epoch": 0.1856, + "grad_norm": 0.35815794363384784, + "learning_rate": 1.9948071021169176e-05, + "loss": 0.0339, "step": 116 }, { - "epoch": 0.1751497005988024, - "grad_norm": 0.2026742633701451, - "learning_rate": 1.9960484880458486e-05, - "loss": 0.0603, + "epoch": 0.1872, + "grad_norm": 0.3342618533336046, + "learning_rate": 1.9946297389550433e-05, + "loss": 0.0341, "step": 117 }, { - "epoch": 0.17664670658682635, - "grad_norm": 0.22896351028877954, - "learning_rate": 1.995903589491848e-05, - "loss": 0.0599, + "epoch": 0.1888, + "grad_norm": 0.26538131081219346, + "learning_rate": 1.9944494056777945e-05, + "loss": 0.0326, "step": 118 }, { - "epoch": 0.1781437125748503, - "grad_norm": 0.3143604037843644, - "learning_rate": 1.995756087355327e-05, - "loss": 0.06, + "epoch": 0.1904, + "grad_norm": 0.26670239092609044, + "learning_rate": 1.9942661028236746e-05, + "loss": 0.028, "step": 119 }, { - "epoch": 0.17964071856287425, - "grad_norm": 0.24857151161776553, - "learning_rate": 1.9956059820218982e-05, - "loss": 0.0598, + "epoch": 0.192, + "grad_norm": 0.290242888748579, + "learning_rate": 1.9940798309400527e-05, + "loss": 0.0347, "step": 120 }, { - "epoch": 0.18113772455089822, - "grad_norm": 0.23471075757390752, - "learning_rate": 1.9954532738839814e-05, - "loss": 0.0583, + "epoch": 0.1936, + "grad_norm": 0.4557618271323126, + "learning_rate": 1.9938905905831657e-05, + "loss": 0.0322, "step": 121 }, { - "epoch": 0.18263473053892215, - "grad_norm": 0.1933238932543813, - "learning_rate": 1.9952979633408002e-05, - "loss": 0.0572, + "epoch": 0.1952, + "grad_norm": 0.263625615146275, + "learning_rate": 1.9936983823181132e-05, + "loss": 0.0336, "step": 122 }, { - "epoch": 0.18413173652694612, - "grad_norm": 0.21990365595016542, - "learning_rate": 1.9951400507983812e-05, - "loss": 0.0585, + "epoch": 0.1968, + "grad_norm": 0.22436028726042295, + "learning_rate": 1.993503206718859e-05, + "loss": 0.0303, "step": 123 }, { - "epoch": 0.18562874251497005, - "grad_norm": 0.19297438870815392, - "learning_rate": 1.9949795366695545e-05, - "loss": 0.0577, + "epoch": 0.1984, + "grad_norm": 0.36036424972145303, + "learning_rate": 1.993305064368227e-05, + "loss": 0.0309, "step": 124 }, { - "epoch": 0.18712574850299402, - "grad_norm": 0.2355462451136706, - "learning_rate": 1.9948164213739502e-05, - "loss": 0.0558, + "epoch": 0.2, + "grad_norm": 0.2636999950967481, + "learning_rate": 1.9931039558578997e-05, + "loss": 0.0273, "step": 125 }, { - "epoch": 0.18862275449101795, - "grad_norm": 0.19921818148554826, - "learning_rate": 1.9946507053379998e-05, - "loss": 0.061, + "epoch": 0.2016, + "grad_norm": 0.2761731394185584, + "learning_rate": 1.9928998817884185e-05, + "loss": 0.0293, "step": 126 }, { - "epoch": 0.19011976047904192, - "grad_norm": 0.24443384088619968, - "learning_rate": 1.994482388994933e-05, - "loss": 0.0605, + "epoch": 0.2032, + "grad_norm": 0.30374955920113056, + "learning_rate": 1.9926928427691788e-05, + "loss": 0.0252, "step": 127 }, { - "epoch": 0.19161676646706588, - "grad_norm": 0.17312333530517943, - "learning_rate": 1.9943114727847782e-05, - "loss": 0.0604, + "epoch": 0.2048, + "grad_norm": 0.23433485832962797, + "learning_rate": 1.9924828394184308e-05, + "loss": 0.0274, "step": 128 }, { - "epoch": 0.19311377245508982, - "grad_norm": 0.22275107608465397, - "learning_rate": 1.9941379571543597e-05, - "loss": 0.0559, + "epoch": 0.2064, + "grad_norm": 0.2737766558936752, + "learning_rate": 1.992269872363277e-05, + "loss": 0.0251, "step": 129 }, { - "epoch": 0.19461077844311378, - "grad_norm": 0.21284771446183928, - "learning_rate": 1.9939618425572985e-05, - "loss": 0.0577, + "epoch": 0.208, + "grad_norm": 0.37112868758464157, + "learning_rate": 1.992053942239668e-05, + "loss": 0.0284, "step": 130 }, { - "epoch": 0.19610778443113772, - "grad_norm": 0.23636170247374946, - "learning_rate": 1.9937831294540094e-05, - "loss": 0.0576, + "epoch": 0.2096, + "grad_norm": 0.25459224936986335, + "learning_rate": 1.991835049692405e-05, + "loss": 0.0307, "step": 131 }, { - "epoch": 0.19760479041916168, - "grad_norm": 0.21444998693247203, - "learning_rate": 1.9936018183117003e-05, - "loss": 0.0575, + "epoch": 0.2112, + "grad_norm": 0.2902652013361823, + "learning_rate": 1.9916131953751342e-05, + "loss": 0.0245, "step": 132 }, { - "epoch": 0.19910179640718562, - "grad_norm": 0.2216207633715879, - "learning_rate": 1.993417909604372e-05, - "loss": 0.0549, + "epoch": 0.2128, + "grad_norm": 0.4198056600285124, + "learning_rate": 1.991388379950346e-05, + "loss": 0.0318, "step": 133 }, { - "epoch": 0.20059880239520958, - "grad_norm": 0.2520990703952128, - "learning_rate": 1.993231403812815e-05, - "loss": 0.0588, + "epoch": 0.2144, + "grad_norm": 0.28382438818772865, + "learning_rate": 1.9911606040893742e-05, + "loss": 0.0283, "step": 134 }, { - "epoch": 0.20209580838323354, - "grad_norm": 0.22625505105344712, - "learning_rate": 1.99304230142461e-05, - "loss": 0.0545, + "epoch": 0.216, + "grad_norm": 0.23336607870897053, + "learning_rate": 1.9909298684723905e-05, + "loss": 0.0246, "step": 135 }, { - "epoch": 0.20359281437125748, - "grad_norm": 0.17746500333059273, - "learning_rate": 1.9928506029341256e-05, - "loss": 0.0588, + "epoch": 0.2176, + "grad_norm": 0.2918806911818325, + "learning_rate": 1.990696173788408e-05, + "loss": 0.0243, "step": 136 }, { - "epoch": 0.20508982035928144, - "grad_norm": 0.17244614526395785, - "learning_rate": 1.9926563088425177e-05, - "loss": 0.0561, + "epoch": 0.2192, + "grad_norm": 0.3086628915284369, + "learning_rate": 1.9904595207352736e-05, + "loss": 0.0272, "step": 137 }, { - "epoch": 0.20658682634730538, - "grad_norm": 0.17124109763728518, - "learning_rate": 1.992459419657728e-05, - "loss": 0.0562, + "epoch": 0.2208, + "grad_norm": 0.3683163021055742, + "learning_rate": 1.9902199100196697e-05, + "loss": 0.0238, "step": 138 }, { - "epoch": 0.20808383233532934, - "grad_norm": 0.1984353051118203, - "learning_rate": 1.9922599358944823e-05, - "loss": 0.0563, + "epoch": 0.2224, + "grad_norm": 0.33375531864826613, + "learning_rate": 1.9899773423571102e-05, + "loss": 0.0253, "step": 139 }, { - "epoch": 0.20958083832335328, - "grad_norm": 0.2614654611155798, - "learning_rate": 1.9920578580742892e-05, - "loss": 0.0556, + "epoch": 0.224, + "grad_norm": 0.23272431144615036, + "learning_rate": 1.9897318184719386e-05, + "loss": 0.0198, "step": 140 }, { - "epoch": 0.21107784431137724, - "grad_norm": 0.22618621587850632, - "learning_rate": 1.9918531867254386e-05, - "loss": 0.056, + "epoch": 0.2256, + "grad_norm": 0.39233790536820173, + "learning_rate": 1.9894833390973266e-05, + "loss": 0.0247, "step": 141 }, { - "epoch": 0.2125748502994012, - "grad_norm": 0.1686791845721939, - "learning_rate": 1.9916459223830018e-05, - "loss": 0.0549, + "epoch": 0.2272, + "grad_norm": 0.3207050735210565, + "learning_rate": 1.989231904975272e-05, + "loss": 0.0217, "step": 142 }, { - "epoch": 0.21407185628742514, - "grad_norm": 0.27469186421828573, - "learning_rate": 1.991436065588828e-05, - "loss": 0.0531, + "epoch": 0.2288, + "grad_norm": 0.18878538321873145, + "learning_rate": 1.9889775168565942e-05, + "loss": 0.0195, "step": 143 }, { - "epoch": 0.2155688622754491, - "grad_norm": 0.28877488507125365, - "learning_rate": 1.9912236168915443e-05, - "loss": 0.0562, + "epoch": 0.2304, + "grad_norm": 0.331980372714454, + "learning_rate": 1.9887201755009358e-05, + "loss": 0.0222, "step": 144 }, { - "epoch": 0.21706586826347304, - "grad_norm": 0.1726289274307483, - "learning_rate": 1.991008576846553e-05, - "loss": 0.0538, + "epoch": 0.232, + "grad_norm": 0.2617001176043066, + "learning_rate": 1.9884598816767563e-05, + "loss": 0.0215, "step": 145 }, { - "epoch": 0.218562874251497, - "grad_norm": 0.1912420614588707, - "learning_rate": 1.990790946016032e-05, - "loss": 0.0528, + "epoch": 0.2336, + "grad_norm": 0.1998476020770707, + "learning_rate": 1.988196636161333e-05, + "loss": 0.021, "step": 146 }, { - "epoch": 0.22005988023952097, - "grad_norm": 0.23379848312481374, - "learning_rate": 1.9905707249689318e-05, - "loss": 0.052, + "epoch": 0.2352, + "grad_norm": 0.15711441770336212, + "learning_rate": 1.987930439740757e-05, + "loss": 0.0168, "step": 147 }, { - "epoch": 0.2215568862275449, - "grad_norm": 0.24847391624574525, - "learning_rate": 1.990347914280974e-05, - "loss": 0.0502, + "epoch": 0.2368, + "grad_norm": 0.18823706933202605, + "learning_rate": 1.987661293209931e-05, + "loss": 0.016, "step": 148 }, { - "epoch": 0.22305389221556887, - "grad_norm": 0.3441419819000447, - "learning_rate": 1.990122514534651e-05, - "loss": 0.0575, + "epoch": 0.2384, + "grad_norm": 0.2203924846273772, + "learning_rate": 1.9873891973725673e-05, + "loss": 0.022, "step": 149 }, { - "epoch": 0.2245508982035928, - "grad_norm": 0.3955212700237536, - "learning_rate": 1.989894526319224e-05, - "loss": 0.0542, + "epoch": 0.24, + "grad_norm": 0.22095723495277175, + "learning_rate": 1.9871141530411854e-05, + "loss": 0.0212, "step": 150 }, { - "epoch": 0.22604790419161677, - "grad_norm": 0.2380289767466844, - "learning_rate": 1.98966395023072e-05, - "loss": 0.0537, + "epoch": 0.2416, + "grad_norm": 0.2291413194508601, + "learning_rate": 1.98683616103711e-05, + "loss": 0.0163, "step": 151 }, { - "epoch": 0.2275449101796407, - "grad_norm": 0.26238101314438406, - "learning_rate": 1.989430786871932e-05, - "loss": 0.054, + "epoch": 0.2432, + "grad_norm": 0.2935224449509153, + "learning_rate": 1.986555222190467e-05, + "loss": 0.0195, "step": 152 }, { - "epoch": 0.22904191616766467, - "grad_norm": 0.31325198919582375, - "learning_rate": 1.989195036852418e-05, - "loss": 0.0563, + "epoch": 0.2448, + "grad_norm": 0.3524972156636008, + "learning_rate": 1.986271337340182e-05, + "loss": 0.015, "step": 153 }, { - "epoch": 0.23053892215568864, - "grad_norm": 0.27559129084895034, - "learning_rate": 1.9889567007884965e-05, - "loss": 0.0527, + "epoch": 0.2464, + "grad_norm": 0.21658492228652335, + "learning_rate": 1.9859845073339788e-05, + "loss": 0.018, "step": 154 }, { - "epoch": 0.23203592814371257, - "grad_norm": 0.2036748289588086, - "learning_rate": 1.988715779303248e-05, - "loss": 0.0523, + "epoch": 0.248, + "grad_norm": 0.26769149602569436, + "learning_rate": 1.9856947330283752e-05, + "loss": 0.0177, "step": 155 }, { - "epoch": 0.23353293413173654, - "grad_norm": 0.21158749273476493, - "learning_rate": 1.988472273026511e-05, - "loss": 0.0517, + "epoch": 0.2496, + "grad_norm": 0.23444425440366023, + "learning_rate": 1.9854020152886816e-05, + "loss": 0.0206, "step": 156 }, { - "epoch": 0.23502994011976047, - "grad_norm": 0.3777409399476317, - "learning_rate": 1.9882261825948828e-05, - "loss": 0.0545, + "epoch": 0.2512, + "grad_norm": 0.45183746759896726, + "learning_rate": 1.985106354988997e-05, + "loss": 0.0284, "step": 157 }, { - "epoch": 0.23652694610778444, - "grad_norm": 0.16198901337349092, - "learning_rate": 1.9879775086517145e-05, - "loss": 0.049, + "epoch": 0.2528, + "grad_norm": 0.22812452622594312, + "learning_rate": 1.9848077530122083e-05, + "loss": 0.0159, "step": 158 }, { - "epoch": 0.23802395209580837, - "grad_norm": 0.28600911159833825, - "learning_rate": 1.9877262518471133e-05, - "loss": 0.0511, + "epoch": 0.2544, + "grad_norm": 0.29949675168255213, + "learning_rate": 1.984506210249986e-05, + "loss": 0.0209, "step": 159 }, { - "epoch": 0.23952095808383234, - "grad_norm": 0.21326314303753005, - "learning_rate": 1.9874724128379373e-05, - "loss": 0.0511, + "epoch": 0.256, + "grad_norm": 0.20288486241622716, + "learning_rate": 1.984201727602783e-05, + "loss": 0.0175, "step": 160 }, { - "epoch": 0.2410179640718563, - "grad_norm": 0.23896462883526792, - "learning_rate": 1.9872159922877955e-05, - "loss": 0.052, + "epoch": 0.2576, + "grad_norm": 0.24153624253906011, + "learning_rate": 1.9838943059798305e-05, + "loss": 0.016, "step": 161 }, { - "epoch": 0.24251497005988024, - "grad_norm": 0.24778993752258044, - "learning_rate": 1.9869569908670466e-05, - "loss": 0.0503, + "epoch": 0.2592, + "grad_norm": 0.2769997843018402, + "learning_rate": 1.983583946299136e-05, + "loss": 0.0164, "step": 162 }, { - "epoch": 0.2440119760479042, - "grad_norm": 0.2678509446705851, - "learning_rate": 1.9866954092527954e-05, - "loss": 0.0509, + "epoch": 0.2608, + "grad_norm": 0.20799564640411272, + "learning_rate": 1.9832706494874812e-05, + "loss": 0.0158, "step": 163 }, { - "epoch": 0.24550898203592814, - "grad_norm": 0.4976820467779877, - "learning_rate": 1.9864312481288934e-05, - "loss": 0.0452, + "epoch": 0.2624, + "grad_norm": 0.23601523435408028, + "learning_rate": 1.9829544164804172e-05, + "loss": 0.016, "step": 164 }, { - "epoch": 0.2470059880239521, - "grad_norm": 0.21451893299135652, - "learning_rate": 1.9861645081859334e-05, - "loss": 0.0503, + "epoch": 0.264, + "grad_norm": 0.22418166608529375, + "learning_rate": 1.982635248222264e-05, + "loss": 0.0191, "step": 165 }, { - "epoch": 0.24850299401197604, - "grad_norm": 0.18172472245421414, - "learning_rate": 1.9858951901212525e-05, - "loss": 0.0467, + "epoch": 0.2656, + "grad_norm": 0.13054329188698238, + "learning_rate": 1.9823131456661064e-05, + "loss": 0.011, "step": 166 }, { - "epoch": 0.25, - "grad_norm": 0.1831777829268529, - "learning_rate": 1.9856232946389264e-05, - "loss": 0.046, + "epoch": 0.2672, + "grad_norm": 0.18252261238651618, + "learning_rate": 1.9819881097737917e-05, + "loss": 0.0122, "step": 167 }, { - "epoch": 0.25149700598802394, - "grad_norm": 0.24695897142692572, - "learning_rate": 1.985348822449769e-05, - "loss": 0.0449, + "epoch": 0.2688, + "grad_norm": 0.1726449946139791, + "learning_rate": 1.9816601415159266e-05, + "loss": 0.012, "step": 168 }, { - "epoch": 0.25299401197604793, - "grad_norm": 0.24320724433114305, - "learning_rate": 1.985071774271331e-05, - "loss": 0.0514, + "epoch": 0.2704, + "grad_norm": 0.28649148702667626, + "learning_rate": 1.9813292418718734e-05, + "loss": 0.0199, "step": 169 }, { - "epoch": 0.25449101796407186, - "grad_norm": 0.29316721920718963, - "learning_rate": 1.9847921508278977e-05, - "loss": 0.0455, + "epoch": 0.272, + "grad_norm": 0.32637951614090105, + "learning_rate": 1.980995411829749e-05, + "loss": 0.0183, "step": 170 }, { - "epoch": 0.2559880239520958, - "grad_norm": 0.24473542108780105, - "learning_rate": 1.984509952850485e-05, - "loss": 0.0477, + "epoch": 0.2736, + "grad_norm": 0.1924690150414372, + "learning_rate": 1.9806586523864212e-05, + "loss": 0.0134, "step": 171 }, { - "epoch": 0.25748502994011974, - "grad_norm": 0.29195204962744054, - "learning_rate": 1.9842251810768415e-05, - "loss": 0.0454, + "epoch": 0.2752, + "grad_norm": 0.3053757080333075, + "learning_rate": 1.980318964547504e-05, + "loss": 0.0205, "step": 172 }, { - "epoch": 0.25898203592814373, - "grad_norm": 0.26054012611614774, - "learning_rate": 1.983937836251444e-05, - "loss": 0.0475, + "epoch": 0.2768, + "grad_norm": 0.20048108515719862, + "learning_rate": 1.9799763493273572e-05, + "loss": 0.0104, "step": 173 }, { - "epoch": 0.26047904191616766, - "grad_norm": 0.2683808403595366, - "learning_rate": 1.983647919125495e-05, - "loss": 0.0469, + "epoch": 0.2784, + "grad_norm": 0.2732627182334408, + "learning_rate": 1.9796308077490817e-05, + "loss": 0.0122, "step": 174 }, { - "epoch": 0.2619760479041916, - "grad_norm": 0.2798959888989543, - "learning_rate": 1.983355430456923e-05, - "loss": 0.0457, + "epoch": 0.28, + "grad_norm": 0.6532880269049472, + "learning_rate": 1.9792823408445173e-05, + "loss": 0.0162, "step": 175 }, { - "epoch": 0.2634730538922156, - "grad_norm": 0.3230989454804457, - "learning_rate": 1.983060371010378e-05, - "loss": 0.0481, + "epoch": 0.2816, + "grad_norm": 0.38492332201417967, + "learning_rate": 1.978930949654239e-05, + "loss": 0.0164, "step": 176 }, { - "epoch": 0.26497005988023953, - "grad_norm": 0.28580432616804236, - "learning_rate": 1.9827627415572328e-05, - "loss": 0.0428, + "epoch": 0.2832, + "grad_norm": 0.3503764828469701, + "learning_rate": 1.978576635227554e-05, + "loss": 0.0119, "step": 177 }, { - "epoch": 0.26646706586826346, - "grad_norm": 0.3713352257679464, - "learning_rate": 1.982462542875576e-05, - "loss": 0.0467, + "epoch": 0.2848, + "grad_norm": 0.31142382540887115, + "learning_rate": 1.9782193986224997e-05, + "loss": 0.0109, "step": 178 }, { - "epoch": 0.2679640718562874, - "grad_norm": 0.2792642088474226, - "learning_rate": 1.982159775750216e-05, - "loss": 0.0468, + "epoch": 0.2864, + "grad_norm": 0.36239173703322675, + "learning_rate": 1.9778592409058376e-05, + "loss": 0.0154, "step": 179 }, { - "epoch": 0.2694610778443114, - "grad_norm": 0.26225012059799724, - "learning_rate": 1.9818544409726734e-05, - "loss": 0.0481, + "epoch": 0.288, + "grad_norm": 0.30337585394648847, + "learning_rate": 1.9774961631530543e-05, + "loss": 0.0122, "step": 180 }, { - "epoch": 0.27095808383233533, - "grad_norm": 0.320954008862723, - "learning_rate": 1.981546539341183e-05, - "loss": 0.0444, + "epoch": 0.2896, + "grad_norm": 0.35293599460078406, + "learning_rate": 1.9771301664483548e-05, + "loss": 0.0125, "step": 181 }, { - "epoch": 0.27245508982035926, - "grad_norm": 0.3027600817067956, - "learning_rate": 1.9812360716606886e-05, - "loss": 0.0461, + "epoch": 0.2912, + "grad_norm": 0.31600914873730507, + "learning_rate": 1.976761251884661e-05, + "loss": 0.0134, "step": 182 }, { - "epoch": 0.27395209580838326, - "grad_norm": 0.28110581468430873, - "learning_rate": 1.9809230387428444e-05, - "loss": 0.0445, + "epoch": 0.2928, + "grad_norm": 0.26927693320635504, + "learning_rate": 1.976389420563607e-05, + "loss": 0.0125, "step": 183 }, { - "epoch": 0.2754491017964072, - "grad_norm": 0.30945413021599966, - "learning_rate": 1.980607441406009e-05, - "loss": 0.0441, + "epoch": 0.2944, + "grad_norm": 0.29561899390721047, + "learning_rate": 1.9760146735955388e-05, + "loss": 0.0143, "step": 184 }, { - "epoch": 0.27694610778443113, - "grad_norm": 0.29814499120640103, - "learning_rate": 1.9802892804752462e-05, - "loss": 0.0434, + "epoch": 0.296, + "grad_norm": 0.4715335009988575, + "learning_rate": 1.975637012099507e-05, + "loss": 0.0162, "step": 185 }, { - "epoch": 0.27844311377245506, - "grad_norm": 0.34230417502487365, - "learning_rate": 1.9799685567823215e-05, - "loss": 0.0444, + "epoch": 0.2976, + "grad_norm": 0.3567896197888384, + "learning_rate": 1.9752564372032655e-05, + "loss": 0.0097, "step": 186 }, { - "epoch": 0.27994011976047906, - "grad_norm": 0.2518591743527993, - "learning_rate": 1.9796452711657002e-05, - "loss": 0.0397, + "epoch": 0.2992, + "grad_norm": 0.3840699761704516, + "learning_rate": 1.97487295004327e-05, + "loss": 0.0127, "step": 187 }, { - "epoch": 0.281437125748503, - "grad_norm": 0.3292942776587782, - "learning_rate": 1.9793194244705453e-05, - "loss": 0.0452, + "epoch": 0.3008, + "grad_norm": 0.14860107530375916, + "learning_rate": 1.974486551764671e-05, + "loss": 0.008, "step": 188 }, { - "epoch": 0.28293413173652693, - "grad_norm": 0.2582695780716865, - "learning_rate": 1.9789910175487147e-05, - "loss": 0.0418, + "epoch": 0.3024, + "grad_norm": 0.3713295730970593, + "learning_rate": 1.9740972435213114e-05, + "loss": 0.0112, "step": 189 }, { - "epoch": 0.2844311377245509, - "grad_norm": 0.2730893776861321, - "learning_rate": 1.97866005125876e-05, - "loss": 0.0402, + "epoch": 0.304, + "grad_norm": 0.2519884849457611, + "learning_rate": 1.973705026475726e-05, + "loss": 0.0113, "step": 190 }, { - "epoch": 0.28592814371257486, - "grad_norm": 0.3111167647085528, - "learning_rate": 1.9783265264659244e-05, - "loss": 0.0395, + "epoch": 0.3056, + "grad_norm": 0.40990913449865773, + "learning_rate": 1.9733099017991342e-05, + "loss": 0.0139, "step": 191 }, { - "epoch": 0.2874251497005988, - "grad_norm": 0.39076695341386136, - "learning_rate": 1.977990444042138e-05, - "loss": 0.0514, + "epoch": 0.3072, + "grad_norm": 0.3385724214686744, + "learning_rate": 1.9729118706714377e-05, + "loss": 0.01, "step": 192 }, { - "epoch": 0.28892215568862273, - "grad_norm": 0.43768952581558807, - "learning_rate": 1.9776518048660188e-05, - "loss": 0.046, + "epoch": 0.3088, + "grad_norm": 0.22538894424367692, + "learning_rate": 1.972510934281218e-05, + "loss": 0.0091, "step": 193 }, { - "epoch": 0.2904191616766467, - "grad_norm": 0.3165460062789067, - "learning_rate": 1.9773106098228685e-05, - "loss": 0.0449, + "epoch": 0.3104, + "grad_norm": 0.21366259733010207, + "learning_rate": 1.9721070938257326e-05, + "loss": 0.0131, "step": 194 }, { - "epoch": 0.29191616766467066, - "grad_norm": 0.28739375717770865, - "learning_rate": 1.97696685980467e-05, - "loss": 0.0434, + "epoch": 0.312, + "grad_norm": 0.23522891058948267, + "learning_rate": 1.9717003505109097e-05, + "loss": 0.0136, "step": 195 }, { - "epoch": 0.2934131736526946, - "grad_norm": 0.32294252936754736, - "learning_rate": 1.976620555710087e-05, - "loss": 0.0447, + "epoch": 0.3136, + "grad_norm": 0.19459287049017815, + "learning_rate": 1.971290705551347e-05, + "loss": 0.0108, "step": 196 }, { - "epoch": 0.2949101796407186, - "grad_norm": 0.3345999711440401, - "learning_rate": 1.9762716984444585e-05, - "loss": 0.0456, + "epoch": 0.3152, + "grad_norm": 0.3280138838547234, + "learning_rate": 1.9708781601703066e-05, + "loss": 0.0106, "step": 197 }, { - "epoch": 0.2964071856287425, - "grad_norm": 0.31106180218416374, - "learning_rate": 1.9759202889197994e-05, - "loss": 0.0389, + "epoch": 0.3168, + "grad_norm": 0.4193956433136829, + "learning_rate": 1.970462715599711e-05, + "loss": 0.0146, "step": 198 }, { - "epoch": 0.29790419161676646, - "grad_norm": 0.25582092602450185, - "learning_rate": 1.975566328054797e-05, - "loss": 0.0433, + "epoch": 0.3184, + "grad_norm": 0.18897686427726873, + "learning_rate": 1.9700443730801412e-05, + "loss": 0.0091, "step": 199 }, { - "epoch": 0.2994011976047904, - "grad_norm": 0.2037709453766463, - "learning_rate": 1.9752098167748083e-05, - "loss": 0.0348, + "epoch": 0.32, + "grad_norm": 0.24472231112640697, + "learning_rate": 1.9696231338608317e-05, + "loss": 0.0107, "step": 200 }, { - "epoch": 0.3008982035928144, - "grad_norm": 0.34634898932379976, - "learning_rate": 1.9748507560118573e-05, - "loss": 0.0398, + "epoch": 0.3216, + "grad_norm": 0.2535191242691839, + "learning_rate": 1.9691989991996663e-05, + "loss": 0.0113, "step": 201 }, { - "epoch": 0.3023952095808383, - "grad_norm": 0.38910405891750727, - "learning_rate": 1.974489146704634e-05, - "loss": 0.0393, + "epoch": 0.3232, + "grad_norm": 0.24198007218272746, + "learning_rate": 1.9687719703631757e-05, + "loss": 0.0092, "step": 202 }, { - "epoch": 0.30389221556886226, - "grad_norm": 0.33295863522793434, - "learning_rate": 1.9741249897984907e-05, - "loss": 0.0384, + "epoch": 0.3248, + "grad_norm": 0.32135157441024376, + "learning_rate": 1.9683420486265328e-05, + "loss": 0.0108, "step": 203 }, { - "epoch": 0.30538922155688625, - "grad_norm": 0.21725882145501477, - "learning_rate": 1.9737582862454398e-05, - "loss": 0.034, + "epoch": 0.3264, + "grad_norm": 0.24878978637482901, + "learning_rate": 1.967909235273549e-05, + "loss": 0.011, "step": 204 }, { - "epoch": 0.3068862275449102, - "grad_norm": 0.3222703155967036, - "learning_rate": 1.9733890370041505e-05, - "loss": 0.0379, + "epoch": 0.328, + "grad_norm": 0.28919135086647363, + "learning_rate": 1.967473531596671e-05, + "loss": 0.0086, "step": 205 }, { - "epoch": 0.3083832335329341, - "grad_norm": 0.36543573520717576, - "learning_rate": 1.9730172430399494e-05, - "loss": 0.0368, + "epoch": 0.3296, + "grad_norm": 0.3149664501299847, + "learning_rate": 1.9670349388969758e-05, + "loss": 0.0089, "step": 206 }, { - "epoch": 0.30988023952095806, - "grad_norm": 0.2972736130414979, - "learning_rate": 1.972642905324813e-05, - "loss": 0.0387, + "epoch": 0.3312, + "grad_norm": 0.46074087094839206, + "learning_rate": 1.966593458484168e-05, + "loss": 0.0115, "step": 207 }, { - "epoch": 0.31137724550898205, - "grad_norm": 0.27528054613918207, - "learning_rate": 1.9722660248373707e-05, - "loss": 0.0337, + "epoch": 0.3328, + "grad_norm": 0.32453985066034696, + "learning_rate": 1.9661490916765752e-05, + "loss": 0.0078, "step": 208 }, { - "epoch": 0.312874251497006, - "grad_norm": 0.2514931870827675, - "learning_rate": 1.971886602562897e-05, - "loss": 0.0359, + "epoch": 0.3344, + "grad_norm": 0.21726445303846265, + "learning_rate": 1.9657018398011435e-05, + "loss": 0.0075, "step": 209 }, { - "epoch": 0.3143712574850299, - "grad_norm": 0.33119244111260354, - "learning_rate": 1.9715046394933126e-05, - "loss": 0.0377, + "epoch": 0.336, + "grad_norm": 0.29005944041139997, + "learning_rate": 1.9652517041934357e-05, + "loss": 0.0065, "step": 210 }, { - "epoch": 0.3158682634730539, - "grad_norm": 0.2644246425467484, - "learning_rate": 1.971120136627181e-05, - "loss": 0.0391, + "epoch": 0.3376, + "grad_norm": 0.13171213095514112, + "learning_rate": 1.9647986861976246e-05, + "loss": 0.0044, "step": 211 }, { - "epoch": 0.31736526946107785, - "grad_norm": 0.23488800200566334, - "learning_rate": 1.970733094969704e-05, - "loss": 0.0305, + "epoch": 0.3392, + "grad_norm": 0.22782619185124792, + "learning_rate": 1.9643427871664912e-05, + "loss": 0.0062, "step": 212 }, { - "epoch": 0.3188622754491018, - "grad_norm": 0.3336820637038372, - "learning_rate": 1.970343515532722e-05, - "loss": 0.0347, + "epoch": 0.3408, + "grad_norm": 0.2248678754440452, + "learning_rate": 1.9638840084614182e-05, + "loss": 0.0062, "step": 213 }, { - "epoch": 0.3203592814371258, - "grad_norm": 0.32537350598910286, - "learning_rate": 1.969951399334709e-05, - "loss": 0.0383, + "epoch": 0.3424, + "grad_norm": 0.3625913388024184, + "learning_rate": 1.963422351452389e-05, + "loss": 0.0084, "step": 214 }, { - "epoch": 0.3218562874251497, - "grad_norm": 0.4477662129282192, - "learning_rate": 1.9695567474007713e-05, - "loss": 0.0375, + "epoch": 0.344, + "grad_norm": 0.15657625546350282, + "learning_rate": 1.9629578175179823e-05, + "loss": 0.0076, "step": 215 }, { - "epoch": 0.32335329341317365, - "grad_norm": 0.39632766587050555, - "learning_rate": 1.9691595607626443e-05, - "loss": 0.0306, + "epoch": 0.3456, + "grad_norm": 0.1517982622637864, + "learning_rate": 1.9624904080453656e-05, + "loss": 0.005, "step": 216 }, { - "epoch": 0.3248502994011976, - "grad_norm": 0.31164368475238186, - "learning_rate": 1.9687598404586895e-05, - "loss": 0.0317, + "epoch": 0.3472, + "grad_norm": 0.2758823005848304, + "learning_rate": 1.9620201244302952e-05, + "loss": 0.01, "step": 217 }, { - "epoch": 0.3263473053892216, - "grad_norm": 0.29356080957866204, - "learning_rate": 1.9683575875338922e-05, - "loss": 0.0319, + "epoch": 0.3488, + "grad_norm": 0.19837305214001089, + "learning_rate": 1.9615469680771097e-05, + "loss": 0.005, "step": 218 }, { - "epoch": 0.3278443113772455, - "grad_norm": 0.3170039835076496, - "learning_rate": 1.9679528030398596e-05, - "loss": 0.0329, + "epoch": 0.3504, + "grad_norm": 0.18124188268088945, + "learning_rate": 1.9610709403987248e-05, + "loss": 0.0045, "step": 219 }, { - "epoch": 0.32934131736526945, - "grad_norm": 0.30665504238301144, - "learning_rate": 1.9675454880348158e-05, - "loss": 0.0279, + "epoch": 0.352, + "grad_norm": 0.1894657499100413, + "learning_rate": 1.960592042816632e-05, + "loss": 0.0065, "step": 220 }, { - "epoch": 0.33083832335329344, - "grad_norm": 0.25789142501667217, - "learning_rate": 1.967135643583601e-05, - "loss": 0.029, + "epoch": 0.3536, + "grad_norm": 0.14435370426581687, + "learning_rate": 1.9601102767608924e-05, + "loss": 0.0043, "step": 221 }, { - "epoch": 0.3323353293413174, - "grad_norm": 0.29710874117629504, - "learning_rate": 1.966723270757669e-05, - "loss": 0.0339, + "epoch": 0.3552, + "grad_norm": 0.1695642548479439, + "learning_rate": 1.9596256436701324e-05, + "loss": 0.0046, "step": 222 }, { - "epoch": 0.3338323353293413, - "grad_norm": 0.3797724903024257, - "learning_rate": 1.966308370635082e-05, - "loss": 0.0317, + "epoch": 0.3568, + "grad_norm": 0.16480315731755024, + "learning_rate": 1.95913814499154e-05, + "loss": 0.005, "step": 223 }, { - "epoch": 0.33532934131736525, - "grad_norm": 0.3234195979276272, - "learning_rate": 1.96589094430051e-05, - "loss": 0.0282, + "epoch": 0.3584, + "grad_norm": 0.20501655629665766, + "learning_rate": 1.9586477821808597e-05, + "loss": 0.0048, "step": 224 }, { - "epoch": 0.33682634730538924, - "grad_norm": 0.3322138017326269, - "learning_rate": 1.965470992845227e-05, - "loss": 0.0289, + "epoch": 0.36, + "grad_norm": 0.24808881256292714, + "learning_rate": 1.95815455670239e-05, + "loss": 0.0068, "step": 225 }, { - "epoch": 0.3383233532934132, - "grad_norm": 0.24818051146540246, - "learning_rate": 1.9650485173671092e-05, - "loss": 0.0221, + "epoch": 0.3616, + "grad_norm": 0.20807812279666899, + "learning_rate": 1.957658470028977e-05, + "loss": 0.0046, "step": 226 }, { - "epoch": 0.3398203592814371, - "grad_norm": 0.42659472820256933, - "learning_rate": 1.96462351897063e-05, - "loss": 0.031, + "epoch": 0.3632, + "grad_norm": 0.12976277315186674, + "learning_rate": 1.9571595236420103e-05, + "loss": 0.003, "step": 227 }, { - "epoch": 0.3413173652694611, - "grad_norm": 0.4005980652683074, - "learning_rate": 1.9641959987668603e-05, - "loss": 0.0365, + "epoch": 0.3648, + "grad_norm": 0.35081120007947303, + "learning_rate": 1.95665771903142e-05, + "loss": 0.0045, "step": 228 }, { - "epoch": 0.34281437125748504, - "grad_norm": 0.4685928568147018, - "learning_rate": 1.9637659578734615e-05, - "loss": 0.0327, + "epoch": 0.3664, + "grad_norm": 0.18678758567379564, + "learning_rate": 1.9561530576956703e-05, + "loss": 0.0025, "step": 229 }, { - "epoch": 0.344311377245509, - "grad_norm": 0.3863947245851545, - "learning_rate": 1.9633333974146867e-05, - "loss": 0.0267, + "epoch": 0.368, + "grad_norm": 0.2726852511816301, + "learning_rate": 1.9556455411417575e-05, + "loss": 0.0067, "step": 230 }, { - "epoch": 0.3458083832335329, - "grad_norm": 0.5573187059327918, - "learning_rate": 1.9628983185213747e-05, - "loss": 0.0279, + "epoch": 0.3696, + "grad_norm": 0.1986239338872355, + "learning_rate": 1.955135170885202e-05, + "loss": 0.0066, "step": 231 }, { - "epoch": 0.3473053892215569, - "grad_norm": 0.3063425475105623, - "learning_rate": 1.962460722330949e-05, - "loss": 0.0254, + "epoch": 0.3712, + "grad_norm": 0.2686457903895003, + "learning_rate": 1.9546219484500475e-05, + "loss": 0.0058, "step": 232 }, { - "epoch": 0.34880239520958084, - "grad_norm": 0.25300385871670333, - "learning_rate": 1.9620206099874135e-05, - "loss": 0.0238, + "epoch": 0.3728, + "grad_norm": 0.13534531600226646, + "learning_rate": 1.9541058753688538e-05, + "loss": 0.0054, "step": 233 }, { - "epoch": 0.3502994011976048, - "grad_norm": 0.33330643618442385, - "learning_rate": 1.96157798264135e-05, - "loss": 0.023, + "epoch": 0.3744, + "grad_norm": 0.15207516826062709, + "learning_rate": 1.9535869531826938e-05, + "loss": 0.0035, "step": 234 }, { - "epoch": 0.35179640718562877, - "grad_norm": 0.3374148649272243, - "learning_rate": 1.9611328414499164e-05, - "loss": 0.0245, + "epoch": 0.376, + "grad_norm": 0.33924323511495663, + "learning_rate": 1.9530651834411477e-05, + "loss": 0.0078, "step": 235 }, { - "epoch": 0.3532934131736527, - "grad_norm": 0.38153658162458814, - "learning_rate": 1.9606851875768404e-05, - "loss": 0.0227, + "epoch": 0.3776, + "grad_norm": 0.3777186216574476, + "learning_rate": 1.952540567702299e-05, + "loss": 0.0021, "step": 236 }, { - "epoch": 0.35479041916167664, - "grad_norm": 0.31854741219630106, - "learning_rate": 1.9602350221924208e-05, - "loss": 0.0194, + "epoch": 0.3792, + "grad_norm": 0.130829431828979, + "learning_rate": 1.95201310753273e-05, + "loss": 0.002, "step": 237 }, { - "epoch": 0.3562874251497006, - "grad_norm": 0.3245371702157616, - "learning_rate": 1.9597823464735204e-05, - "loss": 0.0231, + "epoch": 0.3808, + "grad_norm": 0.17763136606881505, + "learning_rate": 1.951482804507517e-05, + "loss": 0.0045, "step": 238 }, { - "epoch": 0.35778443113772457, - "grad_norm": 0.3260937932412784, - "learning_rate": 1.9593271616035666e-05, - "loss": 0.0201, + "epoch": 0.3824, + "grad_norm": 0.1530279950906812, + "learning_rate": 1.9509496602102253e-05, + "loss": 0.0035, "step": 239 }, { - "epoch": 0.3592814371257485, - "grad_norm": 0.313463122278002, - "learning_rate": 1.9588694687725448e-05, - "loss": 0.0203, + "epoch": 0.384, + "grad_norm": 0.12479236693345638, + "learning_rate": 1.9504136762329046e-05, + "loss": 0.0026, "step": 240 }, { - "epoch": 0.36077844311377244, - "grad_norm": 0.4452480165482146, - "learning_rate": 1.9584092691769978e-05, - "loss": 0.0213, + "epoch": 0.3856, + "grad_norm": 0.18872893104962732, + "learning_rate": 1.9498748541760845e-05, + "loss": 0.002, "step": 241 }, { - "epoch": 0.36227544910179643, - "grad_norm": 0.38506762755559276, - "learning_rate": 1.9579465640200213e-05, - "loss": 0.0227, + "epoch": 0.3872, + "grad_norm": 0.2839066754069719, + "learning_rate": 1.949333195648769e-05, + "loss": 0.0034, "step": 242 }, { - "epoch": 0.36377245508982037, - "grad_norm": 0.39214871815864943, - "learning_rate": 1.957481354511262e-05, - "loss": 0.0199, + "epoch": 0.3888, + "grad_norm": 0.29568207016971043, + "learning_rate": 1.9487887022684336e-05, + "loss": 0.0044, "step": 243 }, { - "epoch": 0.3652694610778443, - "grad_norm": 0.3539339228414202, - "learning_rate": 1.957013641866913e-05, - "loss": 0.0225, + "epoch": 0.3904, + "grad_norm": 0.22126066178256676, + "learning_rate": 1.9482413756610175e-05, + "loss": 0.0035, "step": 244 }, { - "epoch": 0.36676646706586824, - "grad_norm": 0.3539330765036243, - "learning_rate": 1.9565434273097114e-05, - "loss": 0.0207, + "epoch": 0.392, + "grad_norm": 0.19602278733536263, + "learning_rate": 1.947691217460921e-05, + "loss": 0.0032, "step": 245 }, { - "epoch": 0.36826347305389223, - "grad_norm": 0.3753514827965324, - "learning_rate": 1.9560707120689354e-05, - "loss": 0.021, + "epoch": 0.3936, + "grad_norm": 0.4201801990164135, + "learning_rate": 1.9471382293110004e-05, + "loss": 0.0038, "step": 246 }, { - "epoch": 0.36976047904191617, - "grad_norm": 0.40608828993861595, - "learning_rate": 1.9555954973804013e-05, - "loss": 0.0181, + "epoch": 0.3952, + "grad_norm": 0.1668178273336579, + "learning_rate": 1.946582412862562e-05, + "loss": 0.004, "step": 247 }, { - "epoch": 0.3712574850299401, - "grad_norm": 0.3303053398457487, - "learning_rate": 1.9551177844864577e-05, - "loss": 0.0208, + "epoch": 0.3968, + "grad_norm": 0.2201357009016026, + "learning_rate": 1.9460237697753577e-05, + "loss": 0.0055, "step": 248 }, { - "epoch": 0.3727544910179641, - "grad_norm": 0.41949758179222085, - "learning_rate": 1.954637574635986e-05, - "loss": 0.0212, + "epoch": 0.3984, + "grad_norm": 0.32997962181070606, + "learning_rate": 1.9454623017175814e-05, + "loss": 0.0039, "step": 249 }, { - "epoch": 0.37425149700598803, - "grad_norm": 0.2764763651977154, - "learning_rate": 1.954154869084395e-05, - "loss": 0.0159, + "epoch": 0.4, + "grad_norm": 0.10041694708026828, + "learning_rate": 1.9448980103658613e-05, + "loss": 0.0023, "step": 250 }, { - "epoch": 0.37574850299401197, - "grad_norm": 0.39212670974147856, - "learning_rate": 1.9536696690936177e-05, - "loss": 0.0189, + "epoch": 0.4016, + "grad_norm": 0.2468138214155966, + "learning_rate": 1.9443308974052574e-05, + "loss": 0.005, "step": 251 }, { - "epoch": 0.3772455089820359, - "grad_norm": 0.32025921970118243, - "learning_rate": 1.9531819759321083e-05, - "loss": 0.0169, + "epoch": 0.4032, + "grad_norm": 0.15123509597682863, + "learning_rate": 1.943760964529255e-05, + "loss": 0.0024, "step": 252 }, { - "epoch": 0.3787425149700599, - "grad_norm": 0.3429001255459339, - "learning_rate": 1.9526917908748395e-05, - "loss": 0.0232, + "epoch": 0.4048, + "grad_norm": 0.08875285519072525, + "learning_rate": 1.9431882134397596e-05, + "loss": 0.0016, "step": 253 }, { - "epoch": 0.38023952095808383, - "grad_norm": 0.37665262959840423, - "learning_rate": 1.9521991152032974e-05, - "loss": 0.023, + "epoch": 0.4064, + "grad_norm": 0.21246156328609328, + "learning_rate": 1.9426126458470936e-05, + "loss": 0.0024, "step": 254 }, { - "epoch": 0.38173652694610777, - "grad_norm": 0.2649408332614438, - "learning_rate": 1.9517039502054805e-05, - "loss": 0.0153, + "epoch": 0.408, + "grad_norm": 0.07721338540221492, + "learning_rate": 1.9420342634699893e-05, + "loss": 0.0013, "step": 255 }, { - "epoch": 0.38323353293413176, - "grad_norm": 0.2772926318508054, - "learning_rate": 1.9512062971758937e-05, - "loss": 0.0177, + "epoch": 0.4096, + "grad_norm": 0.19326745142293367, + "learning_rate": 1.9414530680355837e-05, + "loss": 0.0017, "step": 256 }, { - "epoch": 0.3847305389221557, - "grad_norm": 0.333717122655441, - "learning_rate": 1.950706157415548e-05, - "loss": 0.0175, + "epoch": 0.4112, + "grad_norm": 0.17732669112234006, + "learning_rate": 1.9408690612794146e-05, + "loss": 0.0045, "step": 257 }, { - "epoch": 0.38622754491017963, - "grad_norm": 0.2881853890457447, - "learning_rate": 1.9502035322319546e-05, - "loss": 0.0197, + "epoch": 0.4128, + "grad_norm": 0.2250739228373143, + "learning_rate": 1.9402822449454154e-05, + "loss": 0.0025, "step": 258 }, { - "epoch": 0.38772455089820357, - "grad_norm": 0.3011392842260328, - "learning_rate": 1.949698422939122e-05, - "loss": 0.0194, + "epoch": 0.4144, + "grad_norm": 0.1541916050350658, + "learning_rate": 1.9396926207859085e-05, + "loss": 0.0019, "step": 259 }, { - "epoch": 0.38922155688622756, - "grad_norm": 0.25182071202500195, - "learning_rate": 1.9491908308575537e-05, - "loss": 0.0168, + "epoch": 0.416, + "grad_norm": 0.2841457747343044, + "learning_rate": 1.939100190561601e-05, + "loss": 0.0034, "step": 260 }, { - "epoch": 0.3907185628742515, - "grad_norm": 0.33730858021035043, - "learning_rate": 1.948680757314243e-05, - "loss": 0.0132, + "epoch": 0.4176, + "grad_norm": 0.30187004448055127, + "learning_rate": 1.9385049560415794e-05, + "loss": 0.0043, "step": 261 }, { - "epoch": 0.39221556886227543, - "grad_norm": 0.3785903917651171, - "learning_rate": 1.9481682036426707e-05, - "loss": 0.0151, + "epoch": 0.4192, + "grad_norm": 0.0886576343921295, + "learning_rate": 1.9379069190033042e-05, + "loss": 0.001, "step": 262 }, { - "epoch": 0.3937125748502994, - "grad_norm": 0.21775764169004, - "learning_rate": 1.9476531711828027e-05, - "loss": 0.0133, + "epoch": 0.4208, + "grad_norm": 0.40808645948350025, + "learning_rate": 1.9373060812326053e-05, + "loss": 0.0072, "step": 263 }, { - "epoch": 0.39520958083832336, - "grad_norm": 0.22229766801022924, - "learning_rate": 1.9471356612810832e-05, - "loss": 0.0103, + "epoch": 0.4224, + "grad_norm": 0.1549022328201513, + "learning_rate": 1.936702444523675e-05, + "loss": 0.0012, "step": 264 }, { - "epoch": 0.3967065868263473, - "grad_norm": 0.3032194913024956, - "learning_rate": 1.9466156752904344e-05, - "loss": 0.0131, + "epoch": 0.424, + "grad_norm": 0.21280823018695852, + "learning_rate": 1.9360960106790645e-05, + "loss": 0.0046, "step": 265 }, { - "epoch": 0.39820359281437123, - "grad_norm": 0.3924806246674395, - "learning_rate": 1.9460932145702514e-05, - "loss": 0.0164, + "epoch": 0.4256, + "grad_norm": 0.18046256341062997, + "learning_rate": 1.9354867815096772e-05, + "loss": 0.0038, "step": 266 }, { - "epoch": 0.3997005988023952, - "grad_norm": 0.2958650726177419, - "learning_rate": 1.9455682804863986e-05, - "loss": 0.0132, + "epoch": 0.4272, + "grad_norm": 0.08032384959654924, + "learning_rate": 1.9348747588347637e-05, + "loss": 0.0009, "step": 267 }, { - "epoch": 0.40119760479041916, - "grad_norm": 0.32232453177932663, - "learning_rate": 1.9450408744112072e-05, - "loss": 0.016, + "epoch": 0.4288, + "grad_norm": 0.22947414894255785, + "learning_rate": 1.9342599444819167e-05, + "loss": 0.0027, "step": 268 }, { - "epoch": 0.4026946107784431, - "grad_norm": 0.3492194562524053, - "learning_rate": 1.9445109977234704e-05, - "loss": 0.0145, + "epoch": 0.4304, + "grad_norm": 0.1845631124409086, + "learning_rate": 1.9336423402870655e-05, + "loss": 0.002, "step": 269 }, { - "epoch": 0.4041916167664671, - "grad_norm": 0.374197844485198, - "learning_rate": 1.9439786518084404e-05, - "loss": 0.0135, + "epoch": 0.432, + "grad_norm": 0.1400394929275762, + "learning_rate": 1.9330219480944693e-05, + "loss": 0.0022, "step": 270 }, { - "epoch": 0.405688622754491, - "grad_norm": 0.3268312149590773, - "learning_rate": 1.9434438380578248e-05, - "loss": 0.0164, + "epoch": 0.4336, + "grad_norm": 0.09366624568050941, + "learning_rate": 1.932398769756714e-05, + "loss": 0.0011, "step": 271 }, { - "epoch": 0.40718562874251496, - "grad_norm": 0.22923876520135972, - "learning_rate": 1.9429065578697833e-05, - "loss": 0.0121, + "epoch": 0.4352, + "grad_norm": 0.020707890357983063, + "learning_rate": 1.931772807134704e-05, + "loss": 0.0005, "step": 272 }, { - "epoch": 0.4086826347305389, - "grad_norm": 0.29090023648655766, - "learning_rate": 1.942366812648922e-05, - "loss": 0.0103, + "epoch": 0.4368, + "grad_norm": 0.18065363744047988, + "learning_rate": 1.9311440620976597e-05, + "loss": 0.0026, "step": 273 }, { - "epoch": 0.4101796407185629, - "grad_norm": 0.3117702117434219, - "learning_rate": 1.941824603806293e-05, - "loss": 0.0154, + "epoch": 0.4384, + "grad_norm": 0.2604405111691777, + "learning_rate": 1.9305125365231087e-05, + "loss": 0.0024, "step": 274 }, { - "epoch": 0.4116766467065868, - "grad_norm": 0.38775196821617675, - "learning_rate": 1.9412799327593884e-05, - "loss": 0.0106, + "epoch": 0.44, + "grad_norm": 0.10457477256845, + "learning_rate": 1.9298782322968817e-05, + "loss": 0.0015, "step": 275 }, { - "epoch": 0.41317365269461076, - "grad_norm": 0.2310944482235676, - "learning_rate": 1.9407328009321367e-05, - "loss": 0.0091, + "epoch": 0.4416, + "grad_norm": 0.09114897913357566, + "learning_rate": 1.929241151313108e-05, + "loss": 0.0017, "step": 276 }, { - "epoch": 0.41467065868263475, - "grad_norm": 0.23882380072426646, - "learning_rate": 1.940183209754901e-05, - "loss": 0.0096, + "epoch": 0.4432, + "grad_norm": 0.15876593967910152, + "learning_rate": 1.9286012954742078e-05, + "loss": 0.0012, "step": 277 }, { - "epoch": 0.4161676646706587, - "grad_norm": 0.47743508487331154, - "learning_rate": 1.939631160664472e-05, - "loss": 0.0138, + "epoch": 0.4448, + "grad_norm": 0.05828086512032646, + "learning_rate": 1.9279586666908886e-05, + "loss": 0.0007, "step": 278 }, { - "epoch": 0.4176646706586826, - "grad_norm": 0.24267162900266767, - "learning_rate": 1.939076655104068e-05, - "loss": 0.0127, + "epoch": 0.4464, + "grad_norm": 0.1754559731453586, + "learning_rate": 1.9273132668821363e-05, + "loss": 0.0045, "step": 279 }, { - "epoch": 0.41916167664670656, - "grad_norm": 0.25724021933042784, - "learning_rate": 1.9385196945233276e-05, - "loss": 0.008, + "epoch": 0.448, + "grad_norm": 0.16575326861643688, + "learning_rate": 1.9266650979752137e-05, + "loss": 0.0049, "step": 280 }, { - "epoch": 0.42065868263473055, - "grad_norm": 0.2863894617393607, - "learning_rate": 1.9379602803783088e-05, - "loss": 0.0118, + "epoch": 0.4496, + "grad_norm": 0.04984852486915607, + "learning_rate": 1.9260141619056507e-05, + "loss": 0.0006, "step": 281 }, { - "epoch": 0.4221556886227545, - "grad_norm": 0.24028996046153225, - "learning_rate": 1.9373984141314827e-05, - "loss": 0.0123, + "epoch": 0.4512, + "grad_norm": 0.08051247848310401, + "learning_rate": 1.925360460617242e-05, + "loss": 0.0012, "step": 282 }, { - "epoch": 0.4236526946107784, - "grad_norm": 0.24877798633512982, - "learning_rate": 1.936834097251732e-05, - "loss": 0.01, + "epoch": 0.4528, + "grad_norm": 0.07838917524463092, + "learning_rate": 1.924703996062038e-05, + "loss": 0.001, "step": 283 }, { - "epoch": 0.4251497005988024, - "grad_norm": 0.24480091385137137, - "learning_rate": 1.936267331214346e-05, - "loss": 0.0088, + "epoch": 0.4544, + "grad_norm": 0.11163090665148108, + "learning_rate": 1.9240447702003422e-05, + "loss": 0.0019, "step": 284 }, { - "epoch": 0.42664670658682635, - "grad_norm": 0.378474089158859, - "learning_rate": 1.9356981175010155e-05, - "loss": 0.0138, + "epoch": 0.456, + "grad_norm": 0.13500917519862524, + "learning_rate": 1.9233827850007028e-05, + "loss": 0.0039, "step": 285 }, { - "epoch": 0.4281437125748503, - "grad_norm": 0.2731112425501055, - "learning_rate": 1.9351264575998322e-05, - "loss": 0.0111, + "epoch": 0.4576, + "grad_norm": 0.05401903025575265, + "learning_rate": 1.9227180424399082e-05, + "loss": 0.0009, "step": 286 }, { - "epoch": 0.4296407185628742, - "grad_norm": 0.45385449828176233, - "learning_rate": 1.934552353005281e-05, - "loss": 0.0144, + "epoch": 0.4592, + "grad_norm": 0.15767776098462866, + "learning_rate": 1.9220505445029803e-05, + "loss": 0.0034, "step": 287 }, { - "epoch": 0.4311377245508982, - "grad_norm": 0.2426064120408043, - "learning_rate": 1.9339758052182396e-05, - "loss": 0.0113, + "epoch": 0.4608, + "grad_norm": 0.18565041494539808, + "learning_rate": 1.9213802931831697e-05, + "loss": 0.0021, "step": 288 }, { - "epoch": 0.43263473053892215, - "grad_norm": 0.2840729140180023, - "learning_rate": 1.933396815745972e-05, - "loss": 0.0088, + "epoch": 0.4624, + "grad_norm": 0.07266220337631138, + "learning_rate": 1.9207072904819484e-05, + "loss": 0.0008, "step": 289 }, { - "epoch": 0.4341317365269461, - "grad_norm": 0.4447034848523609, - "learning_rate": 1.932815386102125e-05, - "loss": 0.0104, + "epoch": 0.464, + "grad_norm": 0.0937701933953401, + "learning_rate": 1.9200315384090045e-05, + "loss": 0.0013, "step": 290 }, { - "epoch": 0.4356287425149701, - "grad_norm": 0.27318626797446804, - "learning_rate": 1.9322315178067255e-05, - "loss": 0.0082, + "epoch": 0.4656, + "grad_norm": 0.08508182747781863, + "learning_rate": 1.9193530389822364e-05, + "loss": 0.0007, "step": 291 }, { - "epoch": 0.437125748502994, - "grad_norm": 0.28620936956656057, - "learning_rate": 1.931645212386176e-05, - "loss": 0.0113, + "epoch": 0.4672, + "grad_norm": 0.08968822087577417, + "learning_rate": 1.9186717942277466e-05, + "loss": 0.001, "step": 292 }, { - "epoch": 0.43862275449101795, - "grad_norm": 0.26386101205038476, - "learning_rate": 1.9310564713732495e-05, - "loss": 0.0082, + "epoch": 0.4688, + "grad_norm": 0.04091429398607774, + "learning_rate": 1.9179878061798347e-05, + "loss": 0.0006, "step": 293 }, { - "epoch": 0.44011976047904194, - "grad_norm": 0.40806356163691765, - "learning_rate": 1.9304652963070868e-05, - "loss": 0.0107, + "epoch": 0.4704, + "grad_norm": 0.14716595852890726, + "learning_rate": 1.9173010768809934e-05, + "loss": 0.0017, "step": 294 }, { - "epoch": 0.4416167664670659, - "grad_norm": 0.19248450148831384, - "learning_rate": 1.929871688733192e-05, - "loss": 0.0091, + "epoch": 0.472, + "grad_norm": 0.19756136061689594, + "learning_rate": 1.9166116083819002e-05, + "loss": 0.0014, "step": 295 }, { - "epoch": 0.4431137724550898, - "grad_norm": 0.28001071490300894, - "learning_rate": 1.929275650203429e-05, - "loss": 0.0086, + "epoch": 0.4736, + "grad_norm": 0.2356635049518032, + "learning_rate": 1.915919402741413e-05, + "loss": 0.004, "step": 296 }, { - "epoch": 0.44461077844311375, - "grad_norm": 0.19281433588980926, - "learning_rate": 1.9286771822760155e-05, - "loss": 0.0075, + "epoch": 0.4752, + "grad_norm": 0.11227394052898403, + "learning_rate": 1.915224462026563e-05, + "loss": 0.0034, "step": 297 }, { - "epoch": 0.44610778443113774, - "grad_norm": 0.30951373486991235, - "learning_rate": 1.9280762865155217e-05, - "loss": 0.0126, + "epoch": 0.4768, + "grad_norm": 0.16940535443283003, + "learning_rate": 1.9145267883125483e-05, + "loss": 0.002, "step": 298 }, { - "epoch": 0.4476047904191617, - "grad_norm": 0.24670484913137125, - "learning_rate": 1.9274729644928646e-05, - "loss": 0.0079, + "epoch": 0.4784, + "grad_norm": 0.13956098006188228, + "learning_rate": 1.913826383682729e-05, + "loss": 0.002, "step": 299 }, { - "epoch": 0.4491017964071856, - "grad_norm": 0.21597250129732382, - "learning_rate": 1.9268672177853037e-05, - "loss": 0.0082, + "epoch": 0.48, + "grad_norm": 0.11477156950801032, + "learning_rate": 1.913123250228619e-05, + "loss": 0.0015, "step": 300 }, { - "epoch": 0.4505988023952096, - "grad_norm": 0.25565404173278816, - "learning_rate": 1.926259047976438e-05, - "loss": 0.0078, + "epoch": 0.4816, + "grad_norm": 0.058908329416148174, + "learning_rate": 1.912417390049882e-05, + "loss": 0.0009, "step": 301 }, { - "epoch": 0.45209580838323354, - "grad_norm": 0.25018467551033313, - "learning_rate": 1.9256484566562002e-05, - "loss": 0.0082, + "epoch": 0.4832, + "grad_norm": 0.026814308548944957, + "learning_rate": 1.9117088052543233e-05, + "loss": 0.0005, "step": 302 }, { - "epoch": 0.4535928143712575, - "grad_norm": 0.24949067973854458, - "learning_rate": 1.9250354454208545e-05, - "loss": 0.0084, + "epoch": 0.4848, + "grad_norm": 0.12194884329880719, + "learning_rate": 1.9109974979578852e-05, + "loss": 0.0018, "step": 303 }, { - "epoch": 0.4550898203592814, - "grad_norm": 0.31313857717111415, - "learning_rate": 1.924420015872991e-05, - "loss": 0.0102, + "epoch": 0.4864, + "grad_norm": 0.09882593225561369, + "learning_rate": 1.9102834702846387e-05, + "loss": 0.0009, "step": 304 }, { - "epoch": 0.4565868263473054, - "grad_norm": 0.20485306938289613, - "learning_rate": 1.923802169621522e-05, - "loss": 0.0075, + "epoch": 0.488, + "grad_norm": 0.30833315654476523, + "learning_rate": 1.909566724366779e-05, + "loss": 0.0028, "step": 305 }, { - "epoch": 0.45808383233532934, - "grad_norm": 0.20454714142094618, - "learning_rate": 1.9231819082816786e-05, - "loss": 0.0076, + "epoch": 0.4896, + "grad_norm": 0.2594408640023605, + "learning_rate": 1.9088472623446182e-05, + "loss": 0.0036, "step": 306 }, { - "epoch": 0.4595808383233533, - "grad_norm": 0.24763142577873085, - "learning_rate": 1.9225592334750036e-05, - "loss": 0.0081, + "epoch": 0.4912, + "grad_norm": 0.24354763997282527, + "learning_rate": 1.9081250863665794e-05, + "loss": 0.0057, "step": 307 }, { - "epoch": 0.46107784431137727, - "grad_norm": 0.27799312574579305, - "learning_rate": 1.9219341468293518e-05, - "loss": 0.0079, + "epoch": 0.4928, + "grad_norm": 0.028349596310172764, + "learning_rate": 1.9074001985891893e-05, + "loss": 0.0005, "step": 308 }, { - "epoch": 0.4625748502994012, - "grad_norm": 0.16117009627843906, - "learning_rate": 1.921306649978881e-05, - "loss": 0.006, + "epoch": 0.4944, + "grad_norm": 0.15763944104469357, + "learning_rate": 1.9066726011770725e-05, + "loss": 0.0032, "step": 309 }, { - "epoch": 0.46407185628742514, - "grad_norm": 0.2451738774041042, - "learning_rate": 1.920676744564052e-05, - "loss": 0.0103, + "epoch": 0.496, + "grad_norm": 0.18356997394024363, + "learning_rate": 1.9059422963029464e-05, + "loss": 0.0029, "step": 310 }, { - "epoch": 0.4655688622754491, - "grad_norm": 0.23167970747659175, - "learning_rate": 1.9200444322316206e-05, - "loss": 0.0069, + "epoch": 0.4976, + "grad_norm": 0.2508451892317318, + "learning_rate": 1.905209286147611e-05, + "loss": 0.0018, "step": 311 }, { - "epoch": 0.46706586826347307, - "grad_norm": 0.19444027025835037, - "learning_rate": 1.919409714634636e-05, - "loss": 0.0064, + "epoch": 0.4992, + "grad_norm": 0.18411645717395078, + "learning_rate": 1.9044735728999472e-05, + "loss": 0.0053, "step": 312 }, { - "epoch": 0.468562874251497, - "grad_norm": 0.259521752410209, - "learning_rate": 1.9187725934324352e-05, - "loss": 0.0087, + "epoch": 0.5008, + "grad_norm": 0.14408244481084634, + "learning_rate": 1.903735158756905e-05, + "loss": 0.0018, "step": 313 }, { - "epoch": 0.47005988023952094, - "grad_norm": 0.1870803982415873, - "learning_rate": 1.918133070290639e-05, - "loss": 0.0099, + "epoch": 0.5024, + "grad_norm": 0.17633416659439113, + "learning_rate": 1.902994045923502e-05, + "loss": 0.001, "step": 314 }, { - "epoch": 0.47155688622754494, - "grad_norm": 0.2782883408394447, - "learning_rate": 1.917491146881147e-05, - "loss": 0.0079, + "epoch": 0.504, + "grad_norm": 0.29912218229546866, + "learning_rate": 1.9022502366128136e-05, + "loss": 0.003, "step": 315 }, { - "epoch": 0.47305389221556887, - "grad_norm": 0.30227323802831124, - "learning_rate": 1.916846824882135e-05, - "loss": 0.0068, + "epoch": 0.5056, + "grad_norm": 0.24668813909693074, + "learning_rate": 1.901503733045967e-05, + "loss": 0.0066, "step": 316 }, { - "epoch": 0.4745508982035928, - "grad_norm": 0.17450053993957124, - "learning_rate": 1.916200105978048e-05, - "loss": 0.0055, + "epoch": 0.5072, + "grad_norm": 0.0708971703831616, + "learning_rate": 1.9007545374521354e-05, + "loss": 0.0007, "step": 317 }, { - "epoch": 0.47604790419161674, - "grad_norm": 0.22986677982746617, - "learning_rate": 1.9155509918595977e-05, - "loss": 0.0094, + "epoch": 0.5088, + "grad_norm": 0.0664197198165233, + "learning_rate": 1.90000265206853e-05, + "loss": 0.0011, "step": 318 }, { - "epoch": 0.47754491017964074, - "grad_norm": 0.2210009165017658, - "learning_rate": 1.9148994842237585e-05, - "loss": 0.0089, + "epoch": 0.5104, + "grad_norm": 0.09508964161653133, + "learning_rate": 1.8992480791403957e-05, + "loss": 0.0011, "step": 319 }, { - "epoch": 0.47904191616766467, - "grad_norm": 0.3369866058031818, - "learning_rate": 1.9142455847737613e-05, - "loss": 0.0102, + "epoch": 0.512, + "grad_norm": 0.133749388372953, + "learning_rate": 1.898490820921001e-05, + "loss": 0.0011, "step": 320 }, { - "epoch": 0.4805389221556886, - "grad_norm": 0.31529088422929136, - "learning_rate": 1.9135892952190894e-05, - "loss": 0.0067, + "epoch": 0.5136, + "grad_norm": 0.32095988664680164, + "learning_rate": 1.897730879671634e-05, + "loss": 0.0042, "step": 321 }, { - "epoch": 0.4820359281437126, - "grad_norm": 0.34819262640952137, - "learning_rate": 1.912930617275476e-05, - "loss": 0.0083, + "epoch": 0.5152, + "grad_norm": 0.08222690605282151, + "learning_rate": 1.8969682576615947e-05, + "loss": 0.0009, "step": 322 }, { - "epoch": 0.48353293413173654, - "grad_norm": 0.2823658083150663, - "learning_rate": 1.9122695526648968e-05, - "loss": 0.0074, + "epoch": 0.5168, + "grad_norm": 0.03976299754595526, + "learning_rate": 1.8962029571681887e-05, + "loss": 0.0005, "step": 323 }, { - "epoch": 0.48502994011976047, - "grad_norm": 0.22578397023227845, - "learning_rate": 1.9116061031155683e-05, - "loss": 0.0054, + "epoch": 0.5184, + "grad_norm": 0.08048980227033051, + "learning_rate": 1.8954349804767185e-05, + "loss": 0.0014, "step": 324 }, { - "epoch": 0.4865269461077844, - "grad_norm": 0.1798278839714749, - "learning_rate": 1.9109402703619396e-05, - "loss": 0.0069, + "epoch": 0.52, + "grad_norm": 0.12346521034266841, + "learning_rate": 1.8946643298804794e-05, + "loss": 0.0038, "step": 325 }, { - "epoch": 0.4880239520958084, - "grad_norm": 0.3173459554977556, - "learning_rate": 1.9102720561446933e-05, - "loss": 0.0097, + "epoch": 0.5216, + "grad_norm": 0.06561097915457406, + "learning_rate": 1.8938910076807514e-05, + "loss": 0.001, "step": 326 }, { - "epoch": 0.48952095808383234, - "grad_norm": 0.4520342362723284, - "learning_rate": 1.9096014622107363e-05, - "loss": 0.0064, + "epoch": 0.5232, + "grad_norm": 0.06053879999625217, + "learning_rate": 1.8931150161867917e-05, + "loss": 0.0007, "step": 327 }, { - "epoch": 0.49101796407185627, - "grad_norm": 0.2950917880335438, - "learning_rate": 1.9089284903131956e-05, - "loss": 0.0075, + "epoch": 0.5248, + "grad_norm": 0.06900447980893769, + "learning_rate": 1.892336357715829e-05, + "loss": 0.004, "step": 328 }, { - "epoch": 0.49251497005988026, - "grad_norm": 0.27125581637418156, - "learning_rate": 1.908253142211417e-05, - "loss": 0.0055, + "epoch": 0.5264, + "grad_norm": 0.10509614257503373, + "learning_rate": 1.891555034593055e-05, + "loss": 0.0011, "step": 329 }, { - "epoch": 0.4940119760479042, - "grad_norm": 0.4292444115677685, - "learning_rate": 1.9075754196709574e-05, - "loss": 0.0089, + "epoch": 0.528, + "grad_norm": 0.047031312862866576, + "learning_rate": 1.8907710491516197e-05, + "loss": 0.0009, "step": 330 }, { - "epoch": 0.49550898203592814, - "grad_norm": 0.2737159333850911, - "learning_rate": 1.9068953244635805e-05, - "loss": 0.0061, + "epoch": 0.5296, + "grad_norm": 0.01697870761243344, + "learning_rate": 1.8899844037326227e-05, + "loss": 0.0004, "step": 331 }, { - "epoch": 0.49700598802395207, - "grad_norm": 0.2813618023609554, - "learning_rate": 1.9062128583672543e-05, - "loss": 0.0055, + "epoch": 0.5312, + "grad_norm": 0.06180188702083097, + "learning_rate": 1.889195100685106e-05, + "loss": 0.0006, "step": 332 }, { - "epoch": 0.49850299401197606, - "grad_norm": 0.21811824525996454, - "learning_rate": 1.9055280231661443e-05, - "loss": 0.0076, + "epoch": 0.5328, + "grad_norm": 0.054644161559529975, + "learning_rate": 1.8884031423660492e-05, + "loss": 0.0007, "step": 333 }, { - "epoch": 0.5, - "grad_norm": 0.3024569403044615, - "learning_rate": 1.904840820650609e-05, - "loss": 0.0076, + "epoch": 0.5344, + "grad_norm": 0.06836013722792311, + "learning_rate": 1.8876085311403592e-05, + "loss": 0.0008, "step": 334 }, { - "epoch": 0.5014970059880239, - "grad_norm": 0.3747520923531163, - "learning_rate": 1.9041512526171968e-05, - "loss": 0.0073, + "epoch": 0.536, + "grad_norm": 0.08405432130742992, + "learning_rate": 1.8868112693808664e-05, + "loss": 0.0007, "step": 335 }, { - "epoch": 0.5029940119760479, - "grad_norm": 0.32327667748094846, - "learning_rate": 1.9034593208686397e-05, - "loss": 0.0086, + "epoch": 0.5376, + "grad_norm": 0.052804793325438484, + "learning_rate": 1.8860113594683148e-05, + "loss": 0.0009, "step": 336 }, { - "epoch": 0.5044910179640718, - "grad_norm": 0.2599402605515639, - "learning_rate": 1.9027650272138483e-05, - "loss": 0.008, + "epoch": 0.5392, + "grad_norm": 0.0903202357847314, + "learning_rate": 1.8852088037913577e-05, + "loss": 0.0011, "step": 337 }, { - "epoch": 0.5059880239520959, - "grad_norm": 0.309419497369784, - "learning_rate": 1.9020683734679098e-05, - "loss": 0.0076, + "epoch": 0.5408, + "grad_norm": 0.04558030866715895, + "learning_rate": 1.884403604746547e-05, + "loss": 0.0006, "step": 338 }, { - "epoch": 0.5074850299401198, - "grad_norm": 0.3980869606666559, - "learning_rate": 1.9013693614520795e-05, - "loss": 0.011, + "epoch": 0.5424, + "grad_norm": 0.02140025655887005, + "learning_rate": 1.8835957647383304e-05, + "loss": 0.0004, "step": 339 }, { - "epoch": 0.5089820359281437, - "grad_norm": 0.17974783303905734, - "learning_rate": 1.9006679929937796e-05, - "loss": 0.0064, + "epoch": 0.544, + "grad_norm": 0.043490093005678035, + "learning_rate": 1.8827852861790398e-05, + "loss": 0.0006, "step": 340 }, { - "epoch": 0.5104790419161677, - "grad_norm": 0.33752895891985435, - "learning_rate": 1.899964269926591e-05, - "loss": 0.0058, + "epoch": 0.5456, + "grad_norm": 0.14053030141846226, + "learning_rate": 1.8819721714888878e-05, + "loss": 0.0008, "step": 341 }, { - "epoch": 0.5119760479041916, - "grad_norm": 0.2851367706634093, - "learning_rate": 1.8992581940902517e-05, - "loss": 0.0059, + "epoch": 0.5472, + "grad_norm": 0.07394631076602469, + "learning_rate": 1.8811564230959585e-05, + "loss": 0.0009, "step": 342 }, { - "epoch": 0.5134730538922155, - "grad_norm": 0.19085718115700934, - "learning_rate": 1.8985497673306495e-05, - "loss": 0.0042, + "epoch": 0.5488, + "grad_norm": 0.11601250028806075, + "learning_rate": 1.8803380434362e-05, + "loss": 0.004, "step": 343 }, { - "epoch": 0.5149700598802395, - "grad_norm": 0.2450459731205004, - "learning_rate": 1.897838991499819e-05, - "loss": 0.0084, + "epoch": 0.5504, + "grad_norm": 0.07109290389899153, + "learning_rate": 1.879517034953418e-05, + "loss": 0.0008, "step": 344 }, { - "epoch": 0.5164670658682635, - "grad_norm": 0.2159471296015911, - "learning_rate": 1.8971258684559354e-05, - "loss": 0.0062, + "epoch": 0.552, + "grad_norm": 0.05902437246594027, + "learning_rate": 1.878693400099269e-05, + "loss": 0.0007, "step": 345 }, { - "epoch": 0.5179640718562875, - "grad_norm": 0.2582075201511502, - "learning_rate": 1.8964104000633104e-05, - "loss": 0.0072, + "epoch": 0.5536, + "grad_norm": 0.1309935252514575, + "learning_rate": 1.8778671413332513e-05, + "loss": 0.0025, "step": 346 }, { - "epoch": 0.5194610778443114, - "grad_norm": 0.2826246708970663, - "learning_rate": 1.895692588192387e-05, - "loss": 0.0075, + "epoch": 0.5552, + "grad_norm": 0.026865000045514807, + "learning_rate": 1.877038261122699e-05, + "loss": 0.0003, "step": 347 }, { - "epoch": 0.5209580838323353, - "grad_norm": 0.2612365012660709, - "learning_rate": 1.894972434719735e-05, - "loss": 0.0046, + "epoch": 0.5568, + "grad_norm": 0.200787114993862, + "learning_rate": 1.8762067619427745e-05, + "loss": 0.0015, "step": 348 }, { - "epoch": 0.5224550898203593, - "grad_norm": 0.15056193683838454, - "learning_rate": 1.8942499415280458e-05, - "loss": 0.0042, + "epoch": 0.5584, + "grad_norm": 0.18264175410560715, + "learning_rate": 1.87537264627646e-05, + "loss": 0.0035, "step": 349 }, { - "epoch": 0.5239520958083832, - "grad_norm": 0.22875131238932167, - "learning_rate": 1.8935251105061268e-05, - "loss": 0.0046, + "epoch": 0.56, + "grad_norm": 0.04120060687514351, + "learning_rate": 1.8745359166145526e-05, + "loss": 0.0005, "step": 350 }, { - "epoch": 0.5254491017964071, - "grad_norm": 0.2534927935258981, - "learning_rate": 1.892797943548898e-05, - "loss": 0.0084, + "epoch": 0.5616, + "grad_norm": 0.34090676386146507, + "learning_rate": 1.8736965754556527e-05, + "loss": 0.0021, "step": 351 }, { - "epoch": 0.5269461077844312, - "grad_norm": 0.21952770072925085, - "learning_rate": 1.8920684425573865e-05, - "loss": 0.0035, + "epoch": 0.5632, + "grad_norm": 0.019128712884290862, + "learning_rate": 1.8728546253061614e-05, + "loss": 0.0004, "step": 352 }, { - "epoch": 0.5284431137724551, - "grad_norm": 0.22335232414184136, - "learning_rate": 1.8913366094387203e-05, - "loss": 0.0055, + "epoch": 0.5648, + "grad_norm": 0.027911285732412968, + "learning_rate": 1.8720100686802693e-05, + "loss": 0.0004, "step": 353 }, { - "epoch": 0.5299401197604791, - "grad_norm": 0.18343084280328395, - "learning_rate": 1.8906024461061247e-05, - "loss": 0.0064, + "epoch": 0.5664, + "grad_norm": 0.1729529053601621, + "learning_rate": 1.8711629080999506e-05, + "loss": 0.0013, "step": 354 }, { - "epoch": 0.531437125748503, - "grad_norm": 0.21313883463880326, - "learning_rate": 1.8898659544789166e-05, - "loss": 0.0052, + "epoch": 0.568, + "grad_norm": 0.06981929668164648, + "learning_rate": 1.8703131460949555e-05, + "loss": 0.0012, "step": 355 }, { - "epoch": 0.5329341317365269, - "grad_norm": 0.17900066861920666, - "learning_rate": 1.8891271364825002e-05, - "loss": 0.0054, + "epoch": 0.5696, + "grad_norm": 0.13135552462130254, + "learning_rate": 1.869460785202802e-05, + "loss": 0.0028, "step": 356 }, { - "epoch": 0.5344311377245509, - "grad_norm": 0.3186411908486247, - "learning_rate": 1.8883859940483614e-05, - "loss": 0.0085, + "epoch": 0.5712, + "grad_norm": 0.13398125098852007, + "learning_rate": 1.86860582796877e-05, + "loss": 0.0019, "step": 357 }, { - "epoch": 0.5359281437125748, - "grad_norm": 0.23748527458193902, - "learning_rate": 1.8876425291140626e-05, - "loss": 0.0064, + "epoch": 0.5728, + "grad_norm": 0.019718997811409525, + "learning_rate": 1.8677482769458905e-05, + "loss": 0.0005, "step": 358 }, { - "epoch": 0.5374251497005988, - "grad_norm": 0.18189619860086126, - "learning_rate": 1.8868967436232378e-05, - "loss": 0.0061, + "epoch": 0.5744, + "grad_norm": 0.05279323543297812, + "learning_rate": 1.866888134694942e-05, + "loss": 0.0007, "step": 359 }, { - "epoch": 0.5389221556886228, - "grad_norm": 0.3081977476306483, - "learning_rate": 1.886148639525588e-05, - "loss": 0.0049, + "epoch": 0.576, + "grad_norm": 0.16886373007352773, + "learning_rate": 1.866025403784439e-05, + "loss": 0.0011, "step": 360 }, { - "epoch": 0.5404191616766467, - "grad_norm": 0.3589434090609591, - "learning_rate": 1.885398218776876e-05, - "loss": 0.0052, + "epoch": 0.5776, + "grad_norm": 0.12528521939164913, + "learning_rate": 1.865160086790627e-05, + "loss": 0.0012, "step": 361 }, { - "epoch": 0.5419161676646707, - "grad_norm": 0.2719093022438886, - "learning_rate": 1.8846454833389202e-05, - "loss": 0.0063, + "epoch": 0.5792, + "grad_norm": 0.07723492991438953, + "learning_rate": 1.8642921862974742e-05, + "loss": 0.0008, "step": 362 }, { - "epoch": 0.5434131736526946, - "grad_norm": 0.2134250055694977, - "learning_rate": 1.8838904351795905e-05, - "loss": 0.0047, + "epoch": 0.5808, + "grad_norm": 0.10497027463979597, + "learning_rate": 1.8634217048966638e-05, + "loss": 0.0013, "step": 363 }, { - "epoch": 0.5449101796407185, - "grad_norm": 0.36939383043202, - "learning_rate": 1.8831330762728028e-05, - "loss": 0.0061, + "epoch": 0.5824, + "grad_norm": 0.0606074346839084, + "learning_rate": 1.8625486451875843e-05, + "loss": 0.0006, "step": 364 }, { - "epoch": 0.5464071856287425, - "grad_norm": 0.3092372268044756, - "learning_rate": 1.882373408598515e-05, - "loss": 0.0055, + "epoch": 0.584, + "grad_norm": 0.07920612799392682, + "learning_rate": 1.861673009777325e-05, + "loss": 0.0008, "step": 365 }, { - "epoch": 0.5479041916167665, - "grad_norm": 0.22805534768018146, - "learning_rate": 1.8816114341427196e-05, - "loss": 0.0065, + "epoch": 0.5856, + "grad_norm": 0.0346305606132078, + "learning_rate": 1.8607948012806664e-05, + "loss": 0.0005, "step": 366 }, { - "epoch": 0.5494011976047904, - "grad_norm": 0.20186378709158326, - "learning_rate": 1.88084715489744e-05, - "loss": 0.0055, + "epoch": 0.5872, + "grad_norm": 0.06547468094544372, + "learning_rate": 1.8599140223200716e-05, + "loss": 0.0008, "step": 367 }, { - "epoch": 0.5508982035928144, - "grad_norm": 0.2447957214059236, - "learning_rate": 1.8800805728607257e-05, - "loss": 0.0059, + "epoch": 0.5888, + "grad_norm": 0.15325174641228742, + "learning_rate": 1.859030675525681e-05, + "loss": 0.0026, "step": 368 }, { - "epoch": 0.5523952095808383, - "grad_norm": 0.24624052829362808, - "learning_rate": 1.879311690036645e-05, - "loss": 0.005, + "epoch": 0.5904, + "grad_norm": 0.1389172010956714, + "learning_rate": 1.858144763535302e-05, + "loss": 0.0037, "step": 369 }, { - "epoch": 0.5538922155688623, - "grad_norm": 0.4598990071533753, - "learning_rate": 1.8785405084352825e-05, - "loss": 0.0119, + "epoch": 0.592, + "grad_norm": 0.06355969619933752, + "learning_rate": 1.857256288994402e-05, + "loss": 0.0006, "step": 370 }, { - "epoch": 0.5553892215568862, - "grad_norm": 0.2254413046985076, - "learning_rate": 1.8777670300727314e-05, - "loss": 0.0047, + "epoch": 0.5936, + "grad_norm": 0.08663530000385801, + "learning_rate": 1.8563652545561014e-05, + "loss": 0.0006, "step": 371 }, { - "epoch": 0.5568862275449101, - "grad_norm": 0.2817060159533667, - "learning_rate": 1.8769912569710902e-05, - "loss": 0.0054, + "epoch": 0.5952, + "grad_norm": 0.18572281033057678, + "learning_rate": 1.855471662881164e-05, + "loss": 0.0027, "step": 372 }, { - "epoch": 0.5583832335329342, - "grad_norm": 0.23401568986125731, - "learning_rate": 1.876213191158456e-05, - "loss": 0.0066, + "epoch": 0.5968, + "grad_norm": 0.022392349584578558, + "learning_rate": 1.8545755166379898e-05, + "loss": 0.0003, "step": 373 }, { - "epoch": 0.5598802395209581, - "grad_norm": 0.31712264048465694, - "learning_rate": 1.8754328346689194e-05, - "loss": 0.0051, + "epoch": 0.5984, + "grad_norm": 0.01772969160150952, + "learning_rate": 1.8536768185026085e-05, + "loss": 0.0004, "step": 374 }, { - "epoch": 0.561377245508982, - "grad_norm": 0.1853362893894113, - "learning_rate": 1.8746501895425603e-05, - "loss": 0.0031, + "epoch": 0.6, + "grad_norm": 0.03463345597440423, + "learning_rate": 1.852775571158668e-05, + "loss": 0.0004, "step": 375 }, { - "epoch": 0.562874251497006, - "grad_norm": 0.1512730540107703, - "learning_rate": 1.8738652578254414e-05, - "loss": 0.0031, + "epoch": 0.6016, + "grad_norm": 0.0687422345719889, + "learning_rate": 1.85187177729743e-05, + "loss": 0.0034, "step": 376 }, { - "epoch": 0.5643712574850299, - "grad_norm": 0.251381383971643, - "learning_rate": 1.8730780415696034e-05, - "loss": 0.0061, + "epoch": 0.6032, + "grad_norm": 0.15556372482561745, + "learning_rate": 1.850965439617761e-05, + "loss": 0.0033, "step": 377 }, { - "epoch": 0.5658682634730539, - "grad_norm": 0.2090374749619751, - "learning_rate": 1.872288542833059e-05, - "loss": 0.0032, + "epoch": 0.6048, + "grad_norm": 0.08434742559441728, + "learning_rate": 1.8500565608261215e-05, + "loss": 0.0031, "step": 378 }, { - "epoch": 0.5673652694610778, - "grad_norm": 0.29710148489922494, - "learning_rate": 1.871496763679788e-05, - "loss": 0.0061, + "epoch": 0.6064, + "grad_norm": 0.1474992044646562, + "learning_rate": 1.8491451436365628e-05, + "loss": 0.0025, "step": 379 }, { - "epoch": 0.5688622754491018, - "grad_norm": 0.20476911169490797, - "learning_rate": 1.8707027061797328e-05, - "loss": 0.0041, + "epoch": 0.608, + "grad_norm": 0.06662251243376646, + "learning_rate": 1.848231190770714e-05, + "loss": 0.0008, "step": 380 }, { - "epoch": 0.5703592814371258, - "grad_norm": 0.15467140235695107, - "learning_rate": 1.8699063724087905e-05, - "loss": 0.0039, + "epoch": 0.6096, + "grad_norm": 0.0754815447641557, + "learning_rate": 1.8473147049577777e-05, + "loss": 0.001, "step": 381 }, { - "epoch": 0.5718562874251497, - "grad_norm": 0.35593514914651286, - "learning_rate": 1.8691077644488103e-05, - "loss": 0.0051, + "epoch": 0.6112, + "grad_norm": 0.009235009771626088, + "learning_rate": 1.8463956889345195e-05, + "loss": 0.0002, "step": 382 }, { - "epoch": 0.5733532934131736, - "grad_norm": 0.3031925999503722, - "learning_rate": 1.8683068843875866e-05, - "loss": 0.0054, + "epoch": 0.6128, + "grad_norm": 0.01669079074694255, + "learning_rate": 1.8454741454452604e-05, + "loss": 0.0003, "step": 383 }, { - "epoch": 0.5748502994011976, - "grad_norm": 0.392987606385426, - "learning_rate": 1.867503734318853e-05, - "loss": 0.0068, + "epoch": 0.6144, + "grad_norm": 0.05043025132118084, + "learning_rate": 1.8445500772418697e-05, + "loss": 0.0004, "step": 384 }, { - "epoch": 0.5763473053892215, - "grad_norm": 0.1615633068715301, - "learning_rate": 1.8666983163422786e-05, - "loss": 0.004, + "epoch": 0.616, + "grad_norm": 0.3126790444003597, + "learning_rate": 1.843623487083755e-05, + "loss": 0.0032, "step": 385 }, { - "epoch": 0.5778443113772455, - "grad_norm": 0.2623310704504511, - "learning_rate": 1.8658906325634605e-05, - "loss": 0.0065, + "epoch": 0.6176, + "grad_norm": 0.05068038072505894, + "learning_rate": 1.842694377737855e-05, + "loss": 0.0006, "step": 386 }, { - "epoch": 0.5793413173652695, - "grad_norm": 0.41136291194943037, - "learning_rate": 1.86508068509392e-05, - "loss": 0.0065, + "epoch": 0.6192, + "grad_norm": 0.03215920255949678, + "learning_rate": 1.8417627519786317e-05, + "loss": 0.0004, "step": 387 }, { - "epoch": 0.5808383233532934, - "grad_norm": 0.32922655904832243, - "learning_rate": 1.864268476051096e-05, - "loss": 0.0083, + "epoch": 0.6208, + "grad_norm": 0.018828487209741788, + "learning_rate": 1.8408286125880605e-05, + "loss": 0.0003, "step": 388 }, { - "epoch": 0.5823353293413174, - "grad_norm": 0.15328440896027834, - "learning_rate": 1.86345400755834e-05, - "loss": 0.0034, + "epoch": 0.6224, + "grad_norm": 0.14363387776918993, + "learning_rate": 1.839891962355624e-05, + "loss": 0.001, "step": 389 }, { - "epoch": 0.5838323353293413, - "grad_norm": 0.23062078223568724, - "learning_rate": 1.8626372817449107e-05, - "loss": 0.0035, + "epoch": 0.624, + "grad_norm": 0.11747971500364393, + "learning_rate": 1.8389528040783014e-05, + "loss": 0.0043, "step": 390 }, { - "epoch": 0.5853293413173652, - "grad_norm": 0.4137111601133577, - "learning_rate": 1.8618183007459664e-05, - "loss": 0.0057, + "epoch": 0.6256, + "grad_norm": 0.09249479045992653, + "learning_rate": 1.838011140560562e-05, + "loss": 0.0006, "step": 391 }, { - "epoch": 0.5868263473053892, - "grad_norm": 0.27055771834059683, - "learning_rate": 1.8609970667025633e-05, - "loss": 0.0046, + "epoch": 0.6272, + "grad_norm": 0.10553138594399365, + "learning_rate": 1.8370669746143566e-05, + "loss": 0.0037, "step": 392 }, { - "epoch": 0.5883233532934131, - "grad_norm": 0.1400482751380909, - "learning_rate": 1.8601735817616466e-05, - "loss": 0.0033, + "epoch": 0.6288, + "grad_norm": 0.08443128236771645, + "learning_rate": 1.836120309059107e-05, + "loss": 0.0004, "step": 393 }, { - "epoch": 0.5898203592814372, - "grad_norm": 0.2825851174593088, - "learning_rate": 1.859347848076046e-05, - "loss": 0.0062, + "epoch": 0.6304, + "grad_norm": 0.23335718692317806, + "learning_rate": 1.835171146721701e-05, + "loss": 0.0018, "step": 394 }, { - "epoch": 0.5913173652694611, - "grad_norm": 0.3397946721846426, - "learning_rate": 1.85851986780447e-05, - "loss": 0.006, + "epoch": 0.632, + "grad_norm": 0.20442531679113446, + "learning_rate": 1.8342194904364815e-05, + "loss": 0.0016, "step": 395 }, { - "epoch": 0.592814371257485, - "grad_norm": 0.27215859756652094, - "learning_rate": 1.8576896431115017e-05, - "loss": 0.0043, + "epoch": 0.6336, + "grad_norm": 0.010111401639089001, + "learning_rate": 1.8332653430452375e-05, + "loss": 0.0002, "step": 396 }, { - "epoch": 0.594311377245509, - "grad_norm": 0.15206642506994694, - "learning_rate": 1.8568571761675893e-05, - "loss": 0.0035, + "epoch": 0.6352, + "grad_norm": 0.04480967540704622, + "learning_rate": 1.8323087073971996e-05, + "loss": 0.0004, "step": 397 }, { - "epoch": 0.5958083832335329, - "grad_norm": 0.177981847566587, - "learning_rate": 1.8560224691490452e-05, - "loss": 0.0051, + "epoch": 0.6368, + "grad_norm": 0.05123731112538742, + "learning_rate": 1.831349586349026e-05, + "loss": 0.0005, "step": 398 }, { - "epoch": 0.5973053892215568, - "grad_norm": 0.18243440630634988, - "learning_rate": 1.8551855242380366e-05, - "loss": 0.0028, + "epoch": 0.6384, + "grad_norm": 0.05877538711434608, + "learning_rate": 1.8303879827647977e-05, + "loss": 0.0005, "step": 399 }, { - "epoch": 0.5988023952095808, - "grad_norm": 0.22781016153338413, - "learning_rate": 1.854346343622582e-05, - "loss": 0.0061, + "epoch": 0.64, + "grad_norm": 0.2787404123164975, + "learning_rate": 1.8294238995160093e-05, + "loss": 0.0024, "step": 400 }, { - "epoch": 0.6002994011976048, - "grad_norm": 0.14798262053026728, - "learning_rate": 1.8535049294965436e-05, - "loss": 0.0028, + "epoch": 0.6416, + "grad_norm": 0.14221635930646565, + "learning_rate": 1.8284573394815596e-05, + "loss": 0.0022, "step": 401 }, { - "epoch": 0.6017964071856288, - "grad_norm": 0.12876490456064732, - "learning_rate": 1.8526612840596238e-05, - "loss": 0.0029, + "epoch": 0.6432, + "grad_norm": 0.05002110451791554, + "learning_rate": 1.8274883055477436e-05, + "loss": 0.0004, "step": 402 }, { - "epoch": 0.6032934131736527, - "grad_norm": 0.19321532804478075, - "learning_rate": 1.8518154095173583e-05, - "loss": 0.0032, + "epoch": 0.6448, + "grad_norm": 0.03932836435003346, + "learning_rate": 1.826516800608244e-05, + "loss": 0.0005, "step": 403 }, { - "epoch": 0.6047904191616766, - "grad_norm": 0.12070559140749956, - "learning_rate": 1.8509673080811096e-05, - "loss": 0.0033, + "epoch": 0.6464, + "grad_norm": 0.094636335346893, + "learning_rate": 1.8255428275641212e-05, + "loss": 0.0017, "step": 404 }, { - "epoch": 0.6062874251497006, - "grad_norm": 0.18453370264259583, - "learning_rate": 1.8501169819680624e-05, - "loss": 0.005, + "epoch": 0.648, + "grad_norm": 0.04753353819319314, + "learning_rate": 1.8245663893238075e-05, + "loss": 0.0005, "step": 405 }, { - "epoch": 0.6077844311377245, - "grad_norm": 0.1954819685260244, - "learning_rate": 1.849264433401217e-05, - "loss": 0.0036, + "epoch": 0.6496, + "grad_norm": 0.08265945475873478, + "learning_rate": 1.823587488803095e-05, + "loss": 0.0014, "step": 406 }, { - "epoch": 0.6092814371257484, - "grad_norm": 0.15334768361544945, - "learning_rate": 1.8484096646093842e-05, - "loss": 0.0035, + "epoch": 0.6512, + "grad_norm": 0.12295729262860776, + "learning_rate": 1.8226061289251297e-05, + "loss": 0.0015, "step": 407 }, { - "epoch": 0.6107784431137725, - "grad_norm": 0.15359417727888636, - "learning_rate": 1.847552677827179e-05, - "loss": 0.0047, + "epoch": 0.6528, + "grad_norm": 0.139470158108872, + "learning_rate": 1.821622312620401e-05, + "loss": 0.0016, "step": 408 }, { - "epoch": 0.6122754491017964, - "grad_norm": 0.15142354869259234, - "learning_rate": 1.846693475295015e-05, - "loss": 0.0025, + "epoch": 0.6544, + "grad_norm": 0.05305196376502255, + "learning_rate": 1.8206360428267332e-05, + "loss": 0.0004, "step": 409 }, { - "epoch": 0.6137724550898204, - "grad_norm": 0.1305889137402961, - "learning_rate": 1.8458320592590976e-05, - "loss": 0.0039, + "epoch": 0.656, + "grad_norm": 0.018256913526341095, + "learning_rate": 1.8196473224892784e-05, + "loss": 0.0003, "step": 410 }, { - "epoch": 0.6152694610778443, - "grad_norm": 0.24052279035392543, - "learning_rate": 1.8449684319714202e-05, - "loss": 0.0037, + "epoch": 0.6576, + "grad_norm": 0.21253701484431486, + "learning_rate": 1.8186561545605055e-05, + "loss": 0.0024, "step": 411 }, { - "epoch": 0.6167664670658682, - "grad_norm": 0.21510134495795813, - "learning_rate": 1.8441025956897566e-05, - "loss": 0.0047, + "epoch": 0.6592, + "grad_norm": 0.015257919984609451, + "learning_rate": 1.817662542000192e-05, + "loss": 0.0003, "step": 412 }, { - "epoch": 0.6182634730538922, - "grad_norm": 0.1378513852811569, - "learning_rate": 1.8432345526776555e-05, - "loss": 0.0037, + "epoch": 0.6608, + "grad_norm": 0.10312922775285008, + "learning_rate": 1.816666487775416e-05, + "loss": 0.0011, "step": 413 }, { - "epoch": 0.6197604790419161, - "grad_norm": 0.12876211162444984, - "learning_rate": 1.842364305204434e-05, - "loss": 0.0047, + "epoch": 0.6624, + "grad_norm": 0.11624902449700726, + "learning_rate": 1.815667994860547e-05, + "loss": 0.0006, "step": 414 }, { - "epoch": 0.6212574850299402, - "grad_norm": 0.1870068324397345, - "learning_rate": 1.841491855545173e-05, - "loss": 0.0056, + "epoch": 0.664, + "grad_norm": 0.18464390507676323, + "learning_rate": 1.8146670662372353e-05, + "loss": 0.0027, "step": 415 }, { - "epoch": 0.6227544910179641, - "grad_norm": 0.15043391887906607, - "learning_rate": 1.840617205980711e-05, - "loss": 0.0034, + "epoch": 0.6656, + "grad_norm": 0.13216285372914752, + "learning_rate": 1.813663704894407e-05, + "loss": 0.0008, "step": 416 }, { - "epoch": 0.624251497005988, - "grad_norm": 0.1603990561673152, - "learning_rate": 1.8397403587976366e-05, - "loss": 0.0038, + "epoch": 0.6672, + "grad_norm": 0.01942107440009396, + "learning_rate": 1.8126579138282502e-05, + "loss": 0.0003, "step": 417 }, { - "epoch": 0.625748502994012, - "grad_norm": 0.1380674767451333, - "learning_rate": 1.838861316288284e-05, - "loss": 0.0021, + "epoch": 0.6688, + "grad_norm": 0.14540193615014807, + "learning_rate": 1.8116496960422108e-05, + "loss": 0.0008, "step": 418 }, { - "epoch": 0.6272455089820359, - "grad_norm": 0.18002621685496464, - "learning_rate": 1.8379800807507276e-05, - "loss": 0.0041, + "epoch": 0.6704, + "grad_norm": 0.17943893985236523, + "learning_rate": 1.8106390545469797e-05, + "loss": 0.0022, "step": 419 }, { - "epoch": 0.6287425149700598, - "grad_norm": 0.16156445172842446, - "learning_rate": 1.8370966544887734e-05, + "epoch": 0.672, + "grad_norm": 0.2999659081905774, + "learning_rate": 1.809625992360485e-05, "loss": 0.0026, "step": 420 }, { - "epoch": 0.6302395209580839, - "grad_norm": 0.13680763335054835, - "learning_rate": 1.836211039811956e-05, - "loss": 0.0029, + "epoch": 0.6736, + "grad_norm": 0.1345555003175391, + "learning_rate": 1.8086105125078858e-05, + "loss": 0.0044, "step": 421 }, { - "epoch": 0.6317365269461078, - "grad_norm": 0.20502692603268471, - "learning_rate": 1.8353232390355293e-05, - "loss": 0.0045, + "epoch": 0.6752, + "grad_norm": 0.08857667059507833, + "learning_rate": 1.8075926180215576e-05, + "loss": 0.0006, "step": 422 }, { - "epoch": 0.6332335329341318, - "grad_norm": 0.23325599547346149, - "learning_rate": 1.8344332544804647e-05, - "loss": 0.0067, + "epoch": 0.6768, + "grad_norm": 0.07718347252729954, + "learning_rate": 1.8065723119410885e-05, + "loss": 0.0006, "step": 423 }, { - "epoch": 0.6347305389221557, - "grad_norm": 0.29861952724882246, - "learning_rate": 1.8335410884734412e-05, - "loss": 0.006, + "epoch": 0.6784, + "grad_norm": 0.031977559605231326, + "learning_rate": 1.805549597313267e-05, + "loss": 0.0004, "step": 424 }, { - "epoch": 0.6362275449101796, - "grad_norm": 0.1405737685776957, - "learning_rate": 1.832646743346841e-05, - "loss": 0.0025, + "epoch": 0.68, + "grad_norm": 0.11691074486721813, + "learning_rate": 1.804524477192075e-05, + "loss": 0.0015, "step": 425 }, { - "epoch": 0.6377245508982036, - "grad_norm": 0.15314449241818123, - "learning_rate": 1.831750221438743e-05, - "loss": 0.0025, + "epoch": 0.6816, + "grad_norm": 0.06512767211853637, + "learning_rate": 1.803496954638676e-05, + "loss": 0.0006, "step": 426 }, { - "epoch": 0.6392215568862275, - "grad_norm": 0.20564899127887978, - "learning_rate": 1.8308515250929174e-05, - "loss": 0.005, + "epoch": 0.6832, + "grad_norm": 0.11496639985940264, + "learning_rate": 1.8024670327214084e-05, + "loss": 0.003, "step": 427 }, { - "epoch": 0.6407185628742516, - "grad_norm": 0.2250426655272749, - "learning_rate": 1.8299506566588178e-05, - "loss": 0.0063, + "epoch": 0.6848, + "grad_norm": 0.023249205111541698, + "learning_rate": 1.8014347145157757e-05, + "loss": 0.0003, "step": 428 }, { - "epoch": 0.6422155688622755, - "grad_norm": 0.20159344511006713, - "learning_rate": 1.829047618491578e-05, - "loss": 0.0041, + "epoch": 0.6864, + "grad_norm": 0.23618797082446044, + "learning_rate": 1.8004000031044363e-05, + "loss": 0.0019, "step": 429 }, { - "epoch": 0.6437125748502994, - "grad_norm": 0.30367059362305904, - "learning_rate": 1.8281424129520028e-05, - "loss": 0.0045, + "epoch": 0.688, + "grad_norm": 0.006547483779280376, + "learning_rate": 1.799362901577196e-05, + "loss": 0.0002, "step": 430 }, { - "epoch": 0.6452095808383234, - "grad_norm": 0.17853724353758313, - "learning_rate": 1.8272350424065637e-05, - "loss": 0.0037, + "epoch": 0.6896, + "grad_norm": 0.07298271657368005, + "learning_rate": 1.798323413030997e-05, + "loss": 0.0008, "step": 431 }, { - "epoch": 0.6467065868263473, - "grad_norm": 0.13452139718446834, - "learning_rate": 1.8263255092273918e-05, - "loss": 0.0031, + "epoch": 0.6912, + "grad_norm": 0.1512758686758914, + "learning_rate": 1.7972815405699105e-05, + "loss": 0.002, "step": 432 }, { - "epoch": 0.6482035928143712, - "grad_norm": 0.1485870363064914, - "learning_rate": 1.8254138157922726e-05, - "loss": 0.0036, + "epoch": 0.6928, + "grad_norm": 0.03211251447879792, + "learning_rate": 1.796237287305125e-05, + "loss": 0.0003, "step": 433 }, { - "epoch": 0.6497005988023952, - "grad_norm": 0.1369157060039127, - "learning_rate": 1.8244999644846386e-05, - "loss": 0.0049, + "epoch": 0.6944, + "grad_norm": 0.10182772421205202, + "learning_rate": 1.7951906563549397e-05, + "loss": 0.0004, "step": 434 }, { - "epoch": 0.6511976047904192, - "grad_norm": 0.1422170125322691, - "learning_rate": 1.823583957693563e-05, - "loss": 0.0025, + "epoch": 0.696, + "grad_norm": 0.04338935023187058, + "learning_rate": 1.7941416508447537e-05, + "loss": 0.0004, "step": 435 }, { - "epoch": 0.6526946107784432, - "grad_norm": 0.18173894064740984, - "learning_rate": 1.8226657978137556e-05, - "loss": 0.0027, + "epoch": 0.6976, + "grad_norm": 0.1387899079098917, + "learning_rate": 1.793090273907056e-05, + "loss": 0.0018, "step": 436 }, { - "epoch": 0.6541916167664671, - "grad_norm": 0.11382010411529647, - "learning_rate": 1.8217454872455537e-05, - "loss": 0.0029, + "epoch": 0.6992, + "grad_norm": 0.04671281072738134, + "learning_rate": 1.792036528681418e-05, + "loss": 0.0006, "step": 437 }, { - "epoch": 0.655688622754491, - "grad_norm": 0.24972459202509598, - "learning_rate": 1.820823028394918e-05, - "loss": 0.0039, + "epoch": 0.7008, + "grad_norm": 0.04395180681044294, + "learning_rate": 1.7909804183144837e-05, + "loss": 0.0003, "step": 438 }, { - "epoch": 0.657185628742515, - "grad_norm": 0.19783767385058063, - "learning_rate": 1.8198984236734246e-05, - "loss": 0.0043, + "epoch": 0.7024, + "grad_norm": 0.08205063344030432, + "learning_rate": 1.789921945959958e-05, + "loss": 0.0019, "step": 439 }, { - "epoch": 0.6586826347305389, - "grad_norm": 0.2542153997631583, - "learning_rate": 1.8189716754982604e-05, - "loss": 0.0053, + "epoch": 0.704, + "grad_norm": 0.12018305372968996, + "learning_rate": 1.7888611147786003e-05, + "loss": 0.001, "step": 440 }, { - "epoch": 0.6601796407185628, - "grad_norm": 0.16722569392416708, - "learning_rate": 1.8180427862922157e-05, - "loss": 0.004, + "epoch": 0.7056, + "grad_norm": 0.010197674359924965, + "learning_rate": 1.7877979279382135e-05, + "loss": 0.0002, "step": 441 }, { - "epoch": 0.6616766467065869, - "grad_norm": 0.18714906390575894, - "learning_rate": 1.817111758483677e-05, - "loss": 0.0049, + "epoch": 0.7072, + "grad_norm": 0.0930050722342456, + "learning_rate": 1.786732388613635e-05, + "loss": 0.0008, "step": 442 }, { - "epoch": 0.6631736526946108, - "grad_norm": 0.14257993994735135, - "learning_rate": 1.8161785945066235e-05, - "loss": 0.0026, + "epoch": 0.7088, + "grad_norm": 0.07793396487766552, + "learning_rate": 1.7856644999867264e-05, + "loss": 0.0006, "step": 443 }, { - "epoch": 0.6646706586826348, - "grad_norm": 0.16935381936187344, - "learning_rate": 1.8152432968006176e-05, - "loss": 0.0036, + "epoch": 0.7104, + "grad_norm": 0.10577783436370959, + "learning_rate": 1.784594265246366e-05, + "loss": 0.0008, "step": 444 }, { - "epoch": 0.6661676646706587, - "grad_norm": 0.1342503188843719, - "learning_rate": 1.8143058678108002e-05, - "loss": 0.0028, + "epoch": 0.712, + "grad_norm": 0.06379628265447959, + "learning_rate": 1.783521687588437e-05, + "loss": 0.0007, "step": 445 }, { - "epoch": 0.6676646706586826, - "grad_norm": 0.25046636983085335, - "learning_rate": 1.8133663099878846e-05, - "loss": 0.0068, + "epoch": 0.7136, + "grad_norm": 0.013076260307259675, + "learning_rate": 1.782446770215819e-05, + "loss": 0.0002, "step": 446 }, { - "epoch": 0.6691616766467066, - "grad_norm": 0.19961420082825826, - "learning_rate": 1.812424625788149e-05, - "loss": 0.0035, + "epoch": 0.7152, + "grad_norm": 0.04927902298242426, + "learning_rate": 1.781369516338378e-05, + "loss": 0.0006, "step": 447 }, { - "epoch": 0.6706586826347305, - "grad_norm": 0.17568029731674176, - "learning_rate": 1.8114808176734302e-05, - "loss": 0.003, + "epoch": 0.7168, + "grad_norm": 0.03146289331146791, + "learning_rate": 1.7802899291729585e-05, + "loss": 0.0004, "step": 448 }, { - "epoch": 0.6721556886227545, - "grad_norm": 0.17477245221194124, - "learning_rate": 1.8105348881111182e-05, - "loss": 0.0047, + "epoch": 0.7184, + "grad_norm": 0.0437458279896608, + "learning_rate": 1.779208011943371e-05, + "loss": 0.0005, "step": 449 }, { - "epoch": 0.6736526946107785, - "grad_norm": 0.22428133343136628, - "learning_rate": 1.809586839574149e-05, - "loss": 0.0027, + "epoch": 0.72, + "grad_norm": 0.13167614822032764, + "learning_rate": 1.7781237678803845e-05, + "loss": 0.0026, "step": 450 }, { - "epoch": 0.6751497005988024, - "grad_norm": 0.19664866060996913, - "learning_rate": 1.808636674540998e-05, - "loss": 0.0034, + "epoch": 0.7216, + "grad_norm": 0.054018249029343136, + "learning_rate": 1.777037200221717e-05, + "loss": 0.0004, "step": 451 }, { - "epoch": 0.6766467065868264, - "grad_norm": 0.14083033586251215, - "learning_rate": 1.8076843954956736e-05, - "loss": 0.004, + "epoch": 0.7232, + "grad_norm": 0.04532830776342967, + "learning_rate": 1.775948312212024e-05, + "loss": 0.0005, "step": 452 }, { - "epoch": 0.6781437125748503, - "grad_norm": 0.1647022452825819, - "learning_rate": 1.8067300049277113e-05, - "loss": 0.0023, + "epoch": 0.7248, + "grad_norm": 0.03224800812198159, + "learning_rate": 1.77485710710289e-05, + "loss": 0.0004, "step": 453 }, { - "epoch": 0.6796407185628742, - "grad_norm": 0.32202530277073854, - "learning_rate": 1.805773505332166e-05, - "loss": 0.0066, + "epoch": 0.7264, + "grad_norm": 0.04750517727874858, + "learning_rate": 1.7737635881528198e-05, + "loss": 0.0004, "step": 454 }, { - "epoch": 0.6811377245508982, - "grad_norm": 0.14979165173614356, - "learning_rate": 1.8048148992096073e-05, - "loss": 0.0032, + "epoch": 0.728, + "grad_norm": 0.0928588978408594, + "learning_rate": 1.7726677586272263e-05, + "loss": 0.0009, "step": 455 }, { - "epoch": 0.6826347305389222, - "grad_norm": 0.1418060842276349, - "learning_rate": 1.8038541890661106e-05, - "loss": 0.0036, + "epoch": 0.7296, + "grad_norm": 0.08608768777399839, + "learning_rate": 1.7715696217984233e-05, + "loss": 0.0009, "step": 456 }, { - "epoch": 0.6841317365269461, - "grad_norm": 0.16706209980708875, - "learning_rate": 1.802891377413253e-05, - "loss": 0.0019, + "epoch": 0.7312, + "grad_norm": 0.09417667548425367, + "learning_rate": 1.7704691809456142e-05, + "loss": 0.0006, "step": 457 }, { - "epoch": 0.6856287425149701, - "grad_norm": 0.18905559651264367, - "learning_rate": 1.8019264667681053e-05, - "loss": 0.0023, + "epoch": 0.7328, + "grad_norm": 0.02712742007020753, + "learning_rate": 1.7693664393548822e-05, + "loss": 0.0003, "step": 458 }, { - "epoch": 0.687125748502994, - "grad_norm": 0.23786995943332953, - "learning_rate": 1.800959459653224e-05, - "loss": 0.0041, + "epoch": 0.7344, + "grad_norm": 0.1108581252930463, + "learning_rate": 1.7682614003191807e-05, + "loss": 0.0007, "step": 459 }, { - "epoch": 0.688622754491018, - "grad_norm": 0.11727770023510513, - "learning_rate": 1.79999035859665e-05, - "loss": 0.0018, + "epoch": 0.736, + "grad_norm": 0.012332852158620117, + "learning_rate": 1.7671540671383245e-05, + "loss": 0.0002, "step": 460 }, { - "epoch": 0.6901197604790419, - "grad_norm": 0.190583040451331, - "learning_rate": 1.7990191661318942e-05, - "loss": 0.0044, + "epoch": 0.7376, + "grad_norm": 0.1661080884851385, + "learning_rate": 1.766044443118978e-05, + "loss": 0.0014, "step": 461 }, { - "epoch": 0.6916167664670658, - "grad_norm": 0.19155482816471409, - "learning_rate": 1.798045884797938e-05, - "loss": 0.0034, + "epoch": 0.7392, + "grad_norm": 0.15411155835183601, + "learning_rate": 1.764932531574648e-05, + "loss": 0.0014, "step": 462 }, { - "epoch": 0.6931137724550899, - "grad_norm": 0.2432660367460031, - "learning_rate": 1.797070517139223e-05, - "loss": 0.0064, + "epoch": 0.7408, + "grad_norm": 0.06491648792911973, + "learning_rate": 1.76381833582567e-05, + "loss": 0.0004, "step": 463 }, { - "epoch": 0.6946107784431138, - "grad_norm": 0.16246882216240055, - "learning_rate": 1.796093065705644e-05, - "loss": 0.0033, + "epoch": 0.7424, + "grad_norm": 0.1235036068169899, + "learning_rate": 1.762701859199202e-05, + "loss": 0.0013, "step": 464 }, { - "epoch": 0.6961077844311377, - "grad_norm": 0.10829584717187421, - "learning_rate": 1.7951135330525447e-05, - "loss": 0.0021, + "epoch": 0.744, + "grad_norm": 0.03855206819431079, + "learning_rate": 1.761583105029213e-05, + "loss": 0.0004, "step": 465 }, { - "epoch": 0.6976047904191617, - "grad_norm": 0.13169748817522234, - "learning_rate": 1.794131921740709e-05, - "loss": 0.0034, + "epoch": 0.7456, + "grad_norm": 0.05325264463367167, + "learning_rate": 1.7604620766564725e-05, + "loss": 0.0006, "step": 466 }, { - "epoch": 0.6991017964071856, - "grad_norm": 0.22261122260930077, - "learning_rate": 1.7931482343363552e-05, - "loss": 0.0032, + "epoch": 0.7472, + "grad_norm": 0.04346566688047951, + "learning_rate": 1.7593387774285412e-05, + "loss": 0.0004, "step": 467 }, { - "epoch": 0.7005988023952096, - "grad_norm": 0.21429988838318564, - "learning_rate": 1.7921624734111292e-05, - "loss": 0.0064, + "epoch": 0.7488, + "grad_norm": 0.136998305712324, + "learning_rate": 1.7582132106997615e-05, + "loss": 0.0021, "step": 468 }, { - "epoch": 0.7020958083832335, - "grad_norm": 0.15085978142323003, - "learning_rate": 1.7911746415420982e-05, - "loss": 0.0019, + "epoch": 0.7504, + "grad_norm": 0.007818539193188706, + "learning_rate": 1.7570853798312462e-05, + "loss": 0.0002, "step": 469 }, { - "epoch": 0.7035928143712575, - "grad_norm": 0.09480266222976796, - "learning_rate": 1.7901847413117424e-05, - "loss": 0.0018, + "epoch": 0.752, + "grad_norm": 0.01723914456383822, + "learning_rate": 1.7559552881908698e-05, + "loss": 0.0002, "step": 470 }, { - "epoch": 0.7050898203592815, - "grad_norm": 0.1840037147393193, - "learning_rate": 1.7891927753079495e-05, - "loss": 0.0033, + "epoch": 0.7536, + "grad_norm": 0.07756515160296149, + "learning_rate": 1.7548229391532572e-05, + "loss": 0.0005, "step": 471 }, { - "epoch": 0.7065868263473054, - "grad_norm": 0.12251157236145369, - "learning_rate": 1.7881987461240086e-05, - "loss": 0.0019, + "epoch": 0.7552, + "grad_norm": 0.023715500088758376, + "learning_rate": 1.7536883360997743e-05, + "loss": 0.0003, "step": 472 }, { - "epoch": 0.7080838323353293, - "grad_norm": 0.3770740041300004, - "learning_rate": 1.7872026563586015e-05, - "loss": 0.0061, + "epoch": 0.7568, + "grad_norm": 0.13636676401004547, + "learning_rate": 1.7525514824185187e-05, + "loss": 0.0019, "step": 473 }, { - "epoch": 0.7095808383233533, - "grad_norm": 0.14686337723150117, - "learning_rate": 1.7862045086157975e-05, - "loss": 0.0036, + "epoch": 0.7584, + "grad_norm": 0.0144699251390561, + "learning_rate": 1.7514123815043073e-05, + "loss": 0.0002, "step": 474 }, { - "epoch": 0.7110778443113772, - "grad_norm": 0.12202140713465083, - "learning_rate": 1.7852043055050466e-05, - "loss": 0.0028, + "epoch": 0.76, + "grad_norm": 0.02802581269937591, + "learning_rate": 1.750271036758669e-05, + "loss": 0.0002, "step": 475 }, { - "epoch": 0.7125748502994012, - "grad_norm": 0.20749262333762236, - "learning_rate": 1.7842020496411707e-05, - "loss": 0.0021, + "epoch": 0.7616, + "grad_norm": 0.13859051756276258, + "learning_rate": 1.749127451589832e-05, + "loss": 0.001, "step": 476 }, { - "epoch": 0.7140718562874252, - "grad_norm": 0.243735318182604, - "learning_rate": 1.7831977436443594e-05, - "loss": 0.004, + "epoch": 0.7632, + "grad_norm": 0.08188164186474299, + "learning_rate": 1.747981629412715e-05, + "loss": 0.0031, "step": 477 }, { - "epoch": 0.7155688622754491, - "grad_norm": 0.2123776492606604, - "learning_rate": 1.7821913901401616e-05, - "loss": 0.0033, + "epoch": 0.7648, + "grad_norm": 0.1023395386215927, + "learning_rate": 1.7468335736489177e-05, + "loss": 0.001, "step": 478 }, { - "epoch": 0.7170658682634731, - "grad_norm": 0.0860593828548492, - "learning_rate": 1.781182991759479e-05, - "loss": 0.0012, + "epoch": 0.7664, + "grad_norm": 0.233632163259218, + "learning_rate": 1.7456832877267083e-05, + "loss": 0.003, "step": 479 }, { - "epoch": 0.718562874251497, - "grad_norm": 0.17480422685210906, - "learning_rate": 1.780172551138559e-05, - "loss": 0.0037, + "epoch": 0.768, + "grad_norm": 0.40600459683828993, + "learning_rate": 1.7445307750810153e-05, + "loss": 0.0002, "step": 480 }, { - "epoch": 0.7200598802395209, - "grad_norm": 0.2672043506973695, - "learning_rate": 1.779160070918988e-05, - "loss": 0.0046, + "epoch": 0.7696, + "grad_norm": 0.03720264501874191, + "learning_rate": 1.7433760391534166e-05, + "loss": 0.0003, "step": 481 }, { - "epoch": 0.7215568862275449, - "grad_norm": 0.30149827478860836, - "learning_rate": 1.7781455537476848e-05, - "loss": 0.0052, + "epoch": 0.7712, + "grad_norm": 0.0053336116920516185, + "learning_rate": 1.7422190833921284e-05, + "loss": 0.0001, "step": 482 }, { - "epoch": 0.7230538922155688, - "grad_norm": 0.23829191956670007, - "learning_rate": 1.7771290022768933e-05, - "loss": 0.0032, + "epoch": 0.7728, + "grad_norm": 0.010534130055701448, + "learning_rate": 1.741059911251997e-05, + "loss": 0.0002, "step": 483 }, { - "epoch": 0.7245508982035929, - "grad_norm": 0.19471894040554305, - "learning_rate": 1.776110419164176e-05, - "loss": 0.0025, + "epoch": 0.7744, + "grad_norm": 0.0279036924483479, + "learning_rate": 1.7398985261944857e-05, + "loss": 0.0003, "step": 484 }, { - "epoch": 0.7260479041916168, - "grad_norm": 0.3281317127559268, - "learning_rate": 1.7750898070724057e-05, - "loss": 0.0053, + "epoch": 0.776, + "grad_norm": 0.07436924158810333, + "learning_rate": 1.7387349316876668e-05, + "loss": 0.0044, "step": 485 }, { - "epoch": 0.7275449101796407, - "grad_norm": 0.22137467518837572, - "learning_rate": 1.7740671686697605e-05, - "loss": 0.005, + "epoch": 0.7776, + "grad_norm": 0.0782832167728117, + "learning_rate": 1.7375691312062102e-05, + "loss": 0.0021, "step": 486 }, { - "epoch": 0.7290419161676647, - "grad_norm": 0.16708307413649082, - "learning_rate": 1.7730425066297158e-05, - "loss": 0.0037, + "epoch": 0.7792, + "grad_norm": 0.00800420181406178, + "learning_rate": 1.7364011282313732e-05, + "loss": 0.0002, "step": 487 }, { - "epoch": 0.7305389221556886, - "grad_norm": 0.0707938538877929, - "learning_rate": 1.772015823631036e-05, - "loss": 0.0023, + "epoch": 0.7808, + "grad_norm": 0.06008163797598921, + "learning_rate": 1.7352309262509894e-05, + "loss": 0.0006, "step": 488 }, { - "epoch": 0.7320359281437125, - "grad_norm": 0.1183727230604871, - "learning_rate": 1.7709871223577713e-05, - "loss": 0.0024, + "epoch": 0.7824, + "grad_norm": 0.2942563905755528, + "learning_rate": 1.7340585287594605e-05, + "loss": 0.0011, "step": 489 }, { - "epoch": 0.7335329341317365, - "grad_norm": 0.20204690451609672, - "learning_rate": 1.769956405499246e-05, - "loss": 0.0045, + "epoch": 0.784, + "grad_norm": 0.08926818808154893, + "learning_rate": 1.7328839392577422e-05, + "loss": 0.0008, "step": 490 }, { - "epoch": 0.7350299401197605, - "grad_norm": 0.22427020009198373, - "learning_rate": 1.768923675750055e-05, - "loss": 0.0037, + "epoch": 0.7856, + "grad_norm": 0.04506569681041617, + "learning_rate": 1.731707161253338e-05, + "loss": 0.0003, "step": 491 }, { - "epoch": 0.7365269461077845, - "grad_norm": 0.09360542375246439, - "learning_rate": 1.767888935810055e-05, - "loss": 0.0026, + "epoch": 0.7872, + "grad_norm": 0.07750705989037712, + "learning_rate": 1.730528198260285e-05, + "loss": 0.0005, "step": 492 }, { - "epoch": 0.7380239520958084, - "grad_norm": 0.18339744192805107, - "learning_rate": 1.7668521883843586e-05, - "loss": 0.003, + "epoch": 0.7888, + "grad_norm": 0.09605363767215523, + "learning_rate": 1.7293470537991463e-05, + "loss": 0.0007, "step": 493 }, { - "epoch": 0.7395209580838323, - "grad_norm": 0.1079849963238365, - "learning_rate": 1.7658134361833253e-05, - "loss": 0.0019, + "epoch": 0.7904, + "grad_norm": 0.01733481247306688, + "learning_rate": 1.728163731396998e-05, + "loss": 0.0003, "step": 494 }, { - "epoch": 0.7410179640718563, - "grad_norm": 0.1345391487313826, - "learning_rate": 1.764772681922557e-05, - "loss": 0.0027, + "epoch": 0.792, + "grad_norm": 0.06161393660807383, + "learning_rate": 1.7269782345874204e-05, + "loss": 0.0009, "step": 495 }, { - "epoch": 0.7425149700598802, - "grad_norm": 0.14603341686152715, - "learning_rate": 1.7637299283228887e-05, - "loss": 0.0032, + "epoch": 0.7936, + "grad_norm": 0.12245228516343439, + "learning_rate": 1.7257905669104874e-05, + "loss": 0.0005, "step": 496 }, { - "epoch": 0.7440119760479041, - "grad_norm": 0.16829676525727566, - "learning_rate": 1.762685178110382e-05, - "loss": 0.0029, + "epoch": 0.7952, + "grad_norm": 0.14061289559871648, + "learning_rate": 1.7246007319127547e-05, + "loss": 0.0022, "step": 497 }, { - "epoch": 0.7455089820359282, - "grad_norm": 0.2128675734460732, - "learning_rate": 1.7616384340163196e-05, - "loss": 0.0019, + "epoch": 0.7968, + "grad_norm": 0.09233719827904271, + "learning_rate": 1.72340873314725e-05, + "loss": 0.0009, "step": 498 }, { - "epoch": 0.7470059880239521, - "grad_norm": 0.12654769972937424, - "learning_rate": 1.7605896987771957e-05, - "loss": 0.0026, + "epoch": 0.7984, + "grad_norm": 0.009727144786638353, + "learning_rate": 1.7222145741734625e-05, + "loss": 0.0002, "step": 499 }, { - "epoch": 0.7485029940119761, - "grad_norm": 0.05725603798182813, - "learning_rate": 1.75953897513471e-05, - "loss": 0.0011, + "epoch": 0.8, + "grad_norm": 0.15069270935914425, + "learning_rate": 1.721018258557333e-05, + "loss": 0.0023, "step": 500 }, { - "epoch": 0.75, - "grad_norm": 0.12305721333189583, - "learning_rate": 1.7584862658357603e-05, - "loss": 0.002, + "epoch": 0.8016, + "grad_norm": 0.13093952054952349, + "learning_rate": 1.7198197898712402e-05, + "loss": 0.0008, "step": 501 }, { - "epoch": 0.7514970059880239, - "grad_norm": 0.1998661971442016, - "learning_rate": 1.757431573632437e-05, - "loss": 0.0028, + "epoch": 0.8032, + "grad_norm": 0.08970411506665783, + "learning_rate": 1.7186191716939946e-05, + "loss": 0.0006, "step": 502 }, { - "epoch": 0.7529940119760479, - "grad_norm": 0.21334825352182804, - "learning_rate": 1.7563749012820115e-05, - "loss": 0.0042, + "epoch": 0.8048, + "grad_norm": 0.02490276069628502, + "learning_rate": 1.717416407610824e-05, + "loss": 0.0003, "step": 503 }, { - "epoch": 0.7544910179640718, - "grad_norm": 0.08550116259152069, - "learning_rate": 1.7553162515469346e-05, - "loss": 0.0015, + "epoch": 0.8064, + "grad_norm": 0.04979174521877706, + "learning_rate": 1.7162115012133643e-05, + "loss": 0.0004, "step": 504 }, { - "epoch": 0.7559880239520959, - "grad_norm": 0.3173087453070982, - "learning_rate": 1.754255627194826e-05, - "loss": 0.0027, + "epoch": 0.808, + "grad_norm": 0.00856385237921387, + "learning_rate": 1.7150044560996488e-05, + "loss": 0.0002, "step": 505 }, { - "epoch": 0.7574850299401198, - "grad_norm": 0.12840517943608828, - "learning_rate": 1.7531930309984663e-05, - "loss": 0.0021, + "epoch": 0.8096, + "grad_norm": 0.008284973985246698, + "learning_rate": 1.713795275874098e-05, + "loss": 0.0002, "step": 506 }, { - "epoch": 0.7589820359281437, - "grad_norm": 0.22380401998833405, - "learning_rate": 1.752128465735792e-05, - "loss": 0.0042, + "epoch": 0.8112, + "grad_norm": 0.06734893182692521, + "learning_rate": 1.7125839641475074e-05, + "loss": 0.0006, "step": 507 }, { - "epoch": 0.7604790419161677, - "grad_norm": 0.17755574559866727, - "learning_rate": 1.751061934189888e-05, - "loss": 0.0043, + "epoch": 0.8128, + "grad_norm": 0.18508359604792338, + "learning_rate": 1.711370524537037e-05, + "loss": 0.0025, "step": 508 }, { - "epoch": 0.7619760479041916, - "grad_norm": 0.13841142360132738, - "learning_rate": 1.749993439148978e-05, - "loss": 0.0027, + "epoch": 0.8144, + "grad_norm": 0.13809444329979642, + "learning_rate": 1.7101549606662025e-05, + "loss": 0.0011, "step": 509 }, { - "epoch": 0.7634730538922155, - "grad_norm": 0.19949827808136236, - "learning_rate": 1.7489229834064207e-05, - "loss": 0.0015, + "epoch": 0.816, + "grad_norm": 0.5696537447435555, + "learning_rate": 1.7089372761648617e-05, + "loss": 0.0006, "step": 510 }, { - "epoch": 0.7649700598802395, - "grad_norm": 0.08462524659982372, - "learning_rate": 1.747850569760699e-05, - "loss": 0.002, + "epoch": 0.8176, + "grad_norm": 0.02721585821128559, + "learning_rate": 1.7077174746692054e-05, + "loss": 0.0004, "step": 511 }, { - "epoch": 0.7664670658682635, - "grad_norm": 0.185019733009154, - "learning_rate": 1.7467762010154148e-05, - "loss": 0.0028, + "epoch": 0.8192, + "grad_norm": 0.09701224812397982, + "learning_rate": 1.7064955598217463e-05, + "loss": 0.0008, "step": 512 }, { - "epoch": 0.7679640718562875, - "grad_norm": 0.14537973618978656, - "learning_rate": 1.7456998799792822e-05, - "loss": 0.0045, + "epoch": 0.8208, + "grad_norm": 0.049111193053924744, + "learning_rate": 1.7052715352713076e-05, + "loss": 0.0004, "step": 513 }, { - "epoch": 0.7694610778443114, - "grad_norm": 0.0919302359343689, - "learning_rate": 1.7446216094661183e-05, - "loss": 0.0017, + "epoch": 0.8224, + "grad_norm": 0.12230021958553583, + "learning_rate": 1.7040454046730118e-05, + "loss": 0.0009, "step": 514 }, { - "epoch": 0.7709580838323353, - "grad_norm": 0.09806191993657888, - "learning_rate": 1.7435413922948358e-05, - "loss": 0.0015, + "epoch": 0.824, + "grad_norm": 0.012243744364003616, + "learning_rate": 1.7028171716882714e-05, + "loss": 0.0002, "step": 515 }, { - "epoch": 0.7724550898203593, - "grad_norm": 0.17581073699332905, - "learning_rate": 1.742459231289439e-05, - "loss": 0.0036, + "epoch": 0.8256, + "grad_norm": 0.08499201626336292, + "learning_rate": 1.7015868399847768e-05, + "loss": 0.0004, "step": 516 }, { - "epoch": 0.7739520958083832, - "grad_norm": 0.16231931806189956, - "learning_rate": 1.7413751292790113e-05, - "loss": 0.0035, + "epoch": 0.8272, + "grad_norm": 0.27820795609998483, + "learning_rate": 1.7003544132364847e-05, + "loss": 0.0034, "step": 517 }, { - "epoch": 0.7754491017964071, - "grad_norm": 0.13525922667777576, - "learning_rate": 1.740289089097712e-05, - "loss": 0.0031, + "epoch": 0.8288, + "grad_norm": 0.02335971532394588, + "learning_rate": 1.6991198951236088e-05, + "loss": 0.0003, "step": 518 }, { - "epoch": 0.7769461077844312, - "grad_norm": 0.20823952255636147, - "learning_rate": 1.739201113584767e-05, - "loss": 0.0023, + "epoch": 0.8304, + "grad_norm": 0.4862075696612623, + "learning_rate": 1.6978832893326074e-05, + "loss": 0.0024, "step": 519 }, { - "epoch": 0.7784431137724551, - "grad_norm": 0.16208283117260136, - "learning_rate": 1.738111205584462e-05, - "loss": 0.0014, + "epoch": 0.832, + "grad_norm": 0.0493441407586534, + "learning_rate": 1.696644599556173e-05, + "loss": 0.0005, "step": 520 }, { - "epoch": 0.7799401197604791, - "grad_norm": 0.07133110277997594, - "learning_rate": 1.737019367946134e-05, - "loss": 0.0012, + "epoch": 0.8336, + "grad_norm": 0.006952259716357961, + "learning_rate": 1.6954038294932215e-05, + "loss": 0.0002, "step": 521 }, { - "epoch": 0.781437125748503, - "grad_norm": 0.08862054013594506, - "learning_rate": 1.735925603524165e-05, - "loss": 0.0015, + "epoch": 0.8352, + "grad_norm": 0.06091477273979244, + "learning_rate": 1.6941609828488806e-05, + "loss": 0.0004, "step": 522 }, { - "epoch": 0.7829341317365269, - "grad_norm": 0.18494335464634515, - "learning_rate": 1.7348299151779748e-05, - "loss": 0.0023, + "epoch": 0.8368, + "grad_norm": 0.07508272808691038, + "learning_rate": 1.692916063334479e-05, + "loss": 0.003, "step": 523 }, { - "epoch": 0.7844311377245509, - "grad_norm": 0.2671256300047652, - "learning_rate": 1.7337323057720122e-05, - "loss": 0.0047, + "epoch": 0.8384, + "grad_norm": 0.01362913734282188, + "learning_rate": 1.691669074667535e-05, + "loss": 0.0002, "step": 524 }, { - "epoch": 0.7859281437125748, - "grad_norm": 0.2617413925072033, - "learning_rate": 1.7326327781757482e-05, - "loss": 0.0038, + "epoch": 0.84, + "grad_norm": 0.10616161563646664, + "learning_rate": 1.690420020571747e-05, + "loss": 0.0006, "step": 525 }, { - "epoch": 0.7874251497005988, - "grad_norm": 0.125912865428105, - "learning_rate": 1.731531335263669e-05, - "loss": 0.0028, + "epoch": 0.8416, + "grad_norm": 0.03894127123081356, + "learning_rate": 1.689168904776979e-05, + "loss": 0.0003, "step": 526 }, { - "epoch": 0.7889221556886228, - "grad_norm": 0.13267509240686437, - "learning_rate": 1.7304279799152677e-05, - "loss": 0.0033, + "epoch": 0.8432, + "grad_norm": 0.1378168598257233, + "learning_rate": 1.6879157310192537e-05, + "loss": 0.0023, "step": 527 }, { - "epoch": 0.7904191616766467, - "grad_norm": 0.14976117950694603, - "learning_rate": 1.7293227150150366e-05, - "loss": 0.0029, + "epoch": 0.8448, + "grad_norm": 0.05358355520865817, + "learning_rate": 1.686660503040737e-05, + "loss": 0.0005, "step": 528 }, { - "epoch": 0.7919161676646707, - "grad_norm": 0.09070339743848918, - "learning_rate": 1.728215543452461e-05, - "loss": 0.0016, + "epoch": 0.8464, + "grad_norm": 0.041106143597292144, + "learning_rate": 1.685403224589731e-05, + "loss": 0.0004, "step": 529 }, { - "epoch": 0.7934131736526946, - "grad_norm": 0.14490201715247109, - "learning_rate": 1.7271064681220105e-05, - "loss": 0.0016, + "epoch": 0.848, + "grad_norm": 0.019024841308057497, + "learning_rate": 1.6841438994206597e-05, + "loss": 0.0003, "step": 530 }, { - "epoch": 0.7949101796407185, - "grad_norm": 0.22041533329824736, - "learning_rate": 1.725995491923131e-05, - "loss": 0.0048, + "epoch": 0.8496, + "grad_norm": 0.09954312413327895, + "learning_rate": 1.6828825312940594e-05, + "loss": 0.004, "step": 531 }, { - "epoch": 0.7964071856287425, - "grad_norm": 0.18152422489317313, - "learning_rate": 1.724882617760239e-05, - "loss": 0.0024, + "epoch": 0.8512, + "grad_norm": 0.01904708410465074, + "learning_rate": 1.6816191239765668e-05, + "loss": 0.0002, "step": 532 }, { - "epoch": 0.7979041916167665, - "grad_norm": 0.13200938457839367, - "learning_rate": 1.7237678485427117e-05, - "loss": 0.0025, + "epoch": 0.8528, + "grad_norm": 0.057975810389029034, + "learning_rate": 1.6803536812409077e-05, + "loss": 0.0004, "step": 533 }, { - "epoch": 0.7994011976047904, - "grad_norm": 0.2569963553188449, - "learning_rate": 1.7226511871848816e-05, - "loss": 0.003, + "epoch": 0.8544, + "grad_norm": 0.11015783475479203, + "learning_rate": 1.6790862068658863e-05, + "loss": 0.0008, "step": 534 }, { - "epoch": 0.8008982035928144, - "grad_norm": 0.11384609771555894, - "learning_rate": 1.7215326366060266e-05, - "loss": 0.0024, + "epoch": 0.856, + "grad_norm": 0.04375619009470118, + "learning_rate": 1.6778167046363735e-05, + "loss": 0.0003, "step": 535 }, { - "epoch": 0.8023952095808383, - "grad_norm": 0.07638650411484138, - "learning_rate": 1.720412199730365e-05, - "loss": 0.0011, + "epoch": 0.8576, + "grad_norm": 0.03149655971331083, + "learning_rate": 1.6765451783432953e-05, + "loss": 0.0003, "step": 536 }, { - "epoch": 0.8038922155688623, - "grad_norm": 0.12630645830506568, - "learning_rate": 1.719289879487045e-05, - "loss": 0.0017, + "epoch": 0.8592, + "grad_norm": 0.015188077650469912, + "learning_rate": 1.675271631783623e-05, + "loss": 0.0002, "step": 537 }, { - "epoch": 0.8053892215568862, - "grad_norm": 0.17122405410573677, - "learning_rate": 1.7181656788101394e-05, - "loss": 0.0018, + "epoch": 0.8608, + "grad_norm": 0.08937385959607695, + "learning_rate": 1.6739960687603592e-05, + "loss": 0.0017, "step": 538 }, { - "epoch": 0.8068862275449101, - "grad_norm": 0.11978605848911922, - "learning_rate": 1.7170396006386364e-05, - "loss": 0.0022, + "epoch": 0.8624, + "grad_norm": 0.046733492907307606, + "learning_rate": 1.672718493082529e-05, + "loss": 0.0004, "step": 539 }, { - "epoch": 0.8083832335329342, - "grad_norm": 0.22805307525076485, - "learning_rate": 1.7159116479164336e-05, - "loss": 0.0017, + "epoch": 0.864, + "grad_norm": 0.14393512602359823, + "learning_rate": 1.671438908565167e-05, + "loss": 0.0011, "step": 540 }, { - "epoch": 0.8098802395209581, - "grad_norm": 0.042607090167363824, - "learning_rate": 1.7147818235923282e-05, - "loss": 0.001, + "epoch": 0.8656, + "grad_norm": 0.03969373767492347, + "learning_rate": 1.6701573190293076e-05, + "loss": 0.0005, "step": 541 }, { - "epoch": 0.811377245508982, - "grad_norm": 0.3303493277968501, - "learning_rate": 1.71365013062001e-05, - "loss": 0.003, + "epoch": 0.8672, + "grad_norm": 0.07936059414567308, + "learning_rate": 1.6688737283019708e-05, + "loss": 0.0005, "step": 542 }, { - "epoch": 0.812874251497006, - "grad_norm": 0.20139977963643052, - "learning_rate": 1.712516571958055e-05, - "loss": 0.0051, + "epoch": 0.8688, + "grad_norm": 0.03756583107434095, + "learning_rate": 1.667588140216154e-05, + "loss": 0.0003, "step": 543 }, { - "epoch": 0.8143712574850299, - "grad_norm": 0.18852567500825385, - "learning_rate": 1.711381150569917e-05, - "loss": 0.0023, + "epoch": 0.8704, + "grad_norm": 0.06389471297505031, + "learning_rate": 1.6663005586108175e-05, + "loss": 0.0005, "step": 544 }, { - "epoch": 0.8158682634730539, - "grad_norm": 0.1533989779344055, - "learning_rate": 1.7102438694239172e-05, - "loss": 0.0015, + "epoch": 0.872, + "grad_norm": 0.027066712019042576, + "learning_rate": 1.6650109873308763e-05, + "loss": 0.0003, "step": 545 }, { - "epoch": 0.8173652694610778, - "grad_norm": 0.18945005903791085, - "learning_rate": 1.7091047314932418e-05, - "loss": 0.0052, + "epoch": 0.8736, + "grad_norm": 0.07561152443479349, + "learning_rate": 1.663719430227186e-05, + "loss": 0.0006, "step": 546 }, { - "epoch": 0.8188622754491018, - "grad_norm": 0.10940559482924138, - "learning_rate": 1.7079637397559284e-05, - "loss": 0.0036, + "epoch": 0.8752, + "grad_norm": 0.04267934929242688, + "learning_rate": 1.6624258911565312e-05, + "loss": 0.0003, "step": 547 }, { - "epoch": 0.8203592814371258, - "grad_norm": 0.1733647440192747, - "learning_rate": 1.706820897194863e-05, - "loss": 0.0026, + "epoch": 0.8768, + "grad_norm": 0.16933992446835514, + "learning_rate": 1.661130373981617e-05, + "loss": 0.0014, "step": 548 }, { - "epoch": 0.8218562874251497, - "grad_norm": 0.2284794974338924, - "learning_rate": 1.7056762067977696e-05, - "loss": 0.0026, + "epoch": 0.8784, + "grad_norm": 0.25073619762167976, + "learning_rate": 1.6598328825710536e-05, + "loss": 0.0013, "step": 549 }, { - "epoch": 0.8233532934131736, - "grad_norm": 0.14329072634628595, - "learning_rate": 1.7045296715572024e-05, - "loss": 0.0024, + "epoch": 0.88, + "grad_norm": 0.16778477153881266, + "learning_rate": 1.6585334207993475e-05, + "loss": 0.0004, "step": 550 }, { - "epoch": 0.8248502994011976, - "grad_norm": 0.31093763718850237, - "learning_rate": 1.703381294470539e-05, - "loss": 0.0033, + "epoch": 0.8816, + "grad_norm": 0.11826857642808859, + "learning_rate": 1.6572319925468892e-05, + "loss": 0.001, "step": 551 }, { - "epoch": 0.8263473053892215, - "grad_norm": 0.20477098480741565, - "learning_rate": 1.7022310785399722e-05, - "loss": 0.0028, + "epoch": 0.8832, + "grad_norm": 0.04772459571547264, + "learning_rate": 1.65592860169994e-05, + "loss": 0.0003, "step": 552 }, { - "epoch": 0.8278443113772455, - "grad_norm": 0.10382900594838523, - "learning_rate": 1.7010790267725023e-05, - "loss": 0.0017, + "epoch": 0.8848, + "grad_norm": 0.02117988616831091, + "learning_rate": 1.654623252150624e-05, + "loss": 0.0002, "step": 553 }, { - "epoch": 0.8293413173652695, - "grad_norm": 0.10870430271363642, - "learning_rate": 1.6999251421799285e-05, - "loss": 0.0014, + "epoch": 0.8864, + "grad_norm": 0.048392587850409105, + "learning_rate": 1.6533159477969122e-05, + "loss": 0.0004, "step": 554 }, { - "epoch": 0.8308383233532934, - "grad_norm": 0.2938613690271062, - "learning_rate": 1.698769427778842e-05, - "loss": 0.0043, + "epoch": 0.888, + "grad_norm": 0.006232812342251042, + "learning_rate": 1.6520066925426146e-05, + "loss": 0.0001, "step": 555 }, { - "epoch": 0.8323353293413174, - "grad_norm": 0.16919545121928964, - "learning_rate": 1.6976118865906167e-05, - "loss": 0.0024, + "epoch": 0.8896, + "grad_norm": 0.057442234935372385, + "learning_rate": 1.6506954902973657e-05, + "loss": 0.0005, "step": 556 }, { - "epoch": 0.8338323353293413, - "grad_norm": 0.19047981855746474, - "learning_rate": 1.6964525216414033e-05, - "loss": 0.0028, + "epoch": 0.8912, + "grad_norm": 0.2509062616236011, + "learning_rate": 1.6493823449766137e-05, + "loss": 0.0032, "step": 557 }, { - "epoch": 0.8353293413173652, - "grad_norm": 0.15373361819794654, - "learning_rate": 1.6952913359621204e-05, - "loss": 0.0028, + "epoch": 0.8928, + "grad_norm": 0.11471451638158715, + "learning_rate": 1.648067260501611e-05, + "loss": 0.0008, "step": 558 }, { - "epoch": 0.8368263473053892, - "grad_norm": 0.13994232783724012, - "learning_rate": 1.6941283325884458e-05, - "loss": 0.0019, + "epoch": 0.8944, + "grad_norm": 0.24056809953548722, + "learning_rate": 1.6467502407993995e-05, + "loss": 0.0023, "step": 559 }, { - "epoch": 0.8383233532934131, - "grad_norm": 0.20016507933450822, - "learning_rate": 1.6929635145608098e-05, - "loss": 0.0031, + "epoch": 0.896, + "grad_norm": 0.005796607367392201, + "learning_rate": 1.6454312898027992e-05, + "loss": 0.0001, "step": 560 }, { - "epoch": 0.8398203592814372, - "grad_norm": 0.11790210879503302, - "learning_rate": 1.691796884924386e-05, - "loss": 0.0017, + "epoch": 0.8976, + "grad_norm": 0.07955779624413827, + "learning_rate": 1.644110411450398e-05, + "loss": 0.0024, "step": 561 }, { - "epoch": 0.8413173652694611, - "grad_norm": 0.060596148104514636, - "learning_rate": 1.6906284467290852e-05, - "loss": 0.0009, + "epoch": 0.8992, + "grad_norm": 0.0066621476684513466, + "learning_rate": 1.6427876096865394e-05, + "loss": 0.0001, "step": 562 }, { - "epoch": 0.842814371257485, - "grad_norm": 0.11618444614587378, - "learning_rate": 1.6894582030295458e-05, - "loss": 0.0018, + "epoch": 0.9008, + "grad_norm": 0.01414635288312015, + "learning_rate": 1.6414628884613106e-05, + "loss": 0.0002, "step": 563 }, { - "epoch": 0.844311377245509, - "grad_norm": 0.16389923156347438, - "learning_rate": 1.6882861568851258e-05, - "loss": 0.0025, + "epoch": 0.9024, + "grad_norm": 0.05874496851316498, + "learning_rate": 1.6401362517305296e-05, + "loss": 0.0003, "step": 564 }, { - "epoch": 0.8458083832335329, - "grad_norm": 0.2998453874019442, - "learning_rate": 1.6871123113598954e-05, - "loss": 0.0029, + "epoch": 0.904, + "grad_norm": 0.06435940379851529, + "learning_rate": 1.6388077034557355e-05, + "loss": 0.0004, "step": 565 }, { - "epoch": 0.8473053892215568, - "grad_norm": 0.2986815290378479, - "learning_rate": 1.6859366695226303e-05, - "loss": 0.003, + "epoch": 0.9056, + "grad_norm": 0.004431934862705762, + "learning_rate": 1.637477247604175e-05, + "loss": 0.0001, "step": 566 }, { - "epoch": 0.8488023952095808, - "grad_norm": 0.21672636750976904, - "learning_rate": 1.6847592344468e-05, - "loss": 0.0035, + "epoch": 0.9072, + "grad_norm": 0.12964315424935577, + "learning_rate": 1.6361448881487913e-05, + "loss": 0.002, "step": 567 }, { - "epoch": 0.8502994011976048, - "grad_norm": 0.10471468615303521, - "learning_rate": 1.6835800092105647e-05, - "loss": 0.0015, + "epoch": 0.9088, + "grad_norm": 0.0405461289802062, + "learning_rate": 1.6348106290682117e-05, + "loss": 0.0002, "step": 568 }, { - "epoch": 0.8517964071856288, - "grad_norm": 0.12014153904526044, - "learning_rate": 1.682398996896762e-05, - "loss": 0.0016, + "epoch": 0.9104, + "grad_norm": 0.04518253762629917, + "learning_rate": 1.6334744743467366e-05, + "loss": 0.0003, "step": 569 }, { - "epoch": 0.8532934131736527, - "grad_norm": 0.24386062518444432, - "learning_rate": 1.681216200592903e-05, - "loss": 0.0029, + "epoch": 0.912, + "grad_norm": 0.1278136950485491, + "learning_rate": 1.6321364279743267e-05, + "loss": 0.0005, "step": 570 }, { - "epoch": 0.8547904191616766, - "grad_norm": 0.4891409878621194, - "learning_rate": 1.6800316233911627e-05, - "loss": 0.003, + "epoch": 0.9136, + "grad_norm": 0.026243421967412844, + "learning_rate": 1.6307964939465914e-05, + "loss": 0.0003, "step": 571 }, { - "epoch": 0.8562874251497006, - "grad_norm": 0.11158025115959051, - "learning_rate": 1.678845268388371e-05, - "loss": 0.0013, + "epoch": 0.9152, + "grad_norm": 0.005911777918320239, + "learning_rate": 1.6294546762647775e-05, + "loss": 0.0001, "step": 572 }, { - "epoch": 0.8577844311377245, - "grad_norm": 0.16176832999905424, - "learning_rate": 1.677657138686006e-05, - "loss": 0.0029, + "epoch": 0.9168, + "grad_norm": 0.016433329334518097, + "learning_rate": 1.628110978935756e-05, + "loss": 0.0002, "step": 573 }, { - "epoch": 0.8592814371257484, - "grad_norm": 0.2862714242004369, - "learning_rate": 1.6764672373901857e-05, - "loss": 0.0044, + "epoch": 0.9184, + "grad_norm": 0.024671596956701715, + "learning_rate": 1.626765405972011e-05, + "loss": 0.0002, "step": 574 }, { - "epoch": 0.8607784431137725, - "grad_norm": 0.23371137768377162, - "learning_rate": 1.6752755676116586e-05, - "loss": 0.0029, + "epoch": 0.92, + "grad_norm": 0.15693017537160817, + "learning_rate": 1.625417961391628e-05, + "loss": 0.0022, "step": 575 }, { - "epoch": 0.8622754491017964, - "grad_norm": 0.16513171198212837, - "learning_rate": 1.6740821324657983e-05, - "loss": 0.0028, + "epoch": 0.9216, + "grad_norm": 0.1821290033572893, + "learning_rate": 1.6240686492182806e-05, + "loss": 0.0022, "step": 576 }, { - "epoch": 0.8637724550898204, - "grad_norm": 0.0989955200636401, - "learning_rate": 1.672886935072591e-05, - "loss": 0.001, + "epoch": 0.9232, + "grad_norm": 0.019637133645766462, + "learning_rate": 1.62271747348122e-05, + "loss": 0.0002, "step": 577 }, { - "epoch": 0.8652694610778443, - "grad_norm": 0.3256050916149061, - "learning_rate": 1.6716899785566324e-05, - "loss": 0.0041, + "epoch": 0.9248, + "grad_norm": 0.032264525390541345, + "learning_rate": 1.621364438215262e-05, + "loss": 0.0002, "step": 578 }, { - "epoch": 0.8667664670658682, - "grad_norm": 0.2090094852879972, - "learning_rate": 1.670491266047116e-05, - "loss": 0.0032, + "epoch": 0.9264, + "grad_norm": 0.17257532678963658, + "learning_rate": 1.6200095474607753e-05, + "loss": 0.0021, "step": 579 }, { - "epoch": 0.8682634730538922, - "grad_norm": 0.15452654886105557, - "learning_rate": 1.669290800677825e-05, - "loss": 0.0024, + "epoch": 0.928, + "grad_norm": 0.28931998966513617, + "learning_rate": 1.6186528052636692e-05, + "loss": 0.0006, "step": 580 }, { - "epoch": 0.8697604790419161, - "grad_norm": 0.12984740923736884, - "learning_rate": 1.668088585587127e-05, - "loss": 0.0021, + "epoch": 0.9296, + "grad_norm": 0.005154357147886349, + "learning_rate": 1.6172942156753822e-05, + "loss": 0.0001, "step": 581 }, { - "epoch": 0.8712574850299402, - "grad_norm": 0.20135825659725679, - "learning_rate": 1.666884623917963e-05, - "loss": 0.0025, + "epoch": 0.9312, + "grad_norm": 0.11689177832511896, + "learning_rate": 1.6159337827528686e-05, + "loss": 0.0023, "step": 582 }, { - "epoch": 0.8727544910179641, - "grad_norm": 0.2843598279050385, - "learning_rate": 1.6656789188178394e-05, - "loss": 0.0048, + "epoch": 0.9328, + "grad_norm": 0.01065347283461778, + "learning_rate": 1.614571510558588e-05, + "loss": 0.0002, "step": 583 }, { - "epoch": 0.874251497005988, - "grad_norm": 0.18200615608698034, - "learning_rate": 1.664471473438822e-05, - "loss": 0.0032, + "epoch": 0.9344, + "grad_norm": 0.026100680293083087, + "learning_rate": 1.6132074031604917e-05, + "loss": 0.0002, "step": 584 }, { - "epoch": 0.875748502994012, - "grad_norm": 0.10309687291550676, - "learning_rate": 1.6632622909375244e-05, - "loss": 0.0014, + "epoch": 0.936, + "grad_norm": 0.0512772513165749, + "learning_rate": 1.6118414646320115e-05, + "loss": 0.0005, "step": 585 }, { - "epoch": 0.8772455089820359, - "grad_norm": 0.06697443098407523, - "learning_rate": 1.662051374475103e-05, - "loss": 0.0008, + "epoch": 0.9376, + "grad_norm": 0.013347670491282037, + "learning_rate": 1.6104736990520468e-05, + "loss": 0.0002, "step": 586 }, { - "epoch": 0.8787425149700598, - "grad_norm": 0.20351165773382962, - "learning_rate": 1.660838727217246e-05, - "loss": 0.0025, + "epoch": 0.9392, + "grad_norm": 0.0794315824612763, + "learning_rate": 1.6091041105049542e-05, + "loss": 0.0005, "step": 587 }, { - "epoch": 0.8802395209580839, - "grad_norm": 0.19444272969584192, - "learning_rate": 1.6596243523341678e-05, - "loss": 0.0048, + "epoch": 0.9408, + "grad_norm": 0.0879977258359348, + "learning_rate": 1.6077327030805318e-05, + "loss": 0.001, "step": 588 }, { - "epoch": 0.8817365269461078, - "grad_norm": 0.1718287018179276, - "learning_rate": 1.658408253000598e-05, - "loss": 0.0036, + "epoch": 0.9424, + "grad_norm": 0.16486274087034078, + "learning_rate": 1.6063594808740112e-05, + "loss": 0.0025, "step": 589 }, { - "epoch": 0.8832335329341318, - "grad_norm": 0.1473170594956288, - "learning_rate": 1.657190432395776e-05, - "loss": 0.0033, + "epoch": 0.944, + "grad_norm": 0.3474795772948319, + "learning_rate": 1.604984447986042e-05, + "loss": 0.0011, "step": 590 }, { - "epoch": 0.8847305389221557, - "grad_norm": 0.05320979757128209, - "learning_rate": 1.6559708937034394e-05, - "loss": 0.0009, + "epoch": 0.9456, + "grad_norm": 0.013463877091480161, + "learning_rate": 1.6036076085226813e-05, + "loss": 0.0002, "step": 591 }, { - "epoch": 0.8862275449101796, - "grad_norm": 0.1948054351678213, - "learning_rate": 1.654749640111818e-05, - "loss": 0.0027, + "epoch": 0.9472, + "grad_norm": 0.10577053585975192, + "learning_rate": 1.602228966595381e-05, + "loss": 0.0011, "step": 592 }, { - "epoch": 0.8877245508982036, - "grad_norm": 0.1342630451535342, - "learning_rate": 1.6535266748136255e-05, - "loss": 0.0021, + "epoch": 0.9488, + "grad_norm": 0.11085121896124762, + "learning_rate": 1.6008485263209742e-05, + "loss": 0.0006, "step": 593 }, { - "epoch": 0.8892215568862275, - "grad_norm": 0.10784490714170987, - "learning_rate": 1.6523020010060497e-05, - "loss": 0.0027, + "epoch": 0.9504, + "grad_norm": 0.07342027964029993, + "learning_rate": 1.599466291821666e-05, + "loss": 0.001, "step": 594 }, { - "epoch": 0.8907185628742516, - "grad_norm": 0.1546876958105241, - "learning_rate": 1.6510756218907453e-05, - "loss": 0.0022, + "epoch": 0.952, + "grad_norm": 0.01696710108604813, + "learning_rate": 1.598082267225018e-05, + "loss": 0.0002, "step": 595 }, { - "epoch": 0.8922155688622755, - "grad_norm": 0.16370527891824135, - "learning_rate": 1.6498475406738248e-05, - "loss": 0.0023, + "epoch": 0.9536, + "grad_norm": 0.1328848387639668, + "learning_rate": 1.596696456663938e-05, + "loss": 0.0016, "step": 596 }, { - "epoch": 0.8937125748502994, - "grad_norm": 0.09977139023857336, - "learning_rate": 1.648617760565851e-05, - "loss": 0.0009, + "epoch": 0.9552, + "grad_norm": 0.012110705106772179, + "learning_rate": 1.595308864276666e-05, + "loss": 0.0002, "step": 597 }, { - "epoch": 0.8952095808383234, - "grad_norm": 0.14481161212272883, - "learning_rate": 1.647386284781828e-05, - "loss": 0.0026, + "epoch": 0.9568, + "grad_norm": 0.017146486661923956, + "learning_rate": 1.5939194942067647e-05, + "loss": 0.0002, "step": 598 }, { - "epoch": 0.8967065868263473, - "grad_norm": 0.2579988975748116, - "learning_rate": 1.6461531165411924e-05, - "loss": 0.0039, + "epoch": 0.9584, + "grad_norm": 0.027149900419092123, + "learning_rate": 1.592528350603103e-05, + "loss": 0.0003, "step": 599 }, { - "epoch": 0.8982035928143712, - "grad_norm": 0.11325757290508497, - "learning_rate": 1.644918259067806e-05, - "loss": 0.0015, + "epoch": 0.96, + "grad_norm": 0.0346024532568189, + "learning_rate": 1.5911354376198468e-05, + "loss": 0.0003, "step": 600 }, { - "epoch": 0.8997005988023952, - "grad_norm": 0.10693055025728564, - "learning_rate": 1.643681715589946e-05, - "loss": 0.0018, + "epoch": 0.9616, + "grad_norm": 0.11389671831412627, + "learning_rate": 1.5897407594164468e-05, + "loss": 0.0005, "step": 601 }, { - "epoch": 0.9011976047904192, - "grad_norm": 0.10387304790724435, - "learning_rate": 1.6424434893402988e-05, - "loss": 0.0016, + "epoch": 0.9632, + "grad_norm": 0.03419471934683977, + "learning_rate": 1.5883443201576225e-05, + "loss": 0.0003, "step": 602 }, { - "epoch": 0.9026946107784432, - "grad_norm": 0.08953073156215048, - "learning_rate": 1.6412035835559483e-05, - "loss": 0.0008, + "epoch": 0.9648, + "grad_norm": 0.16922390481395708, + "learning_rate": 1.586946124013354e-05, + "loss": 0.0004, "step": 603 }, { - "epoch": 0.9041916167664671, - "grad_norm": 0.1882854229415982, - "learning_rate": 1.6399620014783702e-05, - "loss": 0.0035, + "epoch": 0.9664, + "grad_norm": 0.020426447409353753, + "learning_rate": 1.585546175158868e-05, + "loss": 0.0002, "step": 604 }, { - "epoch": 0.905688622754491, - "grad_norm": 0.05376462878864106, - "learning_rate": 1.6387187463534222e-05, - "loss": 0.0007, + "epoch": 0.968, + "grad_norm": 0.17459777602170934, + "learning_rate": 1.5841444777746232e-05, + "loss": 0.0021, "step": 605 }, { - "epoch": 0.907185628742515, - "grad_norm": 0.2652074145829586, - "learning_rate": 1.6374738214313354e-05, - "loss": 0.0035, + "epoch": 0.9696, + "grad_norm": 0.014776040366925668, + "learning_rate": 1.582741036046301e-05, + "loss": 0.0002, "step": 606 }, { - "epoch": 0.9086826347305389, - "grad_norm": 0.11598887341928915, - "learning_rate": 1.6362272299667078e-05, - "loss": 0.001, + "epoch": 0.9712, + "grad_norm": 0.07247446983177594, + "learning_rate": 1.5813358541647915e-05, + "loss": 0.0004, "step": 607 }, { - "epoch": 0.9101796407185628, - "grad_norm": 0.0870013457319502, - "learning_rate": 1.6349789752184922e-05, - "loss": 0.0013, + "epoch": 0.9728, + "grad_norm": 0.02386915552711786, + "learning_rate": 1.5799289363261815e-05, + "loss": 0.0002, "step": 608 }, { - "epoch": 0.9116766467065869, - "grad_norm": 0.17343366028225063, - "learning_rate": 1.6337290604499913e-05, - "loss": 0.0025, + "epoch": 0.9744, + "grad_norm": 0.2050066873881811, + "learning_rate": 1.578520286731741e-05, + "loss": 0.0024, "step": 609 }, { - "epoch": 0.9131736526946108, - "grad_norm": 0.0859523102698752, - "learning_rate": 1.6324774889288462e-05, - "loss": 0.0018, + "epoch": 0.976, + "grad_norm": 0.09157781403934438, + "learning_rate": 1.5771099095879108e-05, + "loss": 0.0004, "step": 610 }, { - "epoch": 0.9146706586826348, - "grad_norm": 0.07712475151325601, - "learning_rate": 1.6312242639270308e-05, - "loss": 0.0011, + "epoch": 0.9776, + "grad_norm": 0.003624846129716664, + "learning_rate": 1.575697809106292e-05, + "loss": 0.0001, "step": 611 }, { - "epoch": 0.9161676646706587, - "grad_norm": 0.07162451662462921, - "learning_rate": 1.6299693887208404e-05, - "loss": 0.0031, + "epoch": 0.9792, + "grad_norm": 0.010323437128790412, + "learning_rate": 1.5742839895036305e-05, + "loss": 0.0002, "step": 612 }, { - "epoch": 0.9176646706586826, - "grad_norm": 0.1607194705076756, - "learning_rate": 1.628712866590885e-05, - "loss": 0.0021, + "epoch": 0.9808, + "grad_norm": 0.017110117169375917, + "learning_rate": 1.5728684550018066e-05, + "loss": 0.0002, "step": 613 }, { - "epoch": 0.9191616766467066, - "grad_norm": 0.08490453147422353, - "learning_rate": 1.6274547008220804e-05, - "loss": 0.0015, + "epoch": 0.9824, + "grad_norm": 0.4304006186302777, + "learning_rate": 1.571451209827821e-05, + "loss": 0.0047, "step": 614 }, { - "epoch": 0.9206586826347305, - "grad_norm": 0.1508869109445389, - "learning_rate": 1.626194894703638e-05, - "loss": 0.0024, + "epoch": 0.984, + "grad_norm": 0.013547103462652449, + "learning_rate": 1.570032258213783e-05, + "loss": 0.0001, "step": 615 }, { - "epoch": 0.9221556886227545, - "grad_norm": 0.08485546738561354, - "learning_rate": 1.6249334515290593e-05, - "loss": 0.0013, + "epoch": 0.9856, + "grad_norm": 0.060641420331837864, + "learning_rate": 1.5686116043968975e-05, + "loss": 0.0003, "step": 616 }, { - "epoch": 0.9236526946107785, - "grad_norm": 0.16110417372420824, - "learning_rate": 1.6236703745961244e-05, - "loss": 0.0022, + "epoch": 0.9872, + "grad_norm": 0.01713297463284221, + "learning_rate": 1.5671892526194515e-05, + "loss": 0.0002, "step": 617 }, { - "epoch": 0.9251497005988024, - "grad_norm": 0.06607135428636862, - "learning_rate": 1.622405667206885e-05, - "loss": 0.0007, + "epoch": 0.9888, + "grad_norm": 0.29587661098059337, + "learning_rate": 1.565765207128805e-05, + "loss": 0.0015, "step": 618 }, { - "epoch": 0.9266467065868264, - "grad_norm": 0.2938746182376294, - "learning_rate": 1.6211393326676558e-05, - "loss": 0.0031, + "epoch": 0.9904, + "grad_norm": 0.18927537268665268, + "learning_rate": 1.564339472177373e-05, + "loss": 0.0032, "step": 619 }, { - "epoch": 0.9281437125748503, - "grad_norm": 0.14606146095893197, - "learning_rate": 1.6198713742890036e-05, - "loss": 0.0029, + "epoch": 0.992, + "grad_norm": 0.08369681391942292, + "learning_rate": 1.5629120520226163e-05, + "loss": 0.0009, "step": 620 }, { - "epoch": 0.9296407185628742, - "grad_norm": 0.05652377223402481, - "learning_rate": 1.6186017953857423e-05, - "loss": 0.0009, + "epoch": 0.9936, + "grad_norm": 0.01595611295946394, + "learning_rate": 1.561482950927029e-05, + "loss": 0.0002, "step": 621 }, { - "epoch": 0.9311377245508982, - "grad_norm": 0.18019462860068167, - "learning_rate": 1.617330599276921e-05, - "loss": 0.0043, + "epoch": 0.9952, + "grad_norm": 0.03823447390409338, + "learning_rate": 1.560052173158123e-05, + "loss": 0.0002, "step": 622 }, { - "epoch": 0.9326347305389222, - "grad_norm": 0.0804731431661998, - "learning_rate": 1.6160577892858178e-05, - "loss": 0.0012, + "epoch": 0.9968, + "grad_norm": 0.16298578879060066, + "learning_rate": 1.5586197229884185e-05, + "loss": 0.002, "step": 623 }, { - "epoch": 0.9341317365269461, - "grad_norm": 0.23044713115642781, - "learning_rate": 1.6147833687399287e-05, - "loss": 0.0026, + "epoch": 0.9984, + "grad_norm": 0.0541019946743606, + "learning_rate": 1.5571856046954284e-05, + "loss": 0.0005, "step": 624 }, { - "epoch": 0.9356287425149701, - "grad_norm": 0.15040761212205267, - "learning_rate": 1.6135073409709613e-05, - "loss": 0.0033, + "epoch": 1.0, + "grad_norm": 0.02004341106821655, + "learning_rate": 1.5557498225616488e-05, + "loss": 0.0002, "step": 625 }, { - "epoch": 0.937125748502994, - "grad_norm": 0.17617733364441931, - "learning_rate": 1.6122297093148242e-05, - "loss": 0.0021, + "epoch": 1.0016, + "grad_norm": 0.10660463958703306, + "learning_rate": 1.5543123808745418e-05, + "loss": 0.0011, "step": 626 }, { - "epoch": 0.938622754491018, - "grad_norm": 0.2182223824425347, - "learning_rate": 1.6109504771116193e-05, - "loss": 0.0014, + "epoch": 1.0032, + "grad_norm": 0.05934520626247313, + "learning_rate": 1.5528732839265272e-05, + "loss": 0.0005, "step": 627 }, { - "epoch": 0.9401197604790419, - "grad_norm": 0.20104637613306378, - "learning_rate": 1.6096696477056325e-05, - "loss": 0.0025, + "epoch": 1.0048, + "grad_norm": 0.21529588391063953, + "learning_rate": 1.5514325360149668e-05, + "loss": 0.003, "step": 628 }, { - "epoch": 0.9416167664670658, - "grad_norm": 0.10319298502135164, - "learning_rate": 1.608387224445326e-05, - "loss": 0.0016, + "epoch": 1.0064, + "grad_norm": 0.0260550732980306, + "learning_rate": 1.549990141442153e-05, + "loss": 0.0002, "step": 629 }, { - "epoch": 0.9431137724550899, - "grad_norm": 0.1744369089861333, - "learning_rate": 1.607103210683328e-05, - "loss": 0.0019, + "epoch": 1.008, + "grad_norm": 0.08169285818621008, + "learning_rate": 1.5485461045152937e-05, + "loss": 0.0011, "step": 630 }, { - "epoch": 0.9446107784431138, - "grad_norm": 0.1344166064567352, - "learning_rate": 1.6058176097764254e-05, - "loss": 0.0013, + "epoch": 1.0096, + "grad_norm": 0.06427658111081822, + "learning_rate": 1.5471004295465034e-05, + "loss": 0.0005, "step": 631 }, { - "epoch": 0.9461077844311377, - "grad_norm": 0.08853174252976158, - "learning_rate": 1.6045304250855534e-05, - "loss": 0.0013, + "epoch": 1.0112, + "grad_norm": 0.04754833786155076, + "learning_rate": 1.5456531208527868e-05, + "loss": 0.0005, "step": 632 }, { - "epoch": 0.9476047904191617, - "grad_norm": 0.08846484292192285, - "learning_rate": 1.6032416599757894e-05, - "loss": 0.002, + "epoch": 1.0128, + "grad_norm": 0.028872058901928507, + "learning_rate": 1.5442041827560274e-05, + "loss": 0.0003, "step": 633 }, { - "epoch": 0.9491017964071856, - "grad_norm": 0.12884476368073972, - "learning_rate": 1.6019513178163403e-05, - "loss": 0.0011, + "epoch": 1.0144, + "grad_norm": 0.04961506841238305, + "learning_rate": 1.542753619582974e-05, + "loss": 0.0004, "step": 634 }, { - "epoch": 0.9505988023952096, - "grad_norm": 0.15795219235072977, - "learning_rate": 1.6006594019805376e-05, - "loss": 0.0011, + "epoch": 1.016, + "grad_norm": 0.023923799642960132, + "learning_rate": 1.5413014356652287e-05, + "loss": 0.0001, "step": 635 }, { - "epoch": 0.9520958083832335, - "grad_norm": 0.13187039248116258, - "learning_rate": 1.5993659158458255e-05, - "loss": 0.0024, + "epoch": 1.0176, + "grad_norm": 0.12805240672927612, + "learning_rate": 1.5398476353392323e-05, + "loss": 0.0005, "step": 636 }, { - "epoch": 0.9535928143712575, - "grad_norm": 0.15925859946439425, - "learning_rate": 1.5980708627937548e-05, - "loss": 0.0025, + "epoch": 1.0192, + "grad_norm": 0.1158424283730911, + "learning_rate": 1.538392222946255e-05, + "loss": 0.0007, "step": 637 }, { - "epoch": 0.9550898203592815, - "grad_norm": 0.13331313394073413, - "learning_rate": 1.5967742462099717e-05, - "loss": 0.0017, + "epoch": 1.0208, + "grad_norm": 0.058814949979394474, + "learning_rate": 1.5369352028323773e-05, + "loss": 0.0003, "step": 638 }, { - "epoch": 0.9565868263473054, - "grad_norm": 0.08975182166363489, - "learning_rate": 1.59547606948421e-05, - "loss": 0.0014, + "epoch": 1.0224, + "grad_norm": 0.02345027815015694, + "learning_rate": 1.5354765793484834e-05, + "loss": 0.0002, "step": 639 }, { - "epoch": 0.9580838323353293, - "grad_norm": 0.3449291274347138, - "learning_rate": 1.594176336010283e-05, - "loss": 0.0024, + "epoch": 1.024, + "grad_norm": 0.03851324265626075, + "learning_rate": 1.534016356850244e-05, + "loss": 0.0003, "step": 640 }, { - "epoch": 0.9595808383233533, - "grad_norm": 0.06409320220899797, - "learning_rate": 1.592875049186073e-05, - "loss": 0.0011, + "epoch": 1.0256, + "grad_norm": 0.007581656257266726, + "learning_rate": 1.5325545396981053e-05, + "loss": 0.0001, "step": 641 }, { - "epoch": 0.9610778443113772, - "grad_norm": 0.17833684537580516, - "learning_rate": 1.5915722124135227e-05, - "loss": 0.002, + "epoch": 1.0272, + "grad_norm": 0.18789016214748264, + "learning_rate": 1.531091132257275e-05, + "loss": 0.0012, "step": 642 }, { - "epoch": 0.9625748502994012, - "grad_norm": 0.15331420541889684, - "learning_rate": 1.590267829098628e-05, - "loss": 0.002, + "epoch": 1.0288, + "grad_norm": 0.017634275364985862, + "learning_rate": 1.5296261388977107e-05, + "loss": 0.0002, "step": 643 }, { - "epoch": 0.9640718562874252, - "grad_norm": 0.5899661957634779, - "learning_rate": 1.5889619026514272e-05, - "loss": 0.0043, + "epoch": 1.0304, + "grad_norm": 0.005853495436668061, + "learning_rate": 1.528159563994104e-05, + "loss": 0.0001, "step": 644 }, { - "epoch": 0.9655688622754491, - "grad_norm": 0.1657525859558746, - "learning_rate": 1.5876544364859937e-05, - "loss": 0.0022, + "epoch": 1.032, + "grad_norm": 0.031525289383469456, + "learning_rate": 1.52669141192587e-05, + "loss": 0.0002, "step": 645 }, { - "epoch": 0.9670658682634731, - "grad_norm": 0.18379072480540792, - "learning_rate": 1.5863454340204247e-05, - "loss": 0.003, + "epoch": 1.0336, + "grad_norm": 0.006841301694317977, + "learning_rate": 1.5252216870771345e-05, + "loss": 0.0001, "step": 646 }, { - "epoch": 0.968562874251497, - "grad_norm": 0.12209910222944766, - "learning_rate": 1.585034898676835e-05, - "loss": 0.0015, + "epoch": 1.0352, + "grad_norm": 0.0587557732396357, + "learning_rate": 1.5237503938367186e-05, + "loss": 0.0003, "step": 647 }, { - "epoch": 0.9700598802395209, - "grad_norm": 0.10607555085781809, - "learning_rate": 1.583722833881346e-05, - "loss": 0.0019, + "epoch": 1.0368, + "grad_norm": 0.01676862267717498, + "learning_rate": 1.5222775365981272e-05, + "loss": 0.0002, "step": 648 }, { - "epoch": 0.9715568862275449, - "grad_norm": 0.26474190068427933, - "learning_rate": 1.5824092430640784e-05, - "loss": 0.0038, + "epoch": 1.0384, + "grad_norm": 0.013717804585198331, + "learning_rate": 1.5208031197595357e-05, + "loss": 0.0002, "step": 649 }, { - "epoch": 0.9730538922155688, - "grad_norm": 0.4156799624786208, - "learning_rate": 1.5810941296591406e-05, - "loss": 0.0028, + "epoch": 1.04, + "grad_norm": 0.042297522196741995, + "learning_rate": 1.5193271477237761e-05, + "loss": 0.0002, "step": 650 }, { - "epoch": 0.9745508982035929, - "grad_norm": 0.353881703591171, - "learning_rate": 1.5797774971046243e-05, - "loss": 0.0041, + "epoch": 1.0416, + "grad_norm": 0.005830534636442314, + "learning_rate": 1.5178496248983254e-05, + "loss": 0.0001, "step": 651 }, { - "epoch": 0.9760479041916168, - "grad_norm": 0.47331385633169554, - "learning_rate": 1.5784593488425898e-05, - "loss": 0.0036, + "epoch": 1.0432, + "grad_norm": 0.02023393751390376, + "learning_rate": 1.5163705556952912e-05, + "loss": 0.0002, "step": 652 }, { - "epoch": 0.9775449101796407, - "grad_norm": 0.23996897207608175, - "learning_rate": 1.577139688319062e-05, - "loss": 0.0042, + "epoch": 1.0448, + "grad_norm": 0.004917776209081582, + "learning_rate": 1.5148899445313983e-05, + "loss": 0.0001, "step": 653 }, { - "epoch": 0.9790419161676647, - "grad_norm": 0.14040120360537384, - "learning_rate": 1.5758185189840182e-05, - "loss": 0.0015, + "epoch": 1.0464, + "grad_norm": 0.011971086698735709, + "learning_rate": 1.5134077958279764e-05, + "loss": 0.0002, "step": 654 }, { - "epoch": 0.9805389221556886, - "grad_norm": 0.16782002665806564, - "learning_rate": 1.5744958442913804e-05, - "loss": 0.0033, + "epoch": 1.048, + "grad_norm": 0.01749647351425708, + "learning_rate": 1.5119241140109466e-05, + "loss": 0.0002, "step": 655 }, { - "epoch": 0.9820359281437125, - "grad_norm": 0.3548917708393882, - "learning_rate": 1.5731716676990057e-05, - "loss": 0.0037, + "epoch": 1.0496, + "grad_norm": 0.08810717240182654, + "learning_rate": 1.5104389035108078e-05, + "loss": 0.001, "step": 656 }, { - "epoch": 0.9835329341317365, - "grad_norm": 0.19405634340063596, - "learning_rate": 1.571845992668679e-05, - "loss": 0.0024, + "epoch": 1.0512, + "grad_norm": 0.026469620934283282, + "learning_rate": 1.5089521687626243e-05, + "loss": 0.0002, "step": 657 }, { - "epoch": 0.9850299401197605, - "grad_norm": 0.23475543702616705, - "learning_rate": 1.5705188226661013e-05, - "loss": 0.0045, + "epoch": 1.0528, + "grad_norm": 0.051846745706488596, + "learning_rate": 1.5074639142060119e-05, + "loss": 0.0003, "step": 658 }, { - "epoch": 0.9865269461077845, - "grad_norm": 0.36667176478027685, - "learning_rate": 1.569190161160882e-05, - "loss": 0.0039, + "epoch": 1.0544, + "grad_norm": 0.059650765192615585, + "learning_rate": 1.505974144285124e-05, + "loss": 0.0004, "step": 659 }, { - "epoch": 0.9880239520958084, - "grad_norm": 0.1896831017088073, - "learning_rate": 1.5678600116265293e-05, - "loss": 0.0027, + "epoch": 1.056, + "grad_norm": 0.11858662320135964, + "learning_rate": 1.50448286344864e-05, + "loss": 0.0005, "step": 660 }, { - "epoch": 0.9895209580838323, - "grad_norm": 0.10818436139444451, - "learning_rate": 1.566528377540443e-05, - "loss": 0.0021, + "epoch": 1.0576, + "grad_norm": 0.009351544749928867, + "learning_rate": 1.5029900761497507e-05, + "loss": 0.0001, "step": 661 }, { - "epoch": 0.9910179640718563, - "grad_norm": 0.1635417574332313, - "learning_rate": 1.5651952623839028e-05, - "loss": 0.0009, + "epoch": 1.0592, + "grad_norm": 0.1343413427308062, + "learning_rate": 1.501495786846146e-05, + "loss": 0.0011, "step": 662 }, { - "epoch": 0.9925149700598802, - "grad_norm": 0.2787481879240596, - "learning_rate": 1.5638606696420602e-05, - "loss": 0.0034, + "epoch": 1.0608, + "grad_norm": 0.08194886239543721, + "learning_rate": 1.5000000000000002e-05, + "loss": 0.0029, "step": 663 }, { - "epoch": 0.9940119760479041, - "grad_norm": 0.12776549267504195, - "learning_rate": 1.5625246028039308e-05, - "loss": 0.0013, + "epoch": 1.0624, + "grad_norm": 0.11053103566195514, + "learning_rate": 1.4985027200779599e-05, + "loss": 0.0014, "step": 664 }, { - "epoch": 0.9955089820359282, - "grad_norm": 0.19106271050333715, - "learning_rate": 1.5611870653623826e-05, - "loss": 0.0026, + "epoch": 1.064, + "grad_norm": 0.005638188966523121, + "learning_rate": 1.4970039515511303e-05, + "loss": 0.0001, "step": 665 }, { - "epoch": 0.9970059880239521, - "grad_norm": 0.1120405670371837, - "learning_rate": 1.5598480608141286e-05, - "loss": 0.0013, + "epoch": 1.0656, + "grad_norm": 0.050703501379441136, + "learning_rate": 1.4955036988950617e-05, + "loss": 0.0003, "step": 666 }, { - "epoch": 0.9985029940119761, - "grad_norm": 0.06889850891061895, - "learning_rate": 1.5585075926597174e-05, - "loss": 0.0008, + "epoch": 1.0672, + "grad_norm": 0.09081826808070569, + "learning_rate": 1.4940019665897363e-05, + "loss": 0.003, "step": 667 }, { - "epoch": 1.0, - "grad_norm": 0.1249403668339043, - "learning_rate": 1.557165664403524e-05, + "epoch": 1.0688, + "grad_norm": 0.10279802127103428, + "learning_rate": 1.4924987591195548e-05, "loss": 0.0011, "step": 668 }, { - "epoch": 1.001497005988024, - "grad_norm": 0.22792093858397705, - "learning_rate": 1.5558222795537403e-05, - "loss": 0.0021, + "epoch": 1.0704, + "grad_norm": 0.05482382391695781, + "learning_rate": 1.4909940809733223e-05, + "loss": 0.0003, "step": 669 }, { - "epoch": 1.0029940119760479, - "grad_norm": 0.13959837895980517, - "learning_rate": 1.554477441622366e-05, - "loss": 0.0019, + "epoch": 1.072, + "grad_norm": 0.01978226020479878, + "learning_rate": 1.489487936644237e-05, + "loss": 0.0002, "step": 670 }, { - "epoch": 1.0044910179640718, - "grad_norm": 0.11152024170430974, - "learning_rate": 1.5531311541251995e-05, - "loss": 0.0015, + "epoch": 1.0735999999999999, + "grad_norm": 0.0507206114531236, + "learning_rate": 1.4879803306298736e-05, + "loss": 0.0003, "step": 671 }, { - "epoch": 1.0059880239520957, - "grad_norm": 0.16100791789200442, - "learning_rate": 1.55178342058183e-05, - "loss": 0.0039, + "epoch": 1.0752, + "grad_norm": 0.1033269224973426, + "learning_rate": 1.4864712674321733e-05, + "loss": 0.0004, "step": 672 }, { - "epoch": 1.0074850299401197, - "grad_norm": 0.16419979563070153, - "learning_rate": 1.5504342445156253e-05, - "loss": 0.0018, + "epoch": 1.0768, + "grad_norm": 0.03604980553918912, + "learning_rate": 1.4849607515574276e-05, + "loss": 0.0002, "step": 673 }, { - "epoch": 1.0089820359281436, - "grad_norm": 0.18649044296735573, - "learning_rate": 1.549083629453726e-05, - "loss": 0.0029, + "epoch": 1.0784, + "grad_norm": 0.11108493026233916, + "learning_rate": 1.4834487875162657e-05, + "loss": 0.0013, "step": 674 }, { - "epoch": 1.0104790419161676, - "grad_norm": 0.12322267561572196, - "learning_rate": 1.547731578927033e-05, - "loss": 0.0017, + "epoch": 1.08, + "grad_norm": 0.217810679569032, + "learning_rate": 1.4819353798236427e-05, + "loss": 0.0018, "step": 675 }, { - "epoch": 1.0119760479041917, - "grad_norm": 0.1847824809505218, - "learning_rate": 1.5463780964702017e-05, - "loss": 0.002, + "epoch": 1.0816, + "grad_norm": 0.006093427175663653, + "learning_rate": 1.4804205329988226e-05, + "loss": 0.0001, "step": 676 }, { - "epoch": 1.0134730538922156, - "grad_norm": 0.09718374857400291, - "learning_rate": 1.5450231856216294e-05, - "loss": 0.0014, + "epoch": 1.0832, + "grad_norm": 0.0969390115935947, + "learning_rate": 1.4789042515653687e-05, + "loss": 0.0005, "step": 677 }, { - "epoch": 1.0149700598802396, - "grad_norm": 0.11714727810772602, - "learning_rate": 1.5436668499234487e-05, - "loss": 0.0015, + "epoch": 1.0848, + "grad_norm": 0.016953908558966193, + "learning_rate": 1.477386540051127e-05, + "loss": 0.0002, "step": 678 }, { - "epoch": 1.0164670658682635, - "grad_norm": 0.17180981362774483, - "learning_rate": 1.5423090929215164e-05, - "loss": 0.0016, + "epoch": 1.0864, + "grad_norm": 0.009974731053493537, + "learning_rate": 1.4758674029882152e-05, + "loss": 0.0001, "step": 679 }, { - "epoch": 1.0179640718562875, - "grad_norm": 0.1229563131274387, - "learning_rate": 1.5409499181654062e-05, - "loss": 0.0022, + "epoch": 1.088, + "grad_norm": 0.08345962192160167, + "learning_rate": 1.4743468449130065e-05, + "loss": 0.0004, "step": 680 }, { - "epoch": 1.0194610778443114, - "grad_norm": 0.08902729595877537, - "learning_rate": 1.5395893292083967e-05, - "loss": 0.0012, + "epoch": 1.0896, + "grad_norm": 0.11817389598335944, + "learning_rate": 1.4728248703661183e-05, + "loss": 0.0005, "step": 681 }, { - "epoch": 1.0209580838323353, - "grad_norm": 0.08491224818504717, - "learning_rate": 1.5382273296074647e-05, - "loss": 0.0014, + "epoch": 1.0912, + "grad_norm": 0.07616966500087563, + "learning_rate": 1.4713014838923975e-05, + "loss": 0.0011, "step": 682 }, { - "epoch": 1.0224550898203593, - "grad_norm": 0.12463262583052379, - "learning_rate": 1.5368639229232744e-05, - "loss": 0.0012, + "epoch": 1.0928, + "grad_norm": 0.04248575954910725, + "learning_rate": 1.4697766900409076e-05, + "loss": 0.0002, "step": 683 }, { - "epoch": 1.0239520958083832, - "grad_norm": 0.08714987099310947, - "learning_rate": 1.5354991127201682e-05, - "loss": 0.0009, + "epoch": 1.0944, + "grad_norm": 0.028265559311665706, + "learning_rate": 1.4682504933649144e-05, + "loss": 0.0002, "step": 684 }, { - "epoch": 1.0254491017964071, - "grad_norm": 0.10996908003888108, - "learning_rate": 1.5341329025661593e-05, - "loss": 0.0016, + "epoch": 1.096, + "grad_norm": 0.1910697994779768, + "learning_rate": 1.466722898421873e-05, + "loss": 0.0006, "step": 685 }, { - "epoch": 1.026946107784431, - "grad_norm": 0.06688339180384366, - "learning_rate": 1.5327652960329182e-05, - "loss": 0.0011, + "epoch": 1.0976, + "grad_norm": 0.023588905330173503, + "learning_rate": 1.4651939097734132e-05, + "loss": 0.0002, "step": 686 }, { - "epoch": 1.028443113772455, - "grad_norm": 0.11389018736693612, - "learning_rate": 1.5313962966957683e-05, - "loss": 0.0013, + "epoch": 1.0992, + "grad_norm": 0.06545131637159864, + "learning_rate": 1.4636635319853274e-05, + "loss": 0.0006, "step": 687 }, { - "epoch": 1.029940119760479, - "grad_norm": 0.1487541170115952, - "learning_rate": 1.5300259081336733e-05, - "loss": 0.0029, + "epoch": 1.1008, + "grad_norm": 0.21589569803438816, + "learning_rate": 1.4621317696275563e-05, + "loss": 0.0035, "step": 688 }, { - "epoch": 1.031437125748503, - "grad_norm": 0.18232592006619516, - "learning_rate": 1.5286541339292277e-05, - "loss": 0.0019, + "epoch": 1.1024, + "grad_norm": 0.024193057611844735, + "learning_rate": 1.4605986272741748e-05, + "loss": 0.0002, "step": 689 }, { - "epoch": 1.032934131736527, - "grad_norm": 0.09073197180745511, - "learning_rate": 1.5272809776686507e-05, - "loss": 0.001, + "epoch": 1.104, + "grad_norm": 0.017582182124678495, + "learning_rate": 1.4590641095033786e-05, + "loss": 0.0002, "step": 690 }, { - "epoch": 1.034431137724551, - "grad_norm": 0.11198750989897426, - "learning_rate": 1.5259064429417724e-05, - "loss": 0.0011, + "epoch": 1.1056, + "grad_norm": 0.1314061948393412, + "learning_rate": 1.4575282208974704e-05, + "loss": 0.0009, "step": 691 }, { - "epoch": 1.035928143712575, - "grad_norm": 0.13524279713865056, - "learning_rate": 1.5245305333420284e-05, - "loss": 0.0027, + "epoch": 1.1072, + "grad_norm": 0.04667058659825015, + "learning_rate": 1.4559909660428469e-05, + "loss": 0.0002, "step": 692 }, { - "epoch": 1.0374251497005988, - "grad_norm": 0.13473254834146128, - "learning_rate": 1.5231532524664472e-05, - "loss": 0.0013, + "epoch": 1.1088, + "grad_norm": 0.15259160104285077, + "learning_rate": 1.4544523495299843e-05, + "loss": 0.0019, "step": 693 }, { - "epoch": 1.0389221556886228, - "grad_norm": 0.09705487229884018, - "learning_rate": 1.5217746039156426e-05, - "loss": 0.0014, + "epoch": 1.1104, + "grad_norm": 0.15183714545641216, + "learning_rate": 1.4529123759534253e-05, + "loss": 0.0011, "step": 694 }, { - "epoch": 1.0404191616766467, - "grad_norm": 0.12743903961819494, - "learning_rate": 1.5203945912938047e-05, - "loss": 0.0031, + "epoch": 1.112, + "grad_norm": 0.03155705221297175, + "learning_rate": 1.4513710499117648e-05, + "loss": 0.0002, "step": 695 }, { - "epoch": 1.0419161676646707, - "grad_norm": 0.15772232058272861, - "learning_rate": 1.5190132182086887e-05, - "loss": 0.0035, + "epoch": 1.1136, + "grad_norm": 0.00761205137275975, + "learning_rate": 1.4498283760076362e-05, + "loss": 0.0001, "step": 696 }, { - "epoch": 1.0434131736526946, - "grad_norm": 0.11967873813625308, - "learning_rate": 1.5176304882716074e-05, - "loss": 0.0021, + "epoch": 1.1152, + "grad_norm": 0.17988270920033475, + "learning_rate": 1.4482843588476976e-05, + "loss": 0.0009, "step": 697 }, { - "epoch": 1.0449101796407185, - "grad_norm": 0.061224633547482255, - "learning_rate": 1.5162464050974195e-05, + "epoch": 1.1168, + "grad_norm": 0.06711856696888893, + "learning_rate": 1.4467390030426187e-05, "loss": 0.0006, "step": 698 }, { - "epoch": 1.0464071856287425, - "grad_norm": 0.12437711893494492, - "learning_rate": 1.5148609723045227e-05, - "loss": 0.0012, + "epoch": 1.1184, + "grad_norm": 0.007982256658821624, + "learning_rate": 1.445192313207067e-05, + "loss": 0.0001, "step": 699 }, { - "epoch": 1.0479041916167664, - "grad_norm": 0.1745613764437353, - "learning_rate": 1.513474193514842e-05, - "loss": 0.0017, + "epoch": 1.12, + "grad_norm": 0.002927265853794833, + "learning_rate": 1.443644293959693e-05, + "loss": 0.0001, "step": 700 }, { - "epoch": 1.0494011976047903, - "grad_norm": 0.16175781608452677, - "learning_rate": 1.5120860723538224e-05, - "loss": 0.0012, + "epoch": 1.1216, + "grad_norm": 0.10777686557463714, + "learning_rate": 1.4420949499231172e-05, + "loss": 0.0003, "step": 701 }, { - "epoch": 1.0508982035928143, - "grad_norm": 0.10221564506113642, - "learning_rate": 1.5106966124504172e-05, - "loss": 0.002, + "epoch": 1.1232, + "grad_norm": 0.11134637630246084, + "learning_rate": 1.4405442857239151e-05, + "loss": 0.0012, "step": 702 }, { - "epoch": 1.0523952095808382, - "grad_norm": 0.062271705757527857, - "learning_rate": 1.5093058174370798e-05, - "loss": 0.0008, + "epoch": 1.1248, + "grad_norm": 0.017288876298526105, + "learning_rate": 1.4389923059926064e-05, + "loss": 0.0002, "step": 703 }, { - "epoch": 1.0538922155688624, - "grad_norm": 0.10864619286621946, - "learning_rate": 1.5079136909497542e-05, + "epoch": 1.1264, + "grad_norm": 0.10471184371986594, + "learning_rate": 1.437439015363638e-05, "loss": 0.0009, "step": 704 }, { - "epoch": 1.0553892215568863, - "grad_norm": 0.19647706361842368, - "learning_rate": 1.5065202366278657e-05, - "loss": 0.0014, + "epoch": 1.1280000000000001, + "grad_norm": 0.009296799728898991, + "learning_rate": 1.4358844184753713e-05, + "loss": 0.0001, "step": 705 }, { - "epoch": 1.0568862275449102, - "grad_norm": 0.07888279226925543, - "learning_rate": 1.5051254581143095e-05, - "loss": 0.0008, + "epoch": 1.1296, + "grad_norm": 0.20219144885410853, + "learning_rate": 1.4343285199700685e-05, + "loss": 0.0026, "step": 706 }, { - "epoch": 1.0583832335329342, - "grad_norm": 0.12025352331354569, - "learning_rate": 1.5037293590554442e-05, - "loss": 0.0012, + "epoch": 1.1312, + "grad_norm": 0.020697705406549943, + "learning_rate": 1.432771324493879e-05, + "loss": 0.0002, "step": 707 }, { - "epoch": 1.0598802395209581, - "grad_norm": 0.08615367646336641, - "learning_rate": 1.5023319431010798e-05, - "loss": 0.0013, + "epoch": 1.1328, + "grad_norm": 0.0066267694348396385, + "learning_rate": 1.4312128366968244e-05, + "loss": 0.0001, "step": 708 }, { - "epoch": 1.061377245508982, - "grad_norm": 0.14909244953515943, - "learning_rate": 1.5009332139044696e-05, - "loss": 0.0013, + "epoch": 1.1344, + "grad_norm": 0.02044341762711039, + "learning_rate": 1.4296530612327864e-05, + "loss": 0.0002, "step": 709 }, { - "epoch": 1.062874251497006, - "grad_norm": 0.10899940946016365, - "learning_rate": 1.4995331751222992e-05, - "loss": 0.0014, + "epoch": 1.1360000000000001, + "grad_norm": 0.10781695163193859, + "learning_rate": 1.428092002759491e-05, + "loss": 0.0005, "step": 710 }, { - "epoch": 1.06437125748503, - "grad_norm": 0.13011579272063212, - "learning_rate": 1.4981318304146786e-05, - "loss": 0.0026, + "epoch": 1.1376, + "grad_norm": 0.0376769847929488, + "learning_rate": 1.4265296659384956e-05, + "loss": 0.0002, "step": 711 }, { - "epoch": 1.0658682634730539, - "grad_norm": 0.06507001266187337, - "learning_rate": 1.4967291834451317e-05, - "loss": 0.0008, + "epoch": 1.1392, + "grad_norm": 0.019401313477370914, + "learning_rate": 1.4249660554351752e-05, + "loss": 0.0002, "step": 712 }, { - "epoch": 1.0673652694610778, - "grad_norm": 0.1452953575075588, - "learning_rate": 1.4953252378805872e-05, - "loss": 0.0027, + "epoch": 1.1408, + "grad_norm": 0.00674801299827939, + "learning_rate": 1.4234011759187084e-05, + "loss": 0.0001, "step": 713 }, { - "epoch": 1.0688622754491017, - "grad_norm": 0.05183547815818978, - "learning_rate": 1.493919997391368e-05, - "loss": 0.0007, + "epoch": 1.1424, + "grad_norm": 0.01065028286198891, + "learning_rate": 1.4218350320620625e-05, + "loss": 0.0001, "step": 714 }, { - "epoch": 1.0703592814371257, - "grad_norm": 0.07714623310644146, - "learning_rate": 1.492513465651183e-05, - "loss": 0.0008, + "epoch": 1.144, + "grad_norm": 0.05165496788259497, + "learning_rate": 1.4202676285419811e-05, + "loss": 0.0004, "step": 715 }, { - "epoch": 1.0718562874251496, - "grad_norm": 0.1273003039382832, - "learning_rate": 1.4911056463371161e-05, - "loss": 0.0018, + "epoch": 1.1456, + "grad_norm": 0.011510130836619201, + "learning_rate": 1.4186989700389689e-05, + "loss": 0.0001, "step": 716 }, { - "epoch": 1.0733532934131738, - "grad_norm": 0.14647086320390126, - "learning_rate": 1.4896965431296182e-05, - "loss": 0.002, + "epoch": 1.1472, + "grad_norm": 0.044944190679150994, + "learning_rate": 1.4171290612372781e-05, + "loss": 0.0003, "step": 717 }, { - "epoch": 1.0748502994011977, - "grad_norm": 0.11031963684292324, - "learning_rate": 1.4882861597124954e-05, - "loss": 0.0017, + "epoch": 1.1488, + "grad_norm": 0.060448606493635766, + "learning_rate": 1.4155579068248951e-05, + "loss": 0.0003, "step": 718 }, { - "epoch": 1.0763473053892216, - "grad_norm": 0.13321582362567036, - "learning_rate": 1.4868744997729024e-05, - "loss": 0.0015, + "epoch": 1.1504, + "grad_norm": 0.012225954357272079, + "learning_rate": 1.4139855114935253e-05, + "loss": 0.0001, "step": 719 }, { - "epoch": 1.0778443113772456, - "grad_norm": 0.12391394850850491, - "learning_rate": 1.4854615670013295e-05, - "loss": 0.0014, + "epoch": 1.152, + "grad_norm": 0.08028402779935753, + "learning_rate": 1.4124118799385797e-05, + "loss": 0.001, "step": 720 }, { - "epoch": 1.0793413173652695, - "grad_norm": 0.16741971891552657, - "learning_rate": 1.4840473650915952e-05, - "loss": 0.0014, + "epoch": 1.1536, + "grad_norm": 0.059661115677953874, + "learning_rate": 1.410837016859161e-05, + "loss": 0.0004, "step": 721 }, { - "epoch": 1.0808383233532934, - "grad_norm": 0.06084190134638075, - "learning_rate": 1.4826318977408358e-05, - "loss": 0.0008, + "epoch": 1.1552, + "grad_norm": 0.03410361311916781, + "learning_rate": 1.4092609269580498e-05, + "loss": 0.0002, "step": 722 }, { - "epoch": 1.0823353293413174, - "grad_norm": 0.09523046312621482, - "learning_rate": 1.4812151686494962e-05, - "loss": 0.0011, + "epoch": 1.1568, + "grad_norm": 0.11897638231253438, + "learning_rate": 1.4076836149416889e-05, + "loss": 0.0003, "step": 723 }, { - "epoch": 1.0838323353293413, - "grad_norm": 0.2782155057266579, - "learning_rate": 1.4797971815213194e-05, - "loss": 0.0027, + "epoch": 1.1584, + "grad_norm": 0.008149391142795047, + "learning_rate": 1.4061050855201723e-05, + "loss": 0.0001, "step": 724 }, { - "epoch": 1.0853293413173652, - "grad_norm": 0.07866338116148361, - "learning_rate": 1.4783779400633372e-05, - "loss": 0.0019, + "epoch": 1.16, + "grad_norm": 0.04198467796283594, + "learning_rate": 1.4045253434072278e-05, + "loss": 0.0003, "step": 725 }, { - "epoch": 1.0868263473053892, - "grad_norm": 0.08163897393875569, - "learning_rate": 1.4769574479858612e-05, - "loss": 0.0013, + "epoch": 1.1616, + "grad_norm": 0.08246516092578157, + "learning_rate": 1.4029443933202059e-05, + "loss": 0.0034, "step": 726 }, { - "epoch": 1.0883233532934131, - "grad_norm": 0.06549200205338107, - "learning_rate": 1.475535709002472e-05, - "loss": 0.0008, + "epoch": 1.1632, + "grad_norm": 0.014962798528117778, + "learning_rate": 1.4013622399800628e-05, + "loss": 0.0002, "step": 727 }, { - "epoch": 1.089820359281437, - "grad_norm": 0.10112474427638028, - "learning_rate": 1.4741127268300097e-05, - "loss": 0.0015, + "epoch": 1.1648, + "grad_norm": 0.022593058469230293, + "learning_rate": 1.399778888111349e-05, + "loss": 0.0002, "step": 728 }, { - "epoch": 1.091317365269461, - "grad_norm": 0.03922519037579488, - "learning_rate": 1.4726885051885654e-05, - "loss": 0.0005, + "epoch": 1.1663999999999999, + "grad_norm": 0.14837290202030107, + "learning_rate": 1.3981943424421932e-05, + "loss": 0.0016, "step": 729 }, { - "epoch": 1.092814371257485, - "grad_norm": 0.12932978632302858, - "learning_rate": 1.4712630478014696e-05, - "loss": 0.0013, + "epoch": 1.168, + "grad_norm": 0.013417266184973483, + "learning_rate": 1.3966086077042891e-05, + "loss": 0.0001, "step": 730 }, { - "epoch": 1.0943113772455089, - "grad_norm": 0.14927429413650972, - "learning_rate": 1.4698363583952839e-05, - "loss": 0.0019, + "epoch": 1.1696, + "grad_norm": 0.11604785901225578, + "learning_rate": 1.3950216886328818e-05, + "loss": 0.0009, "step": 731 }, { - "epoch": 1.095808383233533, - "grad_norm": 0.08123699864404504, - "learning_rate": 1.4684084406997903e-05, - "loss": 0.0011, + "epoch": 1.1712, + "grad_norm": 0.003183819460730826, + "learning_rate": 1.3934335899667526e-05, + "loss": 0.0001, "step": 732 }, { - "epoch": 1.097305389221557, - "grad_norm": 0.05959853755308904, - "learning_rate": 1.4669792984479828e-05, - "loss": 0.0012, + "epoch": 1.1728, + "grad_norm": 0.006302224762777941, + "learning_rate": 1.3918443164482048e-05, + "loss": 0.0001, "step": 733 }, { - "epoch": 1.098802395209581, - "grad_norm": 0.11908929599820205, - "learning_rate": 1.4655489353760555e-05, - "loss": 0.0014, + "epoch": 1.1743999999999999, + "grad_norm": 0.028227325989654887, + "learning_rate": 1.3902538728230502e-05, + "loss": 0.0002, "step": 734 }, { - "epoch": 1.1002994011976048, - "grad_norm": 0.09093165505220649, - "learning_rate": 1.4641173552233954e-05, - "loss": 0.0008, + "epoch": 1.176, + "grad_norm": 0.07163952091692051, + "learning_rate": 1.3886622638405953e-05, + "loss": 0.0013, "step": 735 }, { - "epoch": 1.1017964071856288, - "grad_norm": 0.11618048676071026, - "learning_rate": 1.4626845617325703e-05, - "loss": 0.0013, + "epoch": 1.1776, + "grad_norm": 0.007588300955138563, + "learning_rate": 1.387069494253626e-05, + "loss": 0.0001, "step": 736 }, { - "epoch": 1.1032934131736527, - "grad_norm": 0.06281753166476825, - "learning_rate": 1.4612505586493204e-05, - "loss": 0.0007, + "epoch": 1.1792, + "grad_norm": 0.004028009467982749, + "learning_rate": 1.3854755688183941e-05, + "loss": 0.0001, "step": 737 }, { - "epoch": 1.1047904191616766, - "grad_norm": 0.2023556555593652, - "learning_rate": 1.4598153497225482e-05, - "loss": 0.002, + "epoch": 1.1808, + "grad_norm": 0.00948987038982055, + "learning_rate": 1.3838804922946027e-05, + "loss": 0.0002, "step": 738 }, { - "epoch": 1.1062874251497006, - "grad_norm": 0.11177065874880013, - "learning_rate": 1.4583789387043083e-05, - "loss": 0.0013, + "epoch": 1.1824, + "grad_norm": 0.03314448760771682, + "learning_rate": 1.3822842694453923e-05, + "loss": 0.0003, "step": 739 }, { - "epoch": 1.1077844311377245, - "grad_norm": 0.14741071238122402, - "learning_rate": 1.4569413293497986e-05, - "loss": 0.0015, + "epoch": 1.184, + "grad_norm": 0.018637812219687012, + "learning_rate": 1.380686905037327e-05, + "loss": 0.0001, "step": 740 }, { - "epoch": 1.1092814371257484, - "grad_norm": 0.24658469283044443, - "learning_rate": 1.455502525417349e-05, - "loss": 0.0012, + "epoch": 1.1856, + "grad_norm": 0.026116887969995242, + "learning_rate": 1.3790884038403796e-05, + "loss": 0.0002, "step": 741 }, { - "epoch": 1.1107784431137724, - "grad_norm": 0.13681115792701468, - "learning_rate": 1.454062530668413e-05, - "loss": 0.0017, + "epoch": 1.1872, + "grad_norm": 0.04576868274139848, + "learning_rate": 1.3774887706279165e-05, + "loss": 0.0002, "step": 742 }, { - "epoch": 1.1122754491017963, - "grad_norm": 0.0834969421467918, - "learning_rate": 1.4526213488675564e-05, - "loss": 0.001, + "epoch": 1.1888, + "grad_norm": 0.21322502984555788, + "learning_rate": 1.375888010176686e-05, + "loss": 0.0012, "step": 743 }, { - "epoch": 1.1137724550898203, - "grad_norm": 0.1940908997890325, - "learning_rate": 1.4511789837824495e-05, - "loss": 0.0015, + "epoch": 1.1904, + "grad_norm": 0.008027445621927178, + "learning_rate": 1.374286127266801e-05, + "loss": 0.0001, "step": 744 }, { - "epoch": 1.1152694610778444, - "grad_norm": 0.10979405041458333, - "learning_rate": 1.4497354391838551e-05, - "loss": 0.0011, + "epoch": 1.192, + "grad_norm": 0.014333097695883089, + "learning_rate": 1.3726831266817278e-05, + "loss": 0.0002, "step": 745 }, { - "epoch": 1.1167664670658684, - "grad_norm": 0.11938289413679191, - "learning_rate": 1.4482907188456198e-05, - "loss": 0.0012, + "epoch": 1.1936, + "grad_norm": 0.01649187155480879, + "learning_rate": 1.3710790132082693e-05, + "loss": 0.0001, "step": 746 }, { - "epoch": 1.1182634730538923, - "grad_norm": 0.1358936092557254, - "learning_rate": 1.4468448265446646e-05, - "loss": 0.0013, + "epoch": 1.1952, + "grad_norm": 0.049496975480294164, + "learning_rate": 1.3694737916365517e-05, + "loss": 0.0001, "step": 747 }, { - "epoch": 1.1197604790419162, - "grad_norm": 0.1354545576585189, - "learning_rate": 1.4453977660609733e-05, - "loss": 0.0014, + "epoch": 1.1968, + "grad_norm": 0.23413285031093056, + "learning_rate": 1.3678674667600102e-05, + "loss": 0.003, "step": 748 }, { - "epoch": 1.1212574850299402, - "grad_norm": 0.08442533262908321, - "learning_rate": 1.4439495411775844e-05, - "loss": 0.001, + "epoch": 1.1984, + "grad_norm": 0.029064562944119434, + "learning_rate": 1.3662600433753746e-05, + "loss": 0.0001, "step": 749 }, { - "epoch": 1.122754491017964, - "grad_norm": 0.07602658081845277, - "learning_rate": 1.4425001556805796e-05, - "loss": 0.0008, + "epoch": 1.2, + "grad_norm": 0.08357082403130471, + "learning_rate": 1.3646515262826551e-05, + "loss": 0.0019, "step": 750 }, { - "epoch": 1.124251497005988, - "grad_norm": 0.13026513890902275, - "learning_rate": 1.4410496133590763e-05, - "loss": 0.0012, + "epoch": 1.2016, + "grad_norm": 0.06813924590320486, + "learning_rate": 1.3630419202851287e-05, + "loss": 0.0007, "step": 751 }, { - "epoch": 1.125748502994012, - "grad_norm": 0.08108549950525631, - "learning_rate": 1.439597918005215e-05, - "loss": 0.0006, + "epoch": 1.2032, + "grad_norm": 0.050105407692925645, + "learning_rate": 1.3614312301893222e-05, + "loss": 0.0003, "step": 752 }, { - "epoch": 1.127245508982036, - "grad_norm": 0.2152317708482709, - "learning_rate": 1.4381450734141505e-05, - "loss": 0.002, + "epoch": 1.2048, + "grad_norm": 0.03927881336852108, + "learning_rate": 1.3598194608050011e-05, + "loss": 0.0002, "step": 753 }, { - "epoch": 1.1287425149700598, - "grad_norm": 0.08773076511235232, - "learning_rate": 1.4366910833840431e-05, - "loss": 0.0021, + "epoch": 1.2064, + "grad_norm": 0.10718101644698451, + "learning_rate": 1.3582066169451535e-05, + "loss": 0.0004, "step": 754 }, { - "epoch": 1.1302395209580838, - "grad_norm": 0.17027037331891098, - "learning_rate": 1.4352359517160462e-05, - "loss": 0.0013, + "epoch": 1.208, + "grad_norm": 0.06591309918475667, + "learning_rate": 1.3565927034259757e-05, + "loss": 0.0004, "step": 755 }, { - "epoch": 1.1317365269461077, - "grad_norm": 0.08918657681225792, - "learning_rate": 1.4337796822142987e-05, - "loss": 0.0015, + "epoch": 1.2096, + "grad_norm": 0.05031545686049723, + "learning_rate": 1.354977725066859e-05, + "loss": 0.0002, "step": 756 }, { - "epoch": 1.1332335329341316, - "grad_norm": 0.1305105215164226, - "learning_rate": 1.4323222786859141e-05, - "loss": 0.0011, + "epoch": 1.2112, + "grad_norm": 0.002031493397625212, + "learning_rate": 1.3533616866903736e-05, + "loss": 0.0001, "step": 757 }, { - "epoch": 1.1347305389221556, - "grad_norm": 0.12593434714214163, - "learning_rate": 1.4308637449409705e-05, - "loss": 0.0011, + "epoch": 1.2128, + "grad_norm": 0.08640031574979783, + "learning_rate": 1.351744593122255e-05, + "loss": 0.0018, "step": 758 }, { - "epoch": 1.1362275449101795, - "grad_norm": 0.3028857079800561, - "learning_rate": 1.4294040847925004e-05, - "loss": 0.0035, + "epoch": 1.2144, + "grad_norm": 0.11112324060376225, + "learning_rate": 1.3501264491913909e-05, + "loss": 0.0011, "step": 759 }, { - "epoch": 1.1377245508982037, - "grad_norm": 0.24334283105322427, - "learning_rate": 1.4279433020564813e-05, - "loss": 0.002, + "epoch": 1.216, + "grad_norm": 0.10879913552858618, + "learning_rate": 1.3485072597298038e-05, + "loss": 0.0013, "step": 760 }, { - "epoch": 1.1392215568862276, - "grad_norm": 0.18593254416098137, - "learning_rate": 1.4264814005518251e-05, - "loss": 0.0033, + "epoch": 1.2176, + "grad_norm": 0.044457695496770214, + "learning_rate": 1.3468870295726399e-05, + "loss": 0.0003, "step": 761 }, { - "epoch": 1.1407185628742516, - "grad_norm": 0.158884487983083, - "learning_rate": 1.4250183841003695e-05, - "loss": 0.0013, + "epoch": 1.2192, + "grad_norm": 0.09420747266028137, + "learning_rate": 1.3452657635581521e-05, + "loss": 0.0004, "step": 762 }, { - "epoch": 1.1422155688622755, - "grad_norm": 0.14632297769339114, - "learning_rate": 1.4235542565268658e-05, - "loss": 0.0015, + "epoch": 1.2208, + "grad_norm": 0.02427716999706509, + "learning_rate": 1.3436434665276865e-05, + "loss": 0.0002, "step": 763 }, { - "epoch": 1.1437125748502994, - "grad_norm": 0.15199709628186464, - "learning_rate": 1.4220890216589708e-05, - "loss": 0.0011, + "epoch": 1.2224, + "grad_norm": 0.009056916304727262, + "learning_rate": 1.342020143325669e-05, + "loss": 0.0001, "step": 764 }, { - "epoch": 1.1452095808383234, - "grad_norm": 0.07701639398769143, - "learning_rate": 1.4206226833272359e-05, - "loss": 0.0011, + "epoch": 1.224, + "grad_norm": 0.08248517704520834, + "learning_rate": 1.3403957987995884e-05, + "loss": 0.0008, "step": 765 }, { - "epoch": 1.1467065868263473, - "grad_norm": 0.06624545307291502, - "learning_rate": 1.4191552453650972e-05, - "loss": 0.0007, + "epoch": 1.2256, + "grad_norm": 0.019999402030378328, + "learning_rate": 1.3387704377999842e-05, + "loss": 0.0002, "step": 766 }, { - "epoch": 1.1482035928143712, - "grad_norm": 0.18898462608830638, - "learning_rate": 1.4176867116088655e-05, - "loss": 0.0017, + "epoch": 1.2272, + "grad_norm": 0.20880613727618944, + "learning_rate": 1.3371440651804313e-05, + "loss": 0.001, "step": 767 }, { - "epoch": 1.1497005988023952, - "grad_norm": 0.11325039624754406, - "learning_rate": 1.4162170858977167e-05, - "loss": 0.0013, + "epoch": 1.2288000000000001, + "grad_norm": 0.05269074816169083, + "learning_rate": 1.335516685797525e-05, + "loss": 0.0003, "step": 768 }, { - "epoch": 1.151197604790419, - "grad_norm": 0.1260621674366222, - "learning_rate": 1.4147463720736813e-05, - "loss": 0.0023, + "epoch": 1.2304, + "grad_norm": 0.029644432614742842, + "learning_rate": 1.3338883045108674e-05, + "loss": 0.0003, "step": 769 }, { - "epoch": 1.152694610778443, - "grad_norm": 0.10527624531422942, - "learning_rate": 1.4132745739816341e-05, - "loss": 0.0008, + "epoch": 1.232, + "grad_norm": 0.05457781914849625, + "learning_rate": 1.3322589261830517e-05, + "loss": 0.0003, "step": 770 }, { - "epoch": 1.154191616766467, - "grad_norm": 0.22598741769102923, - "learning_rate": 1.4118016954692845e-05, - "loss": 0.0022, + "epoch": 1.2336, + "grad_norm": 0.00860704666475476, + "learning_rate": 1.3306285556796494e-05, + "loss": 0.0001, "step": 771 }, { - "epoch": 1.1556886227544911, - "grad_norm": 0.11216311604503693, - "learning_rate": 1.4103277403871667e-05, - "loss": 0.0021, + "epoch": 1.2352, + "grad_norm": 0.03239844982558144, + "learning_rate": 1.328997197869194e-05, + "loss": 0.0002, "step": 772 }, { - "epoch": 1.157185628742515, - "grad_norm": 0.12672041749180385, - "learning_rate": 1.4088527125886294e-05, - "loss": 0.0021, + "epoch": 1.2368000000000001, + "grad_norm": 0.003695385023378297, + "learning_rate": 1.327364857623168e-05, + "loss": 0.0001, "step": 773 }, { - "epoch": 1.158682634730539, - "grad_norm": 0.05731251869782339, - "learning_rate": 1.4073766159298257e-05, - "loss": 0.0006, + "epoch": 1.2384, + "grad_norm": 0.01608456444419951, + "learning_rate": 1.3257315398159865e-05, + "loss": 0.0001, "step": 774 }, { - "epoch": 1.160179640718563, - "grad_norm": 0.12190886299689159, - "learning_rate": 1.4058994542697027e-05, - "loss": 0.0013, + "epoch": 1.24, + "grad_norm": 0.042718393501721276, + "learning_rate": 1.3240972493249846e-05, + "loss": 0.0002, "step": 775 }, { - "epoch": 1.1616766467065869, - "grad_norm": 0.16571654034581215, - "learning_rate": 1.4044212314699919e-05, - "loss": 0.0021, + "epoch": 1.2416, + "grad_norm": 0.053524927786504244, + "learning_rate": 1.3224619910304019e-05, + "loss": 0.0003, "step": 776 }, { - "epoch": 1.1631736526946108, - "grad_norm": 0.12852042640903608, - "learning_rate": 1.402941951395199e-05, - "loss": 0.0017, + "epoch": 1.2432, + "grad_norm": 0.035849151817245385, + "learning_rate": 1.3208257698153677e-05, + "loss": 0.0002, "step": 777 }, { - "epoch": 1.1646706586826348, - "grad_norm": 0.04996364604118305, - "learning_rate": 1.4014616179125935e-05, - "loss": 0.0004, + "epoch": 1.2448, + "grad_norm": 0.05444679154132394, + "learning_rate": 1.3191885905658873e-05, + "loss": 0.0005, "step": 778 }, { - "epoch": 1.1661676646706587, - "grad_norm": 0.22739207612882284, - "learning_rate": 1.3999802348921993e-05, - "loss": 0.0036, + "epoch": 1.2464, + "grad_norm": 0.14300068454824735, + "learning_rate": 1.3175504581708261e-05, + "loss": 0.0035, "step": 779 }, { - "epoch": 1.1676646706586826, - "grad_norm": 0.10329869929004334, - "learning_rate": 1.3984978062067836e-05, - "loss": 0.0008, + "epoch": 1.248, + "grad_norm": 0.007568948806440808, + "learning_rate": 1.3159113775218963e-05, + "loss": 0.0001, "step": 780 }, { - "epoch": 1.1691616766467066, - "grad_norm": 0.16774030611607113, - "learning_rate": 1.3970143357318475e-05, - "loss": 0.0025, + "epoch": 1.2496, + "grad_norm": 0.019533933851628504, + "learning_rate": 1.3142713535136413e-05, + "loss": 0.0002, "step": 781 }, { - "epoch": 1.1706586826347305, - "grad_norm": 0.10032247067801155, - "learning_rate": 1.3955298273456157e-05, - "loss": 0.0008, + "epoch": 1.2511999999999999, + "grad_norm": 0.01251896381864414, + "learning_rate": 1.3126303910434215e-05, + "loss": 0.0002, "step": 782 }, { - "epoch": 1.1721556886227544, - "grad_norm": 0.12720820030778054, - "learning_rate": 1.3940442849290259e-05, - "loss": 0.0028, + "epoch": 1.2528000000000001, + "grad_norm": 0.17193764796753144, + "learning_rate": 1.3109884950114007e-05, + "loss": 0.0017, "step": 783 }, { - "epoch": 1.1736526946107784, - "grad_norm": 0.07501276893929008, - "learning_rate": 1.3925577123657196e-05, - "loss": 0.0013, + "epoch": 1.2544, + "grad_norm": 0.015917363245241088, + "learning_rate": 1.309345670320529e-05, + "loss": 0.0001, "step": 784 }, { - "epoch": 1.1751497005988023, - "grad_norm": 0.13032678151078575, - "learning_rate": 1.3910701135420312e-05, - "loss": 0.0014, + "epoch": 1.256, + "grad_norm": 0.007055309334246423, + "learning_rate": 1.3077019218765306e-05, + "loss": 0.0001, "step": 785 }, { - "epoch": 1.1766467065868262, - "grad_norm": 0.1189117861891301, - "learning_rate": 1.3895814923469781e-05, - "loss": 0.003, + "epoch": 1.2576, + "grad_norm": 0.01697671259128701, + "learning_rate": 1.3060572545878875e-05, + "loss": 0.0002, "step": 786 }, { - "epoch": 1.1781437125748502, - "grad_norm": 0.04852723768916087, - "learning_rate": 1.3880918526722497e-05, - "loss": 0.0006, + "epoch": 1.2591999999999999, + "grad_norm": 0.05641228049039576, + "learning_rate": 1.3044116733658261e-05, + "loss": 0.0002, "step": 787 }, { - "epoch": 1.1796407185628743, - "grad_norm": 0.14044921780427475, - "learning_rate": 1.3866011984121997e-05, - "loss": 0.0016, + "epoch": 1.2608, + "grad_norm": 0.0706754675654207, + "learning_rate": 1.302765183124302e-05, + "loss": 0.0023, "step": 788 }, { - "epoch": 1.1811377245508983, - "grad_norm": 0.10503085747024221, - "learning_rate": 1.3851095334638323e-05, - "loss": 0.0013, + "epoch": 1.2624, + "grad_norm": 0.013578635017518218, + "learning_rate": 1.3011177887799846e-05, + "loss": 0.0002, "step": 789 }, { - "epoch": 1.1826347305389222, - "grad_norm": 0.07687725843746662, - "learning_rate": 1.3836168617267956e-05, - "loss": 0.0007, + "epoch": 1.264, + "grad_norm": 0.006322359265803324, + "learning_rate": 1.2994694952522435e-05, + "loss": 0.0001, "step": 790 }, { - "epoch": 1.1841317365269461, - "grad_norm": 0.13288540054060996, - "learning_rate": 1.3821231871033684e-05, - "loss": 0.0019, + "epoch": 1.2656, + "grad_norm": 0.024743663928371313, + "learning_rate": 1.2978203074631335e-05, + "loss": 0.0002, "step": 791 }, { - "epoch": 1.18562874251497, - "grad_norm": 0.10408705042608069, - "learning_rate": 1.3806285134984525e-05, - "loss": 0.0015, + "epoch": 1.2671999999999999, + "grad_norm": 0.010036392053070036, + "learning_rate": 1.2961702303373795e-05, + "loss": 0.0001, "step": 792 }, { - "epoch": 1.187125748502994, - "grad_norm": 0.09545112906104483, - "learning_rate": 1.37913284481956e-05, - "loss": 0.0013, + "epoch": 1.2688, + "grad_norm": 0.059398968783332944, + "learning_rate": 1.2945192688023625e-05, + "loss": 0.0003, "step": 793 }, { - "epoch": 1.188622754491018, - "grad_norm": 0.19426107849435, - "learning_rate": 1.3776361849768056e-05, - "loss": 0.002, + "epoch": 1.2704, + "grad_norm": 0.1360718622443351, + "learning_rate": 1.2928674277881041e-05, + "loss": 0.0006, "step": 794 }, { - "epoch": 1.1901197604790419, - "grad_norm": 0.1508486355195202, - "learning_rate": 1.3761385378828946e-05, - "loss": 0.0016, + "epoch": 1.272, + "grad_norm": 0.006056760144497216, + "learning_rate": 1.2912147122272523e-05, + "loss": 0.0001, "step": 795 }, { - "epoch": 1.1916167664670658, - "grad_norm": 0.11380042520519763, - "learning_rate": 1.3746399074531139e-05, - "loss": 0.0012, + "epoch": 1.2736, + "grad_norm": 0.0031045773593651495, + "learning_rate": 1.2895611270550666e-05, + "loss": 0.0001, "step": 796 }, { - "epoch": 1.1931137724550898, - "grad_norm": 0.06272949618809354, - "learning_rate": 1.3731402976053203e-05, - "loss": 0.0009, + "epoch": 1.2752, + "grad_norm": 0.013723013938490469, + "learning_rate": 1.287906677209403e-05, + "loss": 0.0001, "step": 797 }, { - "epoch": 1.1946107784431137, - "grad_norm": 0.04908194402100617, - "learning_rate": 1.3716397122599311e-05, - "loss": 0.0004, + "epoch": 1.2768, + "grad_norm": 0.11625251974105492, + "learning_rate": 1.2862513676307009e-05, + "loss": 0.0001, "step": 798 }, { - "epoch": 1.1961077844311376, - "grad_norm": 0.04835379010726109, - "learning_rate": 1.3701381553399147e-05, - "loss": 0.0005, + "epoch": 1.2784, + "grad_norm": 0.023632751991143754, + "learning_rate": 1.2845952032619651e-05, + "loss": 0.0001, "step": 799 }, { - "epoch": 1.1976047904191618, - "grad_norm": 0.07282438701791324, - "learning_rate": 1.3686356307707784e-05, - "loss": 0.0011, + "epoch": 1.28, + "grad_norm": 0.019534406451617272, + "learning_rate": 1.2829381890487536e-05, + "loss": 0.0002, "step": 800 }, { - "epoch": 1.1991017964071857, - "grad_norm": 0.07359224976031348, - "learning_rate": 1.3671321424805602e-05, + "epoch": 1.2816, + "grad_norm": 0.13935122782766068, + "learning_rate": 1.2812803299391629e-05, "loss": 0.0008, "step": 801 }, { - "epoch": 1.2005988023952097, - "grad_norm": 0.0728588455334413, - "learning_rate": 1.365627694399817e-05, - "loss": 0.0011, + "epoch": 1.2832, + "grad_norm": 0.008055208167228007, + "learning_rate": 1.2796216308838116e-05, + "loss": 0.0001, "step": 802 }, { - "epoch": 1.2020958083832336, - "grad_norm": 0.08572484612904316, - "learning_rate": 1.3641222904616149e-05, - "loss": 0.0019, + "epoch": 1.2848, + "grad_norm": 0.01074330815404068, + "learning_rate": 1.2779620968358276e-05, + "loss": 0.0001, "step": 803 }, { - "epoch": 1.2035928143712575, - "grad_norm": 0.10340010986651034, - "learning_rate": 1.3626159346015193e-05, - "loss": 0.0009, + "epoch": 1.2864, + "grad_norm": 0.04719274161820858, + "learning_rate": 1.2763017327508304e-05, + "loss": 0.0003, "step": 804 }, { - "epoch": 1.2050898203592815, - "grad_norm": 0.07243035348418118, - "learning_rate": 1.3611086307575831e-05, - "loss": 0.0008, + "epoch": 1.288, + "grad_norm": 0.10415338321560368, + "learning_rate": 1.2746405435869198e-05, + "loss": 0.0022, "step": 805 }, { - "epoch": 1.2065868263473054, - "grad_norm": 0.1334209046458507, - "learning_rate": 1.3596003828703393e-05, - "loss": 0.0019, + "epoch": 1.2896, + "grad_norm": 0.011459211797809638, + "learning_rate": 1.2729785343046587e-05, + "loss": 0.0001, "step": 806 }, { - "epoch": 1.2080838323353293, - "grad_norm": 0.0570772989713065, - "learning_rate": 1.3580911948827868e-05, - "loss": 0.001, + "epoch": 1.2912, + "grad_norm": 0.0036652103191051693, + "learning_rate": 1.271315709867059e-05, + "loss": 0.0001, "step": 807 }, { - "epoch": 1.2095808383233533, - "grad_norm": 0.1204536054401414, - "learning_rate": 1.3565810707403844e-05, - "loss": 0.0009, + "epoch": 1.2928, + "grad_norm": 0.0025570224469476416, + "learning_rate": 1.2696520752395671e-05, + "loss": 0.0001, "step": 808 }, { - "epoch": 1.2110778443113772, - "grad_norm": 0.1867883260775183, - "learning_rate": 1.3550700143910365e-05, - "loss": 0.0025, + "epoch": 1.2944, + "grad_norm": 0.0036595771184137252, + "learning_rate": 1.2679876353900482e-05, + "loss": 0.0001, "step": 809 }, { - "epoch": 1.2125748502994012, - "grad_norm": 0.07727972942708328, - "learning_rate": 1.3535580297850852e-05, - "loss": 0.0007, + "epoch": 1.296, + "grad_norm": 0.11645277140286601, + "learning_rate": 1.2663223952887724e-05, + "loss": 0.0008, "step": 810 }, { - "epoch": 1.214071856287425, - "grad_norm": 0.09466637885042935, - "learning_rate": 1.3520451208752992e-05, - "loss": 0.0005, + "epoch": 1.2976, + "grad_norm": 0.0041716206625464205, + "learning_rate": 1.2646563599083997e-05, + "loss": 0.0001, "step": 811 }, { - "epoch": 1.215568862275449, - "grad_norm": 0.1580780788542899, - "learning_rate": 1.3505312916168644e-05, - "loss": 0.001, + "epoch": 1.2992, + "grad_norm": 0.028390432744085866, + "learning_rate": 1.2629895342239643e-05, + "loss": 0.0002, "step": 812 }, { - "epoch": 1.217065868263473, - "grad_norm": 0.10526784028411676, - "learning_rate": 1.3490165459673716e-05, + "epoch": 1.3008, + "grad_norm": 0.08071690095947365, + "learning_rate": 1.2613219232128608e-05, "loss": 0.001, "step": 813 }, { - "epoch": 1.218562874251497, - "grad_norm": 0.12653380982480567, - "learning_rate": 1.3475008878868078e-05, - "loss": 0.0013, + "epoch": 1.3024, + "grad_norm": 0.10505099790295679, + "learning_rate": 1.2596535318548288e-05, + "loss": 0.0011, "step": 814 }, { - "epoch": 1.220059880239521, - "grad_norm": 0.069894431976694, - "learning_rate": 1.345984321337546e-05, - "loss": 0.0014, + "epoch": 1.304, + "grad_norm": 0.0049778696815799125, + "learning_rate": 1.2579843651319382e-05, + "loss": 0.0001, "step": 815 }, { - "epoch": 1.221556886227545, - "grad_norm": 0.11138606082666604, - "learning_rate": 1.344466850284333e-05, - "loss": 0.001, + "epoch": 1.3056, + "grad_norm": 0.0043848840789224, + "learning_rate": 1.2563144280285742e-05, + "loss": 0.0001, "step": 816 }, { - "epoch": 1.223053892215569, - "grad_norm": 0.09549692513609892, - "learning_rate": 1.342948478694281e-05, - "loss": 0.0012, + "epoch": 1.3072, + "grad_norm": 0.020550136184270512, + "learning_rate": 1.2546437255314223e-05, + "loss": 0.0001, "step": 817 }, { - "epoch": 1.2245508982035929, - "grad_norm": 0.08071969012117705, - "learning_rate": 1.3414292105368562e-05, - "loss": 0.0011, + "epoch": 1.3088, + "grad_norm": 0.004432220253725601, + "learning_rate": 1.252972262629454e-05, + "loss": 0.0001, "step": 818 }, { - "epoch": 1.2260479041916168, - "grad_norm": 0.014969590068211934, - "learning_rate": 1.339909049783869e-05, - "loss": 0.0002, + "epoch": 1.3104, + "grad_norm": 0.01302457137434428, + "learning_rate": 1.2513000443139112e-05, + "loss": 0.0001, "step": 819 }, { - "epoch": 1.2275449101796407, - "grad_norm": 0.07241700618747784, - "learning_rate": 1.338388000409463e-05, - "loss": 0.0005, + "epoch": 1.312, + "grad_norm": 0.21882702602859708, + "learning_rate": 1.2496270755782913e-05, + "loss": 0.0045, "step": 820 }, { - "epoch": 1.2290419161676647, - "grad_norm": 0.11075869601766639, - "learning_rate": 1.3368660663901045e-05, - "loss": 0.0023, + "epoch": 1.3136, + "grad_norm": 0.011649467674618524, + "learning_rate": 1.2479533614183334e-05, + "loss": 0.0001, "step": 821 }, { - "epoch": 1.2305389221556886, - "grad_norm": 0.17060039325631357, - "learning_rate": 1.3353432517045739e-05, - "loss": 0.0016, + "epoch": 1.3152, + "grad_norm": 0.04260599118754293, + "learning_rate": 1.2462789068320016e-05, + "loss": 0.0003, "step": 822 }, { - "epoch": 1.2320359281437125, - "grad_norm": 0.1022635862904923, - "learning_rate": 1.3338195603339525e-05, - "loss": 0.0011, + "epoch": 1.3168, + "grad_norm": 0.009341435049419606, + "learning_rate": 1.2446037168194716e-05, + "loss": 0.0001, "step": 823 }, { - "epoch": 1.2335329341317365, - "grad_norm": 0.09158836544730396, - "learning_rate": 1.3322949962616142e-05, - "loss": 0.0006, + "epoch": 1.3184, + "grad_norm": 0.014751816702529163, + "learning_rate": 1.2429277963831147e-05, + "loss": 0.0002, "step": 824 }, { - "epoch": 1.2350299401197604, - "grad_norm": 0.048832084216327266, - "learning_rate": 1.3307695634732144e-05, - "loss": 0.0005, + "epoch": 1.32, + "grad_norm": 0.008743327925641906, + "learning_rate": 1.2412511505274845e-05, + "loss": 0.0001, "step": 825 }, { - "epoch": 1.2365269461077844, - "grad_norm": 0.08830232598087197, - "learning_rate": 1.3292432659566791e-05, - "loss": 0.0007, + "epoch": 1.3216, + "grad_norm": 0.003562158556219341, + "learning_rate": 1.2395737842592997e-05, + "loss": 0.0001, "step": 826 }, { - "epoch": 1.2380239520958083, - "grad_norm": 0.052016398871730235, - "learning_rate": 1.3277161077021957e-05, - "loss": 0.0005, + "epoch": 1.3232, + "grad_norm": 0.13376418839075693, + "learning_rate": 1.23789570258743e-05, + "loss": 0.0019, "step": 827 }, { - "epoch": 1.2395209580838324, - "grad_norm": 0.12716710033758089, - "learning_rate": 1.3261880927022008e-05, - "loss": 0.001, + "epoch": 1.3248, + "grad_norm": 0.019777762410234753, + "learning_rate": 1.2362169105228828e-05, + "loss": 0.0002, "step": 828 }, { - "epoch": 1.2410179640718564, - "grad_norm": 0.08239866964993203, - "learning_rate": 1.3246592249513717e-05, - "loss": 0.0008, + "epoch": 1.3264, + "grad_norm": 0.006502459797443767, + "learning_rate": 1.2345374130787855e-05, + "loss": 0.0001, "step": 829 }, { - "epoch": 1.2425149700598803, - "grad_norm": 0.07577545667183459, - "learning_rate": 1.3231295084466149e-05, - "loss": 0.0008, + "epoch": 1.328, + "grad_norm": 0.009005732145788963, + "learning_rate": 1.2328572152703726e-05, + "loss": 0.0001, "step": 830 }, { - "epoch": 1.2440119760479043, - "grad_norm": 0.08810579822674741, - "learning_rate": 1.3215989471870556e-05, - "loss": 0.0009, + "epoch": 1.3296000000000001, + "grad_norm": 0.003715792304581145, + "learning_rate": 1.23117632211497e-05, + "loss": 0.0001, "step": 831 }, { - "epoch": 1.2455089820359282, - "grad_norm": 0.04066723916071258, - "learning_rate": 1.3200675451740273e-05, - "loss": 0.0006, + "epoch": 1.3312, + "grad_norm": 0.01923160629987508, + "learning_rate": 1.2294947386319793e-05, + "loss": 0.0001, "step": 832 }, { - "epoch": 1.2470059880239521, - "grad_norm": 0.14239069700954057, - "learning_rate": 1.3185353064110613e-05, - "loss": 0.0022, + "epoch": 1.3328, + "grad_norm": 0.060602109689933376, + "learning_rate": 1.2278124698428643e-05, + "loss": 0.0004, "step": 833 }, { - "epoch": 1.248502994011976, - "grad_norm": 0.09145370992887267, - "learning_rate": 1.3170022349038774e-05, - "loss": 0.0011, + "epoch": 1.3344, + "grad_norm": 0.05030831181536448, + "learning_rate": 1.2261295207711347e-05, + "loss": 0.0003, "step": 834 }, { - "epoch": 1.25, - "grad_norm": 0.09278110626604044, - "learning_rate": 1.3154683346603712e-05, - "loss": 0.001, + "epoch": 1.336, + "grad_norm": 0.07963954964576811, + "learning_rate": 1.2244458964423328e-05, + "loss": 0.0028, "step": 835 }, { - "epoch": 1.251497005988024, - "grad_norm": 0.04046285571674997, - "learning_rate": 1.3139336096906056e-05, - "loss": 0.0003, + "epoch": 1.3376000000000001, + "grad_norm": 0.05077621892276567, + "learning_rate": 1.2227616018840154e-05, + "loss": 0.0002, "step": 836 }, { - "epoch": 1.2529940119760479, - "grad_norm": 0.1350937599331085, - "learning_rate": 1.3123980640067999e-05, - "loss": 0.0018, + "epoch": 1.3392, + "grad_norm": 0.023897944724496852, + "learning_rate": 1.221076642125742e-05, + "loss": 0.0001, "step": 837 }, { - "epoch": 1.2544910179640718, - "grad_norm": 0.05547862227838062, - "learning_rate": 1.3108617016233178e-05, - "loss": 0.0005, + "epoch": 1.3408, + "grad_norm": 0.024814131724707863, + "learning_rate": 1.2193910221990582e-05, + "loss": 0.0002, "step": 838 }, { - "epoch": 1.2559880239520957, - "grad_norm": 0.13832547458088876, - "learning_rate": 1.3093245265566588e-05, - "loss": 0.0021, + "epoch": 1.3424, + "grad_norm": 0.07223740891735303, + "learning_rate": 1.2177047471374808e-05, + "loss": 0.0005, "step": 839 }, { - "epoch": 1.2574850299401197, - "grad_norm": 0.3205689356680666, - "learning_rate": 1.3077865428254474e-05, - "loss": 0.0017, + "epoch": 1.3439999999999999, + "grad_norm": 0.026021812742085374, + "learning_rate": 1.2160178219764838e-05, + "loss": 0.0002, "step": 840 }, { - "epoch": 1.2589820359281436, - "grad_norm": 0.17333145605990935, - "learning_rate": 1.3062477544504218e-05, - "loss": 0.0028, + "epoch": 1.3456000000000001, + "grad_norm": 0.007538083846824722, + "learning_rate": 1.214330251753481e-05, + "loss": 0.0001, "step": 841 }, { - "epoch": 1.2604790419161676, - "grad_norm": 0.10905825753636433, - "learning_rate": 1.304708165454423e-05, - "loss": 0.001, + "epoch": 1.3472, + "grad_norm": 0.0032914494744931025, + "learning_rate": 1.2126420415078133e-05, + "loss": 0.0001, "step": 842 }, { - "epoch": 1.2619760479041915, - "grad_norm": 0.06230478352428612, - "learning_rate": 1.3031677798623865e-05, - "loss": 0.0005, + "epoch": 1.3488, + "grad_norm": 0.00686056490409744, + "learning_rate": 1.2109531962807333e-05, + "loss": 0.0001, "step": 843 }, { - "epoch": 1.2634730538922156, - "grad_norm": 0.15076033232514022, - "learning_rate": 1.3016266017013295e-05, - "loss": 0.0015, + "epoch": 1.3504, + "grad_norm": 0.08422612909094722, + "learning_rate": 1.2092637211153885e-05, + "loss": 0.0002, "step": 844 }, { - "epoch": 1.2649700598802396, - "grad_norm": 0.10330789403132279, - "learning_rate": 1.300084635000341e-05, - "loss": 0.0013, + "epoch": 1.3519999999999999, + "grad_norm": 0.0859550009021419, + "learning_rate": 1.207573621056809e-05, + "loss": 0.0003, "step": 845 }, { - "epoch": 1.2664670658682635, - "grad_norm": 0.0967360202578523, - "learning_rate": 1.2985418837905719e-05, - "loss": 0.0009, + "epoch": 1.3536000000000001, + "grad_norm": 0.012894853612493646, + "learning_rate": 1.2058829011518896e-05, + "loss": 0.0001, "step": 846 }, { - "epoch": 1.2679640718562875, - "grad_norm": 0.11018485595492865, - "learning_rate": 1.2969983521052243e-05, - "loss": 0.0015, + "epoch": 1.3552, + "grad_norm": 0.01038140375447721, + "learning_rate": 1.2041915664493763e-05, + "loss": 0.0001, "step": 847 }, { - "epoch": 1.2694610778443114, - "grad_norm": 0.2026144990686223, - "learning_rate": 1.29545404397954e-05, - "loss": 0.0016, + "epoch": 1.3568, + "grad_norm": 0.00782320648496516, + "learning_rate": 1.2024996219998517e-05, + "loss": 0.0001, "step": 848 }, { - "epoch": 1.2709580838323353, - "grad_norm": 0.11825794065351451, - "learning_rate": 1.2939089634507908e-05, - "loss": 0.0009, + "epoch": 1.3584, + "grad_norm": 0.016092547758833026, + "learning_rate": 1.2008070728557186e-05, + "loss": 0.0001, "step": 849 }, { - "epoch": 1.2724550898203593, - "grad_norm": 0.1706730641796369, - "learning_rate": 1.2923631145582684e-05, - "loss": 0.0026, + "epoch": 1.3599999999999999, + "grad_norm": 0.007446362479705341, + "learning_rate": 1.1991139240711857e-05, + "loss": 0.0001, "step": 850 }, { - "epoch": 1.2739520958083832, - "grad_norm": 0.057699428000987554, - "learning_rate": 1.2908165013432723e-05, - "loss": 0.0008, + "epoch": 1.3616, + "grad_norm": 0.01801782176947607, + "learning_rate": 1.1974201807022525e-05, + "loss": 0.0001, "step": 851 }, { - "epoch": 1.2754491017964071, - "grad_norm": 0.142393242455963, - "learning_rate": 1.2892691278491012e-05, - "loss": 0.0017, + "epoch": 1.3632, + "grad_norm": 0.11088844118694781, + "learning_rate": 1.195725847806693e-05, + "loss": 0.0004, "step": 852 }, { - "epoch": 1.276946107784431, - "grad_norm": 0.055031594469559006, - "learning_rate": 1.2877209981210405e-05, - "loss": 0.0004, + "epoch": 1.3648, + "grad_norm": 0.05072198627653064, + "learning_rate": 1.1940309304440434e-05, + "loss": 0.0003, "step": 853 }, { - "epoch": 1.278443113772455, - "grad_norm": 0.0849339018863119, - "learning_rate": 1.2861721162063532e-05, - "loss": 0.0004, + "epoch": 1.3664, + "grad_norm": 0.007082916498430339, + "learning_rate": 1.1923354336755835e-05, + "loss": 0.0001, "step": 854 }, { - "epoch": 1.2799401197604792, - "grad_norm": 0.07827653949622694, - "learning_rate": 1.2846224861542677e-05, - "loss": 0.0007, + "epoch": 1.3679999999999999, + "grad_norm": 0.0024532472327597767, + "learning_rate": 1.1906393625643244e-05, + "loss": 0.0001, "step": 855 }, { - "epoch": 1.281437125748503, - "grad_norm": 0.024359597454629765, - "learning_rate": 1.2830721120159697e-05, - "loss": 0.0003, + "epoch": 1.3696, + "grad_norm": 0.03561823023476568, + "learning_rate": 1.1889427221749916e-05, + "loss": 0.0002, "step": 856 }, { - "epoch": 1.282934131736527, - "grad_norm": 0.21035448274091229, - "learning_rate": 1.2815209978445898e-05, - "loss": 0.0011, + "epoch": 1.3712, + "grad_norm": 0.011999726938913933, + "learning_rate": 1.1872455175740111e-05, + "loss": 0.0001, "step": 857 }, { - "epoch": 1.284431137724551, - "grad_norm": 0.07397518183430371, - "learning_rate": 1.2799691476951926e-05, - "loss": 0.0007, + "epoch": 1.3728, + "grad_norm": 0.03409171465268027, + "learning_rate": 1.1855477538294934e-05, + "loss": 0.0002, "step": 858 }, { - "epoch": 1.285928143712575, - "grad_norm": 0.13982310084632887, - "learning_rate": 1.2784165656247668e-05, - "loss": 0.001, + "epoch": 1.3744, + "grad_norm": 0.01687001729393258, + "learning_rate": 1.1838494360112185e-05, + "loss": 0.0001, "step": 859 }, { - "epoch": 1.2874251497005988, - "grad_norm": 0.029670153139229417, - "learning_rate": 1.2768632556922151e-05, - "loss": 0.0004, + "epoch": 1.376, + "grad_norm": 0.08714127876212839, + "learning_rate": 1.1821505691906216e-05, + "loss": 0.0003, "step": 860 }, { - "epoch": 1.2889221556886228, - "grad_norm": 0.08714604279500297, - "learning_rate": 1.275309221958343e-05, - "loss": 0.0012, + "epoch": 1.3776, + "grad_norm": 0.030346824461753623, + "learning_rate": 1.1804511584407763e-05, + "loss": 0.0002, "step": 861 }, { - "epoch": 1.2904191616766467, - "grad_norm": 0.02766702455992605, - "learning_rate": 1.2737544684858476e-05, - "loss": 0.0003, + "epoch": 1.3792, + "grad_norm": 0.06417791238526656, + "learning_rate": 1.1787512088363817e-05, + "loss": 0.0002, "step": 862 }, { - "epoch": 1.2919161676646707, - "grad_norm": 0.15764398213987413, - "learning_rate": 1.2721989993393088e-05, - "loss": 0.0021, + "epoch": 1.3808, + "grad_norm": 0.1100014244959121, + "learning_rate": 1.1770507254537454e-05, + "loss": 0.0009, "step": 863 }, { - "epoch": 1.2934131736526946, - "grad_norm": 0.3620928680238313, - "learning_rate": 1.2706428185851759e-05, - "loss": 0.0031, + "epoch": 1.3824, + "grad_norm": 0.003659563707803467, + "learning_rate": 1.1753497133707678e-05, + "loss": 0.0001, "step": 864 }, { - "epoch": 1.2949101796407185, - "grad_norm": 0.15033902468169197, - "learning_rate": 1.26908593029176e-05, - "loss": 0.0017, + "epoch": 1.384, + "grad_norm": 0.10925148443368389, + "learning_rate": 1.1736481776669307e-05, + "loss": 0.0014, "step": 865 }, { - "epoch": 1.2964071856287425, - "grad_norm": 0.08093325547150344, - "learning_rate": 1.2675283385292212e-05, - "loss": 0.0006, + "epoch": 1.3856, + "grad_norm": 0.05504213576089758, + "learning_rate": 1.1719461234232765e-05, + "loss": 0.0004, "step": 866 }, { - "epoch": 1.2979041916167664, - "grad_norm": 0.19391865613837045, - "learning_rate": 1.2659700473695581e-05, - "loss": 0.0026, + "epoch": 1.3872, + "grad_norm": 0.002039500612383281, + "learning_rate": 1.1702435557223988e-05, + "loss": 0.0001, "step": 867 }, { - "epoch": 1.2994011976047903, - "grad_norm": 0.13771485536341505, - "learning_rate": 1.2644110608865994e-05, - "loss": 0.002, + "epoch": 1.3888, + "grad_norm": 0.08673954375056128, + "learning_rate": 1.1685404796484226e-05, + "loss": 0.0007, "step": 868 }, { - "epoch": 1.3008982035928143, - "grad_norm": 0.082036361858331, - "learning_rate": 1.2628513831559897e-05, - "loss": 0.0012, + "epoch": 1.3904, + "grad_norm": 0.01799475637745854, + "learning_rate": 1.1668369002869912e-05, + "loss": 0.0001, "step": 869 }, { - "epoch": 1.3023952095808382, - "grad_norm": 0.13557425572078727, - "learning_rate": 1.2612910182551822e-05, - "loss": 0.0009, + "epoch": 1.392, + "grad_norm": 0.006362603924743877, + "learning_rate": 1.1651328227252516e-05, + "loss": 0.0001, "step": 870 }, { - "epoch": 1.3038922155688621, - "grad_norm": 0.1248956204266484, - "learning_rate": 1.259729970263426e-05, - "loss": 0.0012, + "epoch": 1.3936, + "grad_norm": 0.0016838244758269412, + "learning_rate": 1.1634282520518382e-05, + "loss": 0.0001, "step": 871 }, { - "epoch": 1.3053892215568863, - "grad_norm": 0.15323443985781152, - "learning_rate": 1.2581682432617554e-05, - "loss": 0.0021, + "epoch": 1.3952, + "grad_norm": 0.011845117010406862, + "learning_rate": 1.1617231933568579e-05, + "loss": 0.0001, "step": 872 }, { - "epoch": 1.3068862275449102, - "grad_norm": 0.16495520023315455, - "learning_rate": 1.2566058413329806e-05, - "loss": 0.0011, + "epoch": 1.3968, + "grad_norm": 0.11842164258071569, + "learning_rate": 1.1600176517318742e-05, + "loss": 0.0014, "step": 873 }, { - "epoch": 1.3083832335329342, - "grad_norm": 0.12612540137072323, - "learning_rate": 1.2550427685616767e-05, - "loss": 0.0019, + "epoch": 1.3984, + "grad_norm": 0.001944117806670603, + "learning_rate": 1.1583116322698936e-05, + "loss": 0.0001, "step": 874 }, { - "epoch": 1.3098802395209581, - "grad_norm": 0.07140252385386611, - "learning_rate": 1.253479029034171e-05, - "loss": 0.0009, + "epoch": 1.4, + "grad_norm": 0.006907884014797453, + "learning_rate": 1.1566051400653486e-05, + "loss": 0.0001, "step": 875 }, { - "epoch": 1.311377245508982, - "grad_norm": 0.07997999603836409, - "learning_rate": 1.2519146268385352e-05, - "loss": 0.0012, + "epoch": 1.4016, + "grad_norm": 0.046163014290470226, + "learning_rate": 1.1548981802140849e-05, + "loss": 0.0002, "step": 876 }, { - "epoch": 1.312874251497006, - "grad_norm": 0.05031120608143949, - "learning_rate": 1.250349566064573e-05, - "loss": 0.0007, + "epoch": 1.4032, + "grad_norm": 0.08173968469395558, + "learning_rate": 1.153190757813343e-05, + "loss": 0.0002, "step": 877 }, { - "epoch": 1.31437125748503, - "grad_norm": 0.10409482982213064, - "learning_rate": 1.2487838508038097e-05, - "loss": 0.0018, + "epoch": 1.4048, + "grad_norm": 0.009413722377722337, + "learning_rate": 1.151482877961746e-05, + "loss": 0.0001, "step": 878 }, { - "epoch": 1.3158682634730539, - "grad_norm": 0.06629243450722555, - "learning_rate": 1.2472174851494823e-05, - "loss": 0.0005, + "epoch": 1.4064, + "grad_norm": 0.007313756445064559, + "learning_rate": 1.1497745457592817e-05, + "loss": 0.0001, "step": 879 }, { - "epoch": 1.3173652694610778, - "grad_norm": 0.11971936347458219, - "learning_rate": 1.2456504731965269e-05, - "loss": 0.0017, + "epoch": 1.408, + "grad_norm": 0.08301497427474906, + "learning_rate": 1.1480657663072896e-05, + "loss": 0.0006, "step": 880 }, { - "epoch": 1.3188622754491017, - "grad_norm": 0.20319873175607275, - "learning_rate": 1.2440828190415706e-05, - "loss": 0.0018, + "epoch": 1.4096, + "grad_norm": 0.005428105104145994, + "learning_rate": 1.1463565447084446e-05, + "loss": 0.0001, "step": 881 }, { - "epoch": 1.3203592814371259, - "grad_norm": 0.04682707557262463, - "learning_rate": 1.2425145267829179e-05, - "loss": 0.0005, + "epoch": 1.4112, + "grad_norm": 0.007285758503868084, + "learning_rate": 1.1446468860667422e-05, + "loss": 0.0001, "step": 882 }, { - "epoch": 1.3218562874251498, - "grad_norm": 0.3848522460027514, - "learning_rate": 1.2409456005205427e-05, - "loss": 0.0017, + "epoch": 1.4128, + "grad_norm": 0.10521185311428458, + "learning_rate": 1.142936795487482e-05, + "loss": 0.0004, "step": 883 }, { - "epoch": 1.3233532934131738, - "grad_norm": 0.20512103175801702, - "learning_rate": 1.239376044356076e-05, - "loss": 0.0024, + "epoch": 1.4144, + "grad_norm": 0.0024113977064178106, + "learning_rate": 1.141226278077254e-05, + "loss": 0.0001, "step": 884 }, { - "epoch": 1.3248502994011977, - "grad_norm": 0.08312153889808901, - "learning_rate": 1.2378058623927955e-05, - "loss": 0.001, + "epoch": 1.416, + "grad_norm": 0.044378254245468034, + "learning_rate": 1.1395153389439232e-05, + "loss": 0.0002, "step": 885 }, { - "epoch": 1.3263473053892216, - "grad_norm": 0.05507532413988543, - "learning_rate": 1.236235058735615e-05, - "loss": 0.0004, + "epoch": 1.4176, + "grad_norm": 0.010464383812279958, + "learning_rate": 1.1378039831966134e-05, + "loss": 0.0001, "step": 886 }, { - "epoch": 1.3278443113772456, - "grad_norm": 0.08688774454813779, - "learning_rate": 1.2346636374910735e-05, - "loss": 0.0008, + "epoch": 1.4192, + "grad_norm": 0.010406195082471622, + "learning_rate": 1.1360922159456929e-05, + "loss": 0.0001, "step": 887 }, { - "epoch": 1.3293413173652695, - "grad_norm": 0.18863114337383052, - "learning_rate": 1.233091602767324e-05, - "loss": 0.0014, + "epoch": 1.4208, + "grad_norm": 0.005944865648898556, + "learning_rate": 1.1343800423027583e-05, + "loss": 0.0001, "step": 888 }, { - "epoch": 1.3308383233532934, - "grad_norm": 0.02191143112881276, - "learning_rate": 1.2315189586741246e-05, - "loss": 0.0002, + "epoch": 1.4224, + "grad_norm": 0.1147538539156489, + "learning_rate": 1.1326674673806195e-05, + "loss": 0.0005, "step": 889 }, { - "epoch": 1.3323353293413174, - "grad_norm": 0.08898221164791097, - "learning_rate": 1.2299457093228253e-05, - "loss": 0.0006, + "epoch": 1.424, + "grad_norm": 0.010290915521585257, + "learning_rate": 1.1309544962932861e-05, + "loss": 0.0001, "step": 890 }, { - "epoch": 1.3338323353293413, - "grad_norm": 0.038820165742322596, - "learning_rate": 1.2283718588263594e-05, + "epoch": 1.4256, + "grad_norm": 0.050496243492681094, + "learning_rate": 1.129241134155949e-05, "loss": 0.0004, "step": 891 }, { - "epoch": 1.3353293413173652, - "grad_norm": 0.07647935504218839, - "learning_rate": 1.2267974112992307e-05, - "loss": 0.0011, + "epoch": 1.4272, + "grad_norm": 0.16811807745325824, + "learning_rate": 1.1275273860849684e-05, + "loss": 0.0012, "step": 892 }, { - "epoch": 1.3368263473053892, - "grad_norm": 0.13982650167833818, - "learning_rate": 1.2252223708575047e-05, - "loss": 0.002, + "epoch": 1.4288, + "grad_norm": 0.12687185170909612, + "learning_rate": 1.1258132571978555e-05, + "loss": 0.0012, "step": 893 }, { - "epoch": 1.3383233532934131, - "grad_norm": 0.1338195205882971, - "learning_rate": 1.2236467416187965e-05, - "loss": 0.0025, + "epoch": 1.4304000000000001, + "grad_norm": 0.008316438089630363, + "learning_rate": 1.1240987526132595e-05, + "loss": 0.0001, "step": 894 }, { - "epoch": 1.339820359281437, - "grad_norm": 0.1307841640751261, - "learning_rate": 1.2220705277022602e-05, - "loss": 0.0016, + "epoch": 1.432, + "grad_norm": 0.03953100157978685, + "learning_rate": 1.1223838774509515e-05, + "loss": 0.0002, "step": 895 }, { - "epoch": 1.341317365269461, - "grad_norm": 0.16651571491950992, - "learning_rate": 1.2204937332285796e-05, - "loss": 0.0024, + "epoch": 1.4336, + "grad_norm": 0.05613133148708491, + "learning_rate": 1.1206686368318087e-05, + "loss": 0.0003, "step": 896 }, { - "epoch": 1.342814371257485, - "grad_norm": 0.054613620669397775, - "learning_rate": 1.2189163623199547e-05, - "loss": 0.0006, + "epoch": 1.4352, + "grad_norm": 0.1734257162416317, + "learning_rate": 1.1189530358778005e-05, + "loss": 0.0014, "step": 897 }, { - "epoch": 1.3443113772455089, - "grad_norm": 0.6200338634281016, - "learning_rate": 1.2173384191000936e-05, - "loss": 0.0027, + "epoch": 1.4368, + "grad_norm": 0.10343915105983238, + "learning_rate": 1.1172370797119711e-05, + "loss": 0.0002, "step": 898 }, { - "epoch": 1.3458083832335328, - "grad_norm": 0.10417162543361413, - "learning_rate": 1.2157599076942004e-05, - "loss": 0.0006, + "epoch": 1.4384000000000001, + "grad_norm": 0.0016368939113926887, + "learning_rate": 1.1155207734584264e-05, + "loss": 0.0001, "step": 899 }, { - "epoch": 1.347305389221557, - "grad_norm": 0.28679445691083094, - "learning_rate": 1.214180832228964e-05, - "loss": 0.0018, + "epoch": 1.44, + "grad_norm": 0.004035518677210255, + "learning_rate": 1.1138041222423177e-05, + "loss": 0.0001, "step": 900 }, { - "epoch": 1.348802395209581, - "grad_norm": 0.13667869860487905, - "learning_rate": 1.2126011968325486e-05, - "loss": 0.001, + "epoch": 1.4416, + "grad_norm": 0.024247413227502176, + "learning_rate": 1.1120871311898254e-05, + "loss": 0.0001, "step": 901 }, { - "epoch": 1.3502994011976048, - "grad_norm": 0.14678301118335677, - "learning_rate": 1.211021005634582e-05, - "loss": 0.0014, + "epoch": 1.4432, + "grad_norm": 0.043977625453513865, + "learning_rate": 1.110369805428146e-05, + "loss": 0.0003, "step": 902 }, { - "epoch": 1.3517964071856288, - "grad_norm": 0.03643518165194748, - "learning_rate": 1.2094402627661447e-05, - "loss": 0.0004, + "epoch": 1.4447999999999999, + "grad_norm": 0.20781567679186627, + "learning_rate": 1.1086521500854746e-05, + "loss": 0.0018, "step": 903 }, { - "epoch": 1.3532934131736527, - "grad_norm": 0.21051811393884823, - "learning_rate": 1.2078589723597599e-05, - "loss": 0.0019, + "epoch": 1.4464000000000001, + "grad_norm": 0.25381820106679076, + "learning_rate": 1.106934170290991e-05, + "loss": 0.0022, "step": 904 }, { - "epoch": 1.3547904191616766, - "grad_norm": 0.4465499366437971, - "learning_rate": 1.2062771385493818e-05, - "loss": 0.0019, + "epoch": 1.448, + "grad_norm": 0.002785668497734354, + "learning_rate": 1.1052158711748435e-05, + "loss": 0.0001, "step": 905 }, { - "epoch": 1.3562874251497006, - "grad_norm": 0.3280121687211394, - "learning_rate": 1.204694765470386e-05, - "loss": 0.0024, + "epoch": 1.4496, + "grad_norm": 0.002876651077840309, + "learning_rate": 1.1034972578681338e-05, + "loss": 0.0001, "step": 906 }, { - "epoch": 1.3577844311377245, - "grad_norm": 0.19136289912588866, - "learning_rate": 1.2031118572595568e-05, - "loss": 0.0012, + "epoch": 1.4512, + "grad_norm": 0.0019333083410343309, + "learning_rate": 1.1017783355029027e-05, + "loss": 0.0001, "step": 907 }, { - "epoch": 1.3592814371257484, - "grad_norm": 0.3123777529319237, - "learning_rate": 1.2015284180550786e-05, - "loss": 0.0028, + "epoch": 1.4527999999999999, + "grad_norm": 0.0031434181560634844, + "learning_rate": 1.1000591092121126e-05, + "loss": 0.0001, "step": 908 }, { - "epoch": 1.3607784431137724, - "grad_norm": 0.14613074780473378, - "learning_rate": 1.1999444519965228e-05, - "loss": 0.0006, + "epoch": 1.4544000000000001, + "grad_norm": 0.0886988903254247, + "learning_rate": 1.0983395841296349e-05, + "loss": 0.0008, "step": 909 }, { - "epoch": 1.3622754491017965, - "grad_norm": 0.08510506250722642, - "learning_rate": 1.1983599632248393e-05, - "loss": 0.0011, + "epoch": 1.456, + "grad_norm": 0.06013855398709422, + "learning_rate": 1.0966197653902319e-05, + "loss": 0.0002, "step": 910 }, { - "epoch": 1.3637724550898205, - "grad_norm": 0.24109722460040886, - "learning_rate": 1.1967749558823436e-05, - "loss": 0.0017, + "epoch": 1.4576, + "grad_norm": 0.02346633226702475, + "learning_rate": 1.0948996581295437e-05, + "loss": 0.0002, "step": 911 }, { - "epoch": 1.3652694610778444, - "grad_norm": 0.06997668553987517, - "learning_rate": 1.1951894341127075e-05, - "loss": 0.0008, + "epoch": 1.4592, + "grad_norm": 0.0682423743158357, + "learning_rate": 1.0931792674840718e-05, + "loss": 0.0006, "step": 912 }, { - "epoch": 1.3667664670658684, - "grad_norm": 0.08339177784324386, - "learning_rate": 1.1936034020609476e-05, - "loss": 0.0013, + "epoch": 1.4607999999999999, + "grad_norm": 0.0036738316491110286, + "learning_rate": 1.0914585985911632e-05, + "loss": 0.0001, "step": 913 }, { - "epoch": 1.3682634730538923, - "grad_norm": 0.37011844961784346, - "learning_rate": 1.192016863873414e-05, - "loss": 0.0031, + "epoch": 1.4624, + "grad_norm": 0.15844151837682197, + "learning_rate": 1.0897376565889972e-05, + "loss": 0.0012, "step": 914 }, { - "epoch": 1.3697604790419162, - "grad_norm": 0.19448555543500273, - "learning_rate": 1.1904298236977808e-05, - "loss": 0.0006, + "epoch": 1.464, + "grad_norm": 0.030187227249574854, + "learning_rate": 1.0880164466165675e-05, + "loss": 0.0002, "step": 915 }, { - "epoch": 1.3712574850299402, - "grad_norm": 0.37638195330236135, - "learning_rate": 1.1888422856830335e-05, - "loss": 0.002, + "epoch": 1.4656, + "grad_norm": 0.017291779864942392, + "learning_rate": 1.0862949738136682e-05, + "loss": 0.0001, "step": 916 }, { - "epoch": 1.372754491017964, - "grad_norm": 0.04845927126998666, - "learning_rate": 1.18725425397946e-05, - "loss": 0.0006, + "epoch": 1.4672, + "grad_norm": 0.06340062218060905, + "learning_rate": 1.084573243320878e-05, + "loss": 0.0002, "step": 917 }, { - "epoch": 1.374251497005988, - "grad_norm": 0.050269398324577504, - "learning_rate": 1.1856657327386381e-05, - "loss": 0.0003, + "epoch": 1.4687999999999999, + "grad_norm": 0.02237257630182562, + "learning_rate": 1.0828512602795462e-05, + "loss": 0.0002, "step": 918 }, { - "epoch": 1.375748502994012, - "grad_norm": 0.16322026432621767, - "learning_rate": 1.1840767261134262e-05, - "loss": 0.0017, + "epoch": 1.4704, + "grad_norm": 0.0307339934395069, + "learning_rate": 1.0811290298317755e-05, + "loss": 0.0002, "step": 919 }, { - "epoch": 1.377245508982036, - "grad_norm": 0.1990865675756091, - "learning_rate": 1.1824872382579511e-05, - "loss": 0.004, + "epoch": 1.472, + "grad_norm": 0.1726287183949661, + "learning_rate": 1.0794065571204073e-05, + "loss": 0.0016, "step": 920 }, { - "epoch": 1.3787425149700598, - "grad_norm": 0.07465156559192355, - "learning_rate": 1.1808972733275977e-05, - "loss": 0.0008, + "epoch": 1.4736, + "grad_norm": 0.0023880120319958176, + "learning_rate": 1.0776838472890065e-05, + "loss": 0.0001, "step": 921 }, { - "epoch": 1.3802395209580838, - "grad_norm": 0.09208435898453236, - "learning_rate": 1.1793068354789978e-05, - "loss": 0.0013, + "epoch": 1.4752, + "grad_norm": 0.0036727102455103296, + "learning_rate": 1.0759609054818459e-05, + "loss": 0.0001, "step": 922 }, { - "epoch": 1.3817365269461077, - "grad_norm": 0.12422665350230562, - "learning_rate": 1.1777159288700203e-05, - "loss": 0.0013, + "epoch": 1.4768, + "grad_norm": 0.015067220396384931, + "learning_rate": 1.0742377368438915e-05, + "loss": 0.0001, "step": 923 }, { - "epoch": 1.3832335329341316, - "grad_norm": 0.2436082341006343, - "learning_rate": 1.176124557659759e-05, - "loss": 0.0017, + "epoch": 1.4784, + "grad_norm": 0.20821142049422797, + "learning_rate": 1.0725143465207868e-05, + "loss": 0.0056, "step": 924 }, { - "epoch": 1.3847305389221556, - "grad_norm": 0.12781022014989502, - "learning_rate": 1.174532726008523e-05, - "loss": 0.0018, + "epoch": 1.48, + "grad_norm": 0.07935528902485146, + "learning_rate": 1.0707907396588362e-05, + "loss": 0.0004, "step": 925 }, { - "epoch": 1.3862275449101795, - "grad_norm": 0.16812228483761696, - "learning_rate": 1.1729404380778241e-05, - "loss": 0.0026, + "epoch": 1.4816, + "grad_norm": 0.009333519486653154, + "learning_rate": 1.069066921404992e-05, + "loss": 0.0001, "step": 926 }, { - "epoch": 1.3877245508982035, - "grad_norm": 0.0920216290898727, - "learning_rate": 1.1713476980303678e-05, - "loss": 0.0012, + "epoch": 1.4832, + "grad_norm": 0.0023647105560942287, + "learning_rate": 1.0673428969068365e-05, + "loss": 0.0001, "step": 927 }, { - "epoch": 1.3892215568862276, - "grad_norm": 0.11482491332593553, - "learning_rate": 1.1697545100300408e-05, - "loss": 0.002, + "epoch": 1.4848, + "grad_norm": 0.00757239255372897, + "learning_rate": 1.065618671312569e-05, + "loss": 0.0001, "step": 928 }, { - "epoch": 1.3907185628742516, - "grad_norm": 0.10002074873744907, - "learning_rate": 1.1681608782419014e-05, - "loss": 0.001, + "epoch": 1.4864, + "grad_norm": 0.003198079535611409, + "learning_rate": 1.063894249770989e-05, + "loss": 0.0001, "step": 929 }, { - "epoch": 1.3922155688622755, - "grad_norm": 0.08746209991474958, - "learning_rate": 1.1665668068321684e-05, - "loss": 0.0009, + "epoch": 1.488, + "grad_norm": 0.0028351381695412175, + "learning_rate": 1.0621696374314807e-05, + "loss": 0.0001, "step": 930 }, { - "epoch": 1.3937125748502994, - "grad_norm": 0.0814270207122144, - "learning_rate": 1.1649722999682094e-05, - "loss": 0.0012, + "epoch": 1.4896, + "grad_norm": 0.022597613005629305, + "learning_rate": 1.0604448394439983e-05, + "loss": 0.0002, "step": 931 }, { - "epoch": 1.3952095808383234, - "grad_norm": 0.09416853678436857, - "learning_rate": 1.1633773618185302e-05, - "loss": 0.001, + "epoch": 1.4912, + "grad_norm": 0.0944509156527911, + "learning_rate": 1.0587198609590505e-05, + "loss": 0.0008, "step": 932 }, { - "epoch": 1.3967065868263473, - "grad_norm": 0.14964696805599081, - "learning_rate": 1.161781996552765e-05, - "loss": 0.0012, + "epoch": 1.4928, + "grad_norm": 0.02252196232378922, + "learning_rate": 1.0569947071276847e-05, + "loss": 0.0001, "step": 933 }, { - "epoch": 1.3982035928143712, - "grad_norm": 0.09483767589434133, - "learning_rate": 1.1601862083416632e-05, - "loss": 0.0013, + "epoch": 1.4944, + "grad_norm": 0.06485245834320515, + "learning_rate": 1.0552693831014726e-05, + "loss": 0.0019, "step": 934 }, { - "epoch": 1.3997005988023952, - "grad_norm": 0.09788376172150742, - "learning_rate": 1.1585900013570815e-05, - "loss": 0.0015, + "epoch": 1.496, + "grad_norm": 0.005034575123535191, + "learning_rate": 1.053543894032493e-05, + "loss": 0.0001, "step": 935 }, { - "epoch": 1.401197604790419, - "grad_norm": 0.10836219152191484, - "learning_rate": 1.1569933797719703e-05, - "loss": 0.0016, + "epoch": 1.4976, + "grad_norm": 0.008163939414128022, + "learning_rate": 1.0518182450733185e-05, + "loss": 0.0001, "step": 936 }, { - "epoch": 1.402694610778443, - "grad_norm": 0.23674230633425863, - "learning_rate": 1.155396347760364e-05, - "loss": 0.0016, + "epoch": 1.4992, + "grad_norm": 0.005588487482831217, + "learning_rate": 1.0500924413769988e-05, + "loss": 0.0001, "step": 937 }, { - "epoch": 1.4041916167664672, - "grad_norm": 0.08196393324885805, - "learning_rate": 1.1537989094973705e-05, - "loss": 0.0014, + "epoch": 1.5008, + "grad_norm": 0.006037605153287934, + "learning_rate": 1.0483664880970456e-05, + "loss": 0.0001, "step": 938 }, { - "epoch": 1.4056886227544911, - "grad_norm": 0.05782826404362468, - "learning_rate": 1.1522010691591595e-05, - "loss": 0.0007, + "epoch": 1.5024, + "grad_norm": 0.004750521037278343, + "learning_rate": 1.0466403903874176e-05, + "loss": 0.0001, "step": 939 }, { - "epoch": 1.407185628742515, - "grad_norm": 0.14623254375235795, - "learning_rate": 1.1506028309229513e-05, - "loss": 0.0019, + "epoch": 1.504, + "grad_norm": 0.06278347831522739, + "learning_rate": 1.0449141534025044e-05, + "loss": 0.0006, "step": 940 }, { - "epoch": 1.408682634730539, - "grad_norm": 0.10434558213727799, - "learning_rate": 1.1490041989670076e-05, - "loss": 0.0012, + "epoch": 1.5056, + "grad_norm": 0.0024527693371791076, + "learning_rate": 1.0431877822971118e-05, + "loss": 0.0001, "step": 941 }, { - "epoch": 1.410179640718563, - "grad_norm": 0.15631392324819193, - "learning_rate": 1.1474051774706181e-05, - "loss": 0.0013, + "epoch": 1.5072, + "grad_norm": 0.002035872923279796, + "learning_rate": 1.0414612822264457e-05, + "loss": 0.0001, "step": 942 }, { - "epoch": 1.4116766467065869, - "grad_norm": 0.13437189533675623, - "learning_rate": 1.145805770614092e-05, - "loss": 0.0022, + "epoch": 1.5088, + "grad_norm": 0.0068014432836293595, + "learning_rate": 1.0397346583460972e-05, + "loss": 0.0001, "step": 943 }, { - "epoch": 1.4131736526946108, - "grad_norm": 0.038973494041355694, - "learning_rate": 1.1442059825787448e-05, - "loss": 0.0004, + "epoch": 1.5104, + "grad_norm": 0.005721254040327972, + "learning_rate": 1.038007915812028e-05, + "loss": 0.0001, "step": 944 }, { - "epoch": 1.4146706586826348, - "grad_norm": 0.07141700721890615, - "learning_rate": 1.1426058175468895e-05, - "loss": 0.0011, + "epoch": 1.512, + "grad_norm": 0.030337274027592388, + "learning_rate": 1.0362810597805526e-05, + "loss": 0.0002, "step": 945 }, { - "epoch": 1.4161676646706587, - "grad_norm": 0.21493859310812677, - "learning_rate": 1.1410052797018245e-05, - "loss": 0.0017, + "epoch": 1.5135999999999998, + "grad_norm": 0.08645131868148463, + "learning_rate": 1.034554095408326e-05, + "loss": 0.0005, "step": 946 }, { - "epoch": 1.4176646706586826, - "grad_norm": 0.1282522648959389, - "learning_rate": 1.1394043732278225e-05, - "loss": 0.001, + "epoch": 1.5152, + "grad_norm": 0.005076246051467128, + "learning_rate": 1.0328270278523256e-05, + "loss": 0.0001, "step": 947 }, { - "epoch": 1.4191616766467066, - "grad_norm": 0.07019102461290384, - "learning_rate": 1.1378031023101199e-05, - "loss": 0.0008, + "epoch": 1.5168, + "grad_norm": 0.008998648164364345, + "learning_rate": 1.031099862269837e-05, + "loss": 0.0001, "step": 948 }, { - "epoch": 1.4206586826347305, - "grad_norm": 0.05464505705186005, - "learning_rate": 1.1362014711349063e-05, - "loss": 0.0004, + "epoch": 1.5184, + "grad_norm": 0.0020866531394263662, + "learning_rate": 1.0293726038184393e-05, + "loss": 0.0001, "step": 949 }, { - "epoch": 1.4221556886227544, - "grad_norm": 0.07090800295191836, - "learning_rate": 1.1345994838893125e-05, - "loss": 0.0004, + "epoch": 1.52, + "grad_norm": 0.004967861683286264, + "learning_rate": 1.0276452576559878e-05, + "loss": 0.0001, "step": 950 }, { - "epoch": 1.4236526946107784, - "grad_norm": 0.1475910596450026, - "learning_rate": 1.1329971447614008e-05, - "loss": 0.0014, + "epoch": 1.5215999999999998, + "grad_norm": 0.00828972554606736, + "learning_rate": 1.0259178289406011e-05, + "loss": 0.0001, "step": 951 }, { - "epoch": 1.4251497005988023, - "grad_norm": 0.11628540378666755, - "learning_rate": 1.1313944579401528e-05, - "loss": 0.0008, + "epoch": 1.5232, + "grad_norm": 0.14247992903632217, + "learning_rate": 1.024190322830643e-05, + "loss": 0.0019, "step": 952 }, { - "epoch": 1.4266467065868262, - "grad_norm": 0.11918182166269133, - "learning_rate": 1.12979142761546e-05, - "loss": 0.0013, + "epoch": 1.5248, + "grad_norm": 0.05069890191122051, + "learning_rate": 1.022462744484709e-05, + "loss": 0.0003, "step": 953 }, { - "epoch": 1.4281437125748502, - "grad_norm": 0.06010179192377916, - "learning_rate": 1.1281880579781108e-05, - "loss": 0.0008, + "epoch": 1.5264, + "grad_norm": 0.004596669597143668, + "learning_rate": 1.0207350990616107e-05, + "loss": 0.0001, "step": 954 }, { - "epoch": 1.4296407185628741, - "grad_norm": 0.07923386193741429, - "learning_rate": 1.1265843532197813e-05, - "loss": 0.001, + "epoch": 1.528, + "grad_norm": 0.005706623148356151, + "learning_rate": 1.019007391720359e-05, + "loss": 0.0001, "step": 955 }, { - "epoch": 1.4311377245508983, - "grad_norm": 0.09159193558417823, - "learning_rate": 1.1249803175330237e-05, - "loss": 0.0015, + "epoch": 1.5295999999999998, + "grad_norm": 0.01754239249700808, + "learning_rate": 1.0172796276201504e-05, + "loss": 0.0001, "step": 956 }, { - "epoch": 1.4326347305389222, - "grad_norm": 0.07308876635837779, - "learning_rate": 1.1233759551112553e-05, - "loss": 0.0004, + "epoch": 1.5312000000000001, + "grad_norm": 0.0033728434205184442, + "learning_rate": 1.0155518119203511e-05, + "loss": 0.0001, "step": 957 }, { - "epoch": 1.4341317365269461, - "grad_norm": 0.0779800871854366, - "learning_rate": 1.1217712701487471e-05, - "loss": 0.0008, + "epoch": 1.5328, + "grad_norm": 0.002274125753851539, + "learning_rate": 1.0138239497804804e-05, + "loss": 0.0001, "step": 958 }, { - "epoch": 1.43562874251497, - "grad_norm": 0.06632363235084476, - "learning_rate": 1.120166266840614e-05, - "loss": 0.0005, + "epoch": 1.5344, + "grad_norm": 0.007272566220733121, + "learning_rate": 1.0120960463601977e-05, + "loss": 0.0001, "step": 959 }, { - "epoch": 1.437125748502994, - "grad_norm": 0.05842006960591898, - "learning_rate": 1.1185609493828033e-05, - "loss": 0.0007, + "epoch": 1.536, + "grad_norm": 0.003240011548304806, + "learning_rate": 1.0103681068192845e-05, + "loss": 0.0001, "step": 960 }, { - "epoch": 1.438622754491018, - "grad_norm": 0.19344221257223246, - "learning_rate": 1.1169553219720828e-05, - "loss": 0.0031, + "epoch": 1.5375999999999999, + "grad_norm": 0.014366781565456368, + "learning_rate": 1.0086401363176306e-05, + "loss": 0.0001, "step": 961 }, { - "epoch": 1.4401197604790419, - "grad_norm": 0.1981812238766314, - "learning_rate": 1.1153493888060304e-05, - "loss": 0.0013, + "epoch": 1.5392000000000001, + "grad_norm": 0.018848763547187536, + "learning_rate": 1.0069121400152182e-05, + "loss": 0.0001, "step": 962 }, { - "epoch": 1.4416167664670658, - "grad_norm": 0.055964112494847354, - "learning_rate": 1.1137431540830248e-05, - "loss": 0.0006, + "epoch": 1.5408, + "grad_norm": 0.005865494954341179, + "learning_rate": 1.0051841230721065e-05, + "loss": 0.0001, "step": 963 }, { - "epoch": 1.4431137724550898, - "grad_norm": 0.09671781357514952, - "learning_rate": 1.1121366220022317e-05, - "loss": 0.0012, + "epoch": 1.5424, + "grad_norm": 0.00576194392886361, + "learning_rate": 1.0034560906484161e-05, + "loss": 0.0001, "step": 964 }, { - "epoch": 1.4446107784431137, - "grad_norm": 0.23802060163109717, - "learning_rate": 1.1105297967635947e-05, - "loss": 0.002, + "epoch": 1.544, + "grad_norm": 0.08250532542920345, + "learning_rate": 1.0017280479043148e-05, + "loss": 0.0003, "step": 965 }, { - "epoch": 1.4461077844311379, - "grad_norm": 0.17887498237379332, - "learning_rate": 1.1089226825678239e-05, - "loss": 0.0014, + "epoch": 1.5455999999999999, + "grad_norm": 0.004488788327332194, + "learning_rate": 1e-05, + "loss": 0.0001, "step": 966 }, { - "epoch": 1.4476047904191618, - "grad_norm": 0.041592588915209894, - "learning_rate": 1.1073152836163842e-05, - "loss": 0.0004, + "epoch": 1.5472000000000001, + "grad_norm": 0.016648640713315763, + "learning_rate": 9.982719520956856e-06, + "loss": 0.0001, "step": 967 }, { - "epoch": 1.4491017964071857, - "grad_norm": 0.1005400146875568, - "learning_rate": 1.105707604111486e-05, - "loss": 0.0006, + "epoch": 1.5488, + "grad_norm": 0.048241822041450236, + "learning_rate": 9.965439093515842e-06, + "loss": 0.0002, "step": 968 }, { - "epoch": 1.4505988023952097, - "grad_norm": 0.0974962168976635, - "learning_rate": 1.1040996482560724e-05, - "loss": 0.0014, + "epoch": 1.5504, + "grad_norm": 0.01499468657612021, + "learning_rate": 9.948158769278939e-06, + "loss": 0.0002, "step": 969 }, { - "epoch": 1.4520958083832336, - "grad_norm": 0.0935782185529421, - "learning_rate": 1.1024914202538088e-05, - "loss": 0.0008, + "epoch": 1.552, + "grad_norm": 0.005852751046341136, + "learning_rate": 9.930878599847822e-06, + "loss": 0.0001, "step": 970 }, { - "epoch": 1.4535928143712575, - "grad_norm": 0.07730096837501058, - "learning_rate": 1.1008829243090724e-05, - "loss": 0.0006, + "epoch": 1.5535999999999999, + "grad_norm": 0.0752892074546245, + "learning_rate": 9.913598636823694e-06, + "loss": 0.0023, "step": 971 }, { - "epoch": 1.4550898203592815, - "grad_norm": 0.12823405242175784, - "learning_rate": 1.099274164626941e-05, - "loss": 0.0009, + "epoch": 1.5552000000000001, + "grad_norm": 0.026611060806342526, + "learning_rate": 9.896318931807155e-06, + "loss": 0.0002, "step": 972 }, { - "epoch": 1.4565868263473054, - "grad_norm": 0.07157978147756917, - "learning_rate": 1.0976651454131814e-05, - "loss": 0.001, + "epoch": 1.5568, + "grad_norm": 0.07719852044515452, + "learning_rate": 9.879039536398023e-06, + "loss": 0.0005, "step": 973 }, { - "epoch": 1.4580838323353293, - "grad_norm": 0.07310373401364316, - "learning_rate": 1.0960558708742401e-05, - "loss": 0.0005, + "epoch": 1.5584, + "grad_norm": 0.003910047594098633, + "learning_rate": 9.861760502195197e-06, + "loss": 0.0001, "step": 974 }, { - "epoch": 1.4595808383233533, - "grad_norm": 0.11186858330092693, - "learning_rate": 1.0944463452172297e-05, - "loss": 0.001, + "epoch": 1.56, + "grad_norm": 0.056305740800321455, + "learning_rate": 9.844481880796492e-06, + "loss": 0.0002, "step": 975 }, { - "epoch": 1.4610778443113772, - "grad_norm": 0.1613611934605809, - "learning_rate": 1.0928365726499194e-05, - "loss": 0.0027, + "epoch": 1.5615999999999999, + "grad_norm": 0.0021305486069807056, + "learning_rate": 9.827203723798498e-06, + "loss": 0.0001, "step": 976 }, { - "epoch": 1.4625748502994012, - "grad_norm": 0.1622550489094645, - "learning_rate": 1.0912265573807249e-05, - "loss": 0.0008, + "epoch": 1.5632000000000001, + "grad_norm": 0.011272710180135622, + "learning_rate": 9.809926082796415e-06, + "loss": 0.0001, "step": 977 }, { - "epoch": 1.464071856287425, - "grad_norm": 0.15392255395991697, - "learning_rate": 1.0896163036186953e-05, - "loss": 0.0014, + "epoch": 1.5648, + "grad_norm": 0.007720417514812891, + "learning_rate": 9.7926490093839e-06, + "loss": 0.0001, "step": 978 }, { - "epoch": 1.465568862275449, - "grad_norm": 0.15486888428359488, - "learning_rate": 1.088005815573504e-05, - "loss": 0.0013, + "epoch": 1.5664, + "grad_norm": 0.010832344610442256, + "learning_rate": 9.775372555152912e-06, + "loss": 0.0001, "step": 979 }, { - "epoch": 1.467065868263473, - "grad_norm": 0.16549731334948978, - "learning_rate": 1.0863950974554372e-05, - "loss": 0.0011, + "epoch": 1.568, + "grad_norm": 0.009070884087999895, + "learning_rate": 9.758096771693574e-06, + "loss": 0.0001, "step": 980 }, { - "epoch": 1.468562874251497, - "grad_norm": 0.06451393525137326, - "learning_rate": 1.084784153475381e-05, - "loss": 0.0004, + "epoch": 1.5695999999999999, + "grad_norm": 0.00442157961174515, + "learning_rate": 9.740821710593989e-06, + "loss": 0.0001, "step": 981 }, { - "epoch": 1.4700598802395208, - "grad_norm": 0.08287888893875361, - "learning_rate": 1.0831729878448131e-05, - "loss": 0.0006, + "epoch": 1.5712000000000002, + "grad_norm": 0.027300721561953286, + "learning_rate": 9.723547423440122e-06, + "loss": 0.0001, "step": 982 }, { - "epoch": 1.471556886227545, - "grad_norm": 0.14774856291997404, - "learning_rate": 1.0815616047757909e-05, - "loss": 0.0015, + "epoch": 1.5728, + "grad_norm": 0.025063083496432152, + "learning_rate": 9.70627396181561e-06, + "loss": 0.0001, "step": 983 }, { - "epoch": 1.473053892215569, - "grad_norm": 0.08417242989544835, - "learning_rate": 1.0799500084809393e-05, - "loss": 0.0009, + "epoch": 1.5744, + "grad_norm": 0.002248942312411431, + "learning_rate": 9.689001377301634e-06, + "loss": 0.0001, "step": 984 }, { - "epoch": 1.4745508982035929, - "grad_norm": 0.05265592604561263, - "learning_rate": 1.0783382031734418e-05, - "loss": 0.0008, + "epoch": 1.576, + "grad_norm": 0.005737897000865526, + "learning_rate": 9.671729721476747e-06, + "loss": 0.0001, "step": 985 }, { - "epoch": 1.4760479041916168, - "grad_norm": 0.11539265697300215, - "learning_rate": 1.0767261930670272e-05, - "loss": 0.0015, + "epoch": 1.5776, + "grad_norm": 0.0020266436310786198, + "learning_rate": 9.654459045916743e-06, + "loss": 0.0001, "step": 986 }, { - "epoch": 1.4775449101796407, - "grad_norm": 0.029457601423187277, - "learning_rate": 1.0751139823759603e-05, - "loss": 0.0002, + "epoch": 1.5792000000000002, + "grad_norm": 0.012861872927739158, + "learning_rate": 9.637189402194477e-06, + "loss": 0.0001, "step": 987 }, { - "epoch": 1.4790419161676647, - "grad_norm": 0.10750596108363268, - "learning_rate": 1.0735015753150305e-05, - "loss": 0.0015, + "epoch": 1.5808, + "grad_norm": 0.022432987874545302, + "learning_rate": 9.619920841879726e-06, + "loss": 0.0002, "step": 988 }, { - "epoch": 1.4805389221556886, - "grad_norm": 0.23998882396666726, - "learning_rate": 1.0718889760995396e-05, - "loss": 0.0016, + "epoch": 1.5824, + "grad_norm": 0.1734129903191722, + "learning_rate": 9.602653416539031e-06, + "loss": 0.0015, "step": 989 }, { - "epoch": 1.4820359281437125, - "grad_norm": 0.1009612926927026, - "learning_rate": 1.070276188945293e-05, - "loss": 0.0014, + "epoch": 1.584, + "grad_norm": 0.11556869758171924, + "learning_rate": 9.585387177735548e-06, + "loss": 0.0005, "step": 990 }, { - "epoch": 1.4835329341317365, - "grad_norm": 0.12007396345906915, - "learning_rate": 1.0686632180685868e-05, - "loss": 0.0015, + "epoch": 1.5856, + "grad_norm": 0.013507656807784091, + "learning_rate": 9.568122177028884e-06, + "loss": 0.0001, "step": 991 }, { - "epoch": 1.4850299401197604, - "grad_norm": 0.1882263167381516, - "learning_rate": 1.067050067686197e-05, - "loss": 0.0013, + "epoch": 1.5872000000000002, + "grad_norm": 0.01190158738889232, + "learning_rate": 9.550858465974958e-06, + "loss": 0.0001, "step": 992 }, { - "epoch": 1.4865269461077844, - "grad_norm": 0.16842368805723756, - "learning_rate": 1.0654367420153696e-05, + "epoch": 1.5888, + "grad_norm": 0.1443249907546037, + "learning_rate": 9.533596096125826e-06, "loss": 0.0017, "step": 993 }, { - "epoch": 1.4880239520958085, - "grad_norm": 0.13102324447775987, - "learning_rate": 1.0638232452738084e-05, - "loss": 0.0009, + "epoch": 1.5904, + "grad_norm": 0.003068551402787213, + "learning_rate": 9.516335119029547e-06, + "loss": 0.0001, "step": 994 }, { - "epoch": 1.4895209580838324, - "grad_norm": 0.14140439476011551, - "learning_rate": 1.0622095816796647e-05, - "loss": 0.0014, + "epoch": 1.592, + "grad_norm": 0.0870000968923958, + "learning_rate": 9.499075586230014e-06, + "loss": 0.0004, "step": 995 }, { - "epoch": 1.4910179640718564, - "grad_norm": 0.048609314581425776, - "learning_rate": 1.0605957554515261e-05, - "loss": 0.0005, + "epoch": 1.5936, + "grad_norm": 0.002410019645070289, + "learning_rate": 9.481817549266817e-06, + "loss": 0.0001, "step": 996 }, { - "epoch": 1.4925149700598803, - "grad_norm": 0.09685439693147369, - "learning_rate": 1.058981770808405e-05, - "loss": 0.0008, + "epoch": 1.5952, + "grad_norm": 0.07699328070870765, + "learning_rate": 9.464561059675073e-06, + "loss": 0.0004, "step": 997 }, { - "epoch": 1.4940119760479043, - "grad_norm": 0.10476662342644127, - "learning_rate": 1.057367631969728e-05, - "loss": 0.0009, + "epoch": 1.5968, + "grad_norm": 0.03003542987962681, + "learning_rate": 9.44730616898528e-06, + "loss": 0.0002, "step": 998 }, { - "epoch": 1.4955089820359282, - "grad_norm": 0.07965591618267445, - "learning_rate": 1.0557533431553251e-05, - "loss": 0.0006, + "epoch": 1.5984, + "grad_norm": 0.00798339829717542, + "learning_rate": 9.430052928723153e-06, + "loss": 0.0001, "step": 999 }, { - "epoch": 1.4970059880239521, - "grad_norm": 0.14132503178186984, - "learning_rate": 1.0541389085854177e-05, - "loss": 0.0014, + "epoch": 1.6, + "grad_norm": 0.013732757324964928, + "learning_rate": 9.412801390409496e-06, + "loss": 0.0002, "step": 1000 }, { - "epoch": 1.498502994011976, - "grad_norm": 0.06381511478435144, - "learning_rate": 1.0525243324806091e-05, - "loss": 0.0003, + "epoch": 1.6016, + "grad_norm": 0.013712467175340954, + "learning_rate": 9.395551605560018e-06, + "loss": 0.0001, "step": 1001 }, { - "epoch": 1.5, - "grad_norm": 0.19969657755752093, - "learning_rate": 1.0509096190618729e-05, - "loss": 0.0012, + "epoch": 1.6032, + "grad_norm": 0.033443514052161075, + "learning_rate": 9.378303625685196e-06, + "loss": 0.0002, "step": 1002 }, { - "epoch": 1.501497005988024, - "grad_norm": 0.16903934501226875, - "learning_rate": 1.0492947725505403e-05, - "loss": 0.0011, + "epoch": 1.6048, + "grad_norm": 0.011735228089194433, + "learning_rate": 9.361057502290112e-06, + "loss": 0.0001, "step": 1003 }, { - "epoch": 1.5029940119760479, - "grad_norm": 0.16313780662072883, - "learning_rate": 1.047679797168291e-05, - "loss": 0.0014, + "epoch": 1.6064, + "grad_norm": 0.05469053236176875, + "learning_rate": 9.343813286874312e-06, + "loss": 0.0002, "step": 1004 }, { - "epoch": 1.5044910179640718, - "grad_norm": 0.08804790182748146, - "learning_rate": 1.0460646971371424e-05, - "loss": 0.0008, + "epoch": 1.608, + "grad_norm": 0.005661733123474927, + "learning_rate": 9.326571030931636e-06, + "loss": 0.0001, "step": 1005 }, { - "epoch": 1.5059880239520957, - "grad_norm": 0.1368065864574523, - "learning_rate": 1.0444494766794368e-05, - "loss": 0.0014, + "epoch": 1.6096, + "grad_norm": 0.0030606199222666273, + "learning_rate": 9.309330785950086e-06, + "loss": 0.0001, "step": 1006 }, { - "epoch": 1.5074850299401197, - "grad_norm": 0.1814958223187234, - "learning_rate": 1.0428341400178317e-05, - "loss": 0.0018, + "epoch": 1.6112, + "grad_norm": 0.003158444323988701, + "learning_rate": 9.292092603411642e-06, + "loss": 0.0001, "step": 1007 }, { - "epoch": 1.5089820359281436, - "grad_norm": 0.10617237101998808, - "learning_rate": 1.0412186913752884e-05, - "loss": 0.001, + "epoch": 1.6128, + "grad_norm": 0.1020769852520482, + "learning_rate": 9.274856534792138e-06, + "loss": 0.0007, "step": 1008 }, { - "epoch": 1.5104790419161676, - "grad_norm": 0.12416915017989684, - "learning_rate": 1.0396031349750612e-05, - "loss": 0.0014, + "epoch": 1.6143999999999998, + "grad_norm": 0.02723430869085718, + "learning_rate": 9.257622631561085e-06, + "loss": 0.0002, "step": 1009 }, { - "epoch": 1.5119760479041915, - "grad_norm": 0.16700967749894446, - "learning_rate": 1.0379874750406858e-05, - "loss": 0.0018, + "epoch": 1.616, + "grad_norm": 0.08514436159786103, + "learning_rate": 9.240390945181543e-06, + "loss": 0.0017, "step": 1010 }, { - "epoch": 1.5134730538922154, - "grad_norm": 0.07833916487876827, - "learning_rate": 1.0363717157959681e-05, - "loss": 0.0007, + "epoch": 1.6176, + "grad_norm": 0.0068380055800940235, + "learning_rate": 9.223161527109938e-06, + "loss": 0.0001, "step": 1011 }, { - "epoch": 1.5149700598802394, - "grad_norm": 0.1046908594504945, - "learning_rate": 1.0347558614649747e-05, - "loss": 0.0007, + "epoch": 1.6192, + "grad_norm": 0.00910581317681862, + "learning_rate": 9.205934428795929e-06, + "loss": 0.0001, "step": 1012 }, { - "epoch": 1.5164670658682635, - "grad_norm": 0.12447015495058295, - "learning_rate": 1.0331399162720197e-05, - "loss": 0.0018, + "epoch": 1.6208, + "grad_norm": 0.004989916059586335, + "learning_rate": 9.188709701682246e-06, + "loss": 0.0001, "step": 1013 }, { - "epoch": 1.5179640718562875, - "grad_norm": 0.12177043291644524, - "learning_rate": 1.0315238844416557e-05, - "loss": 0.0013, + "epoch": 1.6223999999999998, + "grad_norm": 0.031005449949701787, + "learning_rate": 9.17148739720454e-06, + "loss": 0.0002, "step": 1014 }, { - "epoch": 1.5194610778443114, - "grad_norm": 0.07862091445416813, - "learning_rate": 1.0299077701986613e-05, - "loss": 0.0007, + "epoch": 1.624, + "grad_norm": 0.002650399301172036, + "learning_rate": 9.154267566791224e-06, + "loss": 0.0001, "step": 1015 }, { - "epoch": 1.5209580838323353, - "grad_norm": 0.15981593793118748, - "learning_rate": 1.0282915777680307e-05, - "loss": 0.0012, + "epoch": 1.6256, + "grad_norm": 0.1350083424472415, + "learning_rate": 9.137050261863323e-06, + "loss": 0.0006, "step": 1016 }, { - "epoch": 1.5224550898203593, - "grad_norm": 0.09956716437231744, - "learning_rate": 1.0266753113749618e-05, - "loss": 0.001, + "epoch": 1.6272, + "grad_norm": 0.07458336731096532, + "learning_rate": 9.119835533834332e-06, + "loss": 0.0004, "step": 1017 }, { - "epoch": 1.5239520958083832, - "grad_norm": 0.12054415585534563, - "learning_rate": 1.0250589752448472e-05, - "loss": 0.0008, + "epoch": 1.6288, + "grad_norm": 0.029469347285655607, + "learning_rate": 9.102623434110028e-06, + "loss": 0.0002, "step": 1018 }, { - "epoch": 1.5254491017964071, - "grad_norm": 0.047284868067243004, - "learning_rate": 1.0234425736032607e-05, - "loss": 0.0004, + "epoch": 1.6303999999999998, + "grad_norm": 0.011116593067667624, + "learning_rate": 9.085414014088368e-06, + "loss": 0.0001, "step": 1019 }, { - "epoch": 1.5269461077844313, - "grad_norm": 0.15543960260097514, - "learning_rate": 1.0218261106759475e-05, - "loss": 0.0018, + "epoch": 1.6320000000000001, + "grad_norm": 0.01299440060936471, + "learning_rate": 9.068207325159285e-06, + "loss": 0.0001, "step": 1020 }, { - "epoch": 1.5284431137724552, - "grad_norm": 0.09490923813175474, - "learning_rate": 1.020209590688814e-05, - "loss": 0.0007, + "epoch": 1.6336, + "grad_norm": 0.2055820315378792, + "learning_rate": 9.051003418704566e-06, + "loss": 0.0017, "step": 1021 }, { - "epoch": 1.5299401197604792, - "grad_norm": 0.06536745934922997, - "learning_rate": 1.0185930178679146e-05, - "loss": 0.0006, + "epoch": 1.6352, + "grad_norm": 0.005873745936807772, + "learning_rate": 9.033802346097683e-06, + "loss": 0.0001, "step": 1022 }, { - "epoch": 1.531437125748503, - "grad_norm": 0.1301533353797184, - "learning_rate": 1.0169763964394421e-05, - "loss": 0.002, + "epoch": 1.6368, + "grad_norm": 0.013013482079196304, + "learning_rate": 9.016604158703654e-06, + "loss": 0.0001, "step": 1023 }, { - "epoch": 1.532934131736527, - "grad_norm": 0.1502603255655029, - "learning_rate": 1.0153597306297169e-05, - "loss": 0.0011, + "epoch": 1.6383999999999999, + "grad_norm": 0.006144340327088104, + "learning_rate": 8.999408907878877e-06, + "loss": 0.0002, "step": 1024 }, { - "epoch": 1.534431137724551, - "grad_norm": 0.04144441435798231, - "learning_rate": 1.013743024665175e-05, - "loss": 0.0003, + "epoch": 1.6400000000000001, + "grad_norm": 0.13665631531927, + "learning_rate": 8.982216644970978e-06, + "loss": 0.0012, "step": 1025 }, { - "epoch": 1.535928143712575, - "grad_norm": 0.15809185401608605, - "learning_rate": 1.0121262827723573e-05, - "loss": 0.0023, + "epoch": 1.6416, + "grad_norm": 0.006789307264438082, + "learning_rate": 8.965027421318666e-06, + "loss": 0.0001, "step": 1026 }, { - "epoch": 1.5374251497005988, - "grad_norm": 0.17137867242074536, - "learning_rate": 1.0105095091778985e-05, - "loss": 0.0012, + "epoch": 1.6432, + "grad_norm": 0.17167958395609068, + "learning_rate": 8.947841288251568e-06, + "loss": 0.0004, "step": 1027 }, { - "epoch": 1.5389221556886228, - "grad_norm": 0.10676172512123629, - "learning_rate": 1.008892708108517e-05, - "loss": 0.0009, + "epoch": 1.6448, + "grad_norm": 0.0609705821357434, + "learning_rate": 8.930658297090092e-06, + "loss": 0.0003, "step": 1028 }, { - "epoch": 1.5404191616766467, - "grad_norm": 0.10415442540373282, - "learning_rate": 1.007275883791002e-05, - "loss": 0.0014, + "epoch": 1.6463999999999999, + "grad_norm": 0.006587696978795276, + "learning_rate": 8.913478499145255e-06, + "loss": 0.0001, "step": 1029 }, { - "epoch": 1.5419161676646707, - "grad_norm": 0.048845460680296805, - "learning_rate": 1.0056590404522042e-05, - "loss": 0.0006, + "epoch": 1.6480000000000001, + "grad_norm": 0.0026716294435511516, + "learning_rate": 8.896301945718541e-06, + "loss": 0.0001, "step": 1030 }, { - "epoch": 1.5434131736526946, - "grad_norm": 0.09153026786032244, - "learning_rate": 1.0040421823190239e-05, - "loss": 0.0005, + "epoch": 1.6496, + "grad_norm": 0.03505078932135712, + "learning_rate": 8.879128688101749e-06, + "loss": 0.0002, "step": 1031 }, { - "epoch": 1.5449101796407185, - "grad_norm": 0.0979750349520669, - "learning_rate": 1.0024253136183997e-05, - "loss": 0.0008, + "epoch": 1.6512, + "grad_norm": 0.017837314438095914, + "learning_rate": 8.861958777576826e-06, + "loss": 0.0001, "step": 1032 }, { - "epoch": 1.5464071856287425, - "grad_norm": 0.12603703868213395, - "learning_rate": 1.000808438577298e-05, - "loss": 0.0007, + "epoch": 1.6528, + "grad_norm": 0.003291556083983862, + "learning_rate": 8.844792265415738e-06, + "loss": 0.0001, "step": 1033 }, { - "epoch": 1.5479041916167664, - "grad_norm": 0.09260218797278057, - "learning_rate": 9.991915614227022e-06, - "loss": 0.0006, + "epoch": 1.6543999999999999, + "grad_norm": 0.0078013423029036156, + "learning_rate": 8.827629202880294e-06, + "loss": 0.0001, "step": 1034 }, { - "epoch": 1.5494011976047903, - "grad_norm": 0.150535783399099, - "learning_rate": 9.975746863816006e-06, - "loss": 0.0008, + "epoch": 1.6560000000000001, + "grad_norm": 0.021478888567488523, + "learning_rate": 8.810469641222001e-06, + "loss": 0.0002, "step": 1035 }, { - "epoch": 1.5508982035928143, - "grad_norm": 0.0866141689732322, - "learning_rate": 9.959578176809764e-06, - "loss": 0.0014, + "epoch": 1.6576, + "grad_norm": 0.107404212993994, + "learning_rate": 8.793313631681915e-06, + "loss": 0.0015, "step": 1036 }, { - "epoch": 1.5523952095808382, - "grad_norm": 0.14207853844047355, - "learning_rate": 9.943409595477958e-06, - "loss": 0.0014, + "epoch": 1.6592, + "grad_norm": 0.005515426360191595, + "learning_rate": 8.776161225490488e-06, + "loss": 0.0001, "step": 1037 }, { - "epoch": 1.5538922155688621, - "grad_norm": 0.08207842243713191, - "learning_rate": 9.927241162089984e-06, - "loss": 0.0008, + "epoch": 1.6608, + "grad_norm": 0.018418735479344833, + "learning_rate": 8.759012473867407e-06, + "loss": 0.0001, "step": 1038 }, { - "epoch": 1.555389221556886, - "grad_norm": 0.14029627239900275, - "learning_rate": 9.911072918914834e-06, - "loss": 0.0028, + "epoch": 1.6623999999999999, + "grad_norm": 0.009539716928331653, + "learning_rate": 8.741867428021447e-06, + "loss": 0.0001, "step": 1039 }, { - "epoch": 1.55688622754491, - "grad_norm": 0.09422957915613028, - "learning_rate": 9.894904908221018e-06, - "loss": 0.0006, + "epoch": 1.6640000000000001, + "grad_norm": 0.09973663139996064, + "learning_rate": 8.72472613915032e-06, + "loss": 0.0004, "step": 1040 }, { - "epoch": 1.5583832335329342, - "grad_norm": 0.26480681530852446, - "learning_rate": 9.87873717227643e-06, - "loss": 0.002, + "epoch": 1.6656, + "grad_norm": 0.15761894212909275, + "learning_rate": 8.707588658440511e-06, + "loss": 0.0019, "step": 1041 }, { - "epoch": 1.5598802395209581, - "grad_norm": 0.18537320811580985, - "learning_rate": 9.862569753348255e-06, - "loss": 0.001, + "epoch": 1.6672, + "grad_norm": 0.010498595207018875, + "learning_rate": 8.690455037067142e-06, + "loss": 0.0001, "step": 1042 }, { - "epoch": 1.561377245508982, - "grad_norm": 0.12242399214233682, - "learning_rate": 9.846402693702833e-06, - "loss": 0.0009, + "epoch": 1.6688, + "grad_norm": 0.10526976223706591, + "learning_rate": 8.673325326193806e-06, + "loss": 0.0005, "step": 1043 }, { - "epoch": 1.562874251497006, - "grad_norm": 0.10685927064129254, - "learning_rate": 9.830236035605578e-06, - "loss": 0.001, + "epoch": 1.6703999999999999, + "grad_norm": 0.003618607966983597, + "learning_rate": 8.656199576972424e-06, + "loss": 0.0001, "step": 1044 }, { - "epoch": 1.56437125748503, - "grad_norm": 0.13222209724765782, - "learning_rate": 9.814069821320856e-06, - "loss": 0.0023, + "epoch": 1.6720000000000002, + "grad_norm": 0.012464316701956982, + "learning_rate": 8.639077840543078e-06, + "loss": 0.0001, "step": 1045 }, { - "epoch": 1.5658682634730539, - "grad_norm": 0.0968515135585454, - "learning_rate": 9.79790409311186e-06, - "loss": 0.0008, + "epoch": 1.6736, + "grad_norm": 0.0053543772659330035, + "learning_rate": 8.621960168033868e-06, + "loss": 0.0001, "step": 1046 }, { - "epoch": 1.5673652694610778, - "grad_norm": 0.05675857658075679, - "learning_rate": 9.781738893240526e-06, - "loss": 0.0005, + "epoch": 1.6752, + "grad_norm": 0.07531792817890605, + "learning_rate": 8.604846610560771e-06, + "loss": 0.0004, "step": 1047 }, { - "epoch": 1.568862275449102, - "grad_norm": 0.10548064797653442, - "learning_rate": 9.765574263967397e-06, - "loss": 0.001, + "epoch": 1.6768, + "grad_norm": 0.029371851157039955, + "learning_rate": 8.587737219227462e-06, + "loss": 0.0002, "step": 1048 }, { - "epoch": 1.5703592814371259, - "grad_norm": 0.14583652639833644, - "learning_rate": 9.749410247551534e-06, - "loss": 0.0022, + "epoch": 1.6784, + "grad_norm": 0.05109275328856202, + "learning_rate": 8.570632045125185e-06, + "loss": 0.0002, "step": 1049 }, { - "epoch": 1.5718562874251498, - "grad_norm": 0.10085574392360613, - "learning_rate": 9.733246886250384e-06, - "loss": 0.0008, + "epoch": 1.6800000000000002, + "grad_norm": 0.006421087021681481, + "learning_rate": 8.553531139332583e-06, + "loss": 0.0001, "step": 1050 }, { - "epoch": 1.5733532934131738, - "grad_norm": 0.16062888967169808, - "learning_rate": 9.717084222319698e-06, - "loss": 0.0024, + "epoch": 1.6816, + "grad_norm": 0.07963637652867744, + "learning_rate": 8.536434552915555e-06, + "loss": 0.0005, "step": 1051 }, { - "epoch": 1.5748502994011977, - "grad_norm": 0.06334101246324102, - "learning_rate": 9.700922298013388e-06, - "loss": 0.0006, + "epoch": 1.6832, + "grad_norm": 0.0017013180902574591, + "learning_rate": 8.519342336927106e-06, + "loss": 0.0001, "step": 1052 }, { - "epoch": 1.5763473053892216, - "grad_norm": 0.10858659881531557, - "learning_rate": 9.684761155583443e-06, - "loss": 0.001, + "epoch": 1.6848, + "grad_norm": 0.07287617820083703, + "learning_rate": 8.502254542407186e-06, + "loss": 0.0003, "step": 1053 }, { - "epoch": 1.5778443113772456, - "grad_norm": 0.05720430794355147, - "learning_rate": 9.668600837279808e-06, - "loss": 0.0003, + "epoch": 1.6864, + "grad_norm": 0.1463603814085734, + "learning_rate": 8.485171220382545e-06, + "loss": 0.0008, "step": 1054 }, { - "epoch": 1.5793413173652695, - "grad_norm": 0.18596258294319445, - "learning_rate": 9.652441385350258e-06, - "loss": 0.0008, + "epoch": 1.688, + "grad_norm": 0.0434357914290013, + "learning_rate": 8.468092421866575e-06, + "loss": 0.0002, "step": 1055 }, { - "epoch": 1.5808383233532934, - "grad_norm": 0.02161457070852507, - "learning_rate": 9.636282842040324e-06, - "loss": 0.0002, + "epoch": 1.6896, + "grad_norm": 0.0018126604638566501, + "learning_rate": 8.451018197859153e-06, + "loss": 0.0001, "step": 1056 }, { - "epoch": 1.5823353293413174, - "grad_norm": 0.12665814776438672, - "learning_rate": 9.620125249593145e-06, - "loss": 0.002, + "epoch": 1.6912, + "grad_norm": 0.024660687945750964, + "learning_rate": 8.433948599346516e-06, + "loss": 0.0001, "step": 1057 }, { - "epoch": 1.5838323353293413, - "grad_norm": 0.1479035747876992, - "learning_rate": 9.603968650249388e-06, - "loss": 0.0014, + "epoch": 1.6928, + "grad_norm": 0.02621191496190813, + "learning_rate": 8.41688367730107e-06, + "loss": 0.0002, "step": 1058 }, { - "epoch": 1.5853293413173652, - "grad_norm": 0.08875005354112726, - "learning_rate": 9.587813086247118e-06, - "loss": 0.0008, + "epoch": 1.6944, + "grad_norm": 0.003268281028403292, + "learning_rate": 8.399823482681263e-06, + "loss": 0.0001, "step": 1059 }, { - "epoch": 1.5868263473053892, - "grad_norm": 0.034598142239245726, - "learning_rate": 9.571658599821684e-06, - "loss": 0.0002, + "epoch": 1.696, + "grad_norm": 0.0019118513200916743, + "learning_rate": 8.382768066431427e-06, + "loss": 0.0001, "step": 1060 }, { - "epoch": 1.5883233532934131, - "grad_norm": 0.1382510341790496, - "learning_rate": 9.555505233205635e-06, - "loss": 0.0014, + "epoch": 1.6976, + "grad_norm": 0.004530054083164475, + "learning_rate": 8.36571747948162e-06, + "loss": 0.0001, "step": 1061 }, { - "epoch": 1.589820359281437, - "grad_norm": 0.13116321301263414, - "learning_rate": 9.539353028628577e-06, - "loss": 0.001, + "epoch": 1.6992, + "grad_norm": 0.0643530214986294, + "learning_rate": 8.348671772747488e-06, + "loss": 0.0002, "step": 1062 }, { - "epoch": 1.591317365269461, - "grad_norm": 0.05547568359029333, - "learning_rate": 9.523202028317095e-06, - "loss": 0.0007, + "epoch": 1.7008, + "grad_norm": 0.08236509208617357, + "learning_rate": 8.331630997130091e-06, + "loss": 0.0015, "step": 1063 }, { - "epoch": 1.592814371257485, - "grad_norm": 0.031905668491041365, - "learning_rate": 9.5070522744946e-06, - "loss": 0.0003, + "epoch": 1.7024, + "grad_norm": 0.0024735731034196246, + "learning_rate": 8.314595203515781e-06, + "loss": 0.0001, "step": 1064 }, { - "epoch": 1.5943113772455089, - "grad_norm": 0.008727577881278311, - "learning_rate": 9.490903809381273e-06, + "epoch": 1.704, + "grad_norm": 0.042430868716428864, + "learning_rate": 8.297564442776014e-06, "loss": 0.0001, "step": 1065 }, { - "epoch": 1.5958083832335328, - "grad_norm": 0.0712191739355828, - "learning_rate": 9.47475667519391e-06, - "loss": 0.0015, + "epoch": 1.7056, + "grad_norm": 0.018951879993768548, + "learning_rate": 8.280538765767236e-06, + "loss": 0.0001, "step": 1066 }, { - "epoch": 1.5973053892215567, - "grad_norm": 0.03749421756809963, - "learning_rate": 9.458610914145826e-06, - "loss": 0.001, + "epoch": 1.7072, + "grad_norm": 0.05325849886472776, + "learning_rate": 8.263518223330698e-06, + "loss": 0.0002, "step": 1067 }, { - "epoch": 1.5988023952095807, - "grad_norm": 0.02379835401791554, - "learning_rate": 9.442466568446755e-06, - "loss": 0.0003, + "epoch": 1.7088, + "grad_norm": 0.007651066709106985, + "learning_rate": 8.246502866292324e-06, + "loss": 0.0001, "step": 1068 }, { - "epoch": 1.6002994011976048, - "grad_norm": 0.12118141203303913, - "learning_rate": 9.426323680302722e-06, - "loss": 0.0013, + "epoch": 1.7104, + "grad_norm": 0.002899812605506385, + "learning_rate": 8.229492745462551e-06, + "loss": 0.0001, "step": 1069 }, { - "epoch": 1.6017964071856288, - "grad_norm": 0.12388549366182541, - "learning_rate": 9.410182291915956e-06, - "loss": 0.0009, + "epoch": 1.712, + "grad_norm": 0.01347456210816533, + "learning_rate": 8.212487911636185e-06, + "loss": 0.0001, "step": 1070 }, { - "epoch": 1.6032934131736527, - "grad_norm": 0.117253435172126, - "learning_rate": 9.39404244548474e-06, - "loss": 0.0013, + "epoch": 1.7136, + "grad_norm": 0.0023674577831386896, + "learning_rate": 8.195488415592238e-06, + "loss": 0.0001, "step": 1071 }, { - "epoch": 1.6047904191616766, - "grad_norm": 0.08211137201727235, - "learning_rate": 9.377904183203353e-06, - "loss": 0.0009, + "epoch": 1.7151999999999998, + "grad_norm": 0.006504806869534589, + "learning_rate": 8.17849430809379e-06, + "loss": 0.0001, "step": 1072 }, { - "epoch": 1.6062874251497006, - "grad_norm": 0.09352507617778973, - "learning_rate": 9.361767547261918e-06, - "loss": 0.0009, + "epoch": 1.7168, + "grad_norm": 0.004865458855715208, + "learning_rate": 8.161505639887818e-06, + "loss": 0.0001, "step": 1073 }, { - "epoch": 1.6077844311377245, - "grad_norm": 0.04451673559222652, - "learning_rate": 9.345632579846306e-06, - "loss": 0.0003, + "epoch": 1.7184, + "grad_norm": 0.015472810303115406, + "learning_rate": 8.144522461705067e-06, + "loss": 0.0001, "step": 1074 }, { - "epoch": 1.6092814371257484, - "grad_norm": 0.11115723113169731, - "learning_rate": 9.329499323138035e-06, - "loss": 0.0007, + "epoch": 1.72, + "grad_norm": 0.0913008252673765, + "learning_rate": 8.12754482425989e-06, + "loss": 0.0013, "step": 1075 }, { - "epoch": 1.6107784431137726, - "grad_norm": 0.046162578574199704, - "learning_rate": 9.313367819314136e-06, - "loss": 0.0005, + "epoch": 1.7216, + "grad_norm": 0.017356882464452193, + "learning_rate": 8.110572778250086e-06, + "loss": 0.0001, "step": 1076 }, { - "epoch": 1.6122754491017965, - "grad_norm": 0.04801631426459736, - "learning_rate": 9.297238110547075e-06, - "loss": 0.0006, + "epoch": 1.7231999999999998, + "grad_norm": 0.12843015296778934, + "learning_rate": 8.09360637435676e-06, + "loss": 0.0013, "step": 1077 }, { - "epoch": 1.6137724550898205, - "grad_norm": 0.042030541819221175, - "learning_rate": 9.281110239004607e-06, - "loss": 0.0006, + "epoch": 1.7248, + "grad_norm": 0.004656608275681594, + "learning_rate": 8.076645663244168e-06, + "loss": 0.0001, "step": 1078 }, { - "epoch": 1.6152694610778444, - "grad_norm": 0.04359620580995584, - "learning_rate": 9.264984246849699e-06, - "loss": 0.0008, + "epoch": 1.7264, + "grad_norm": 0.004636984115445992, + "learning_rate": 8.05969069555957e-06, + "loss": 0.0001, "step": 1079 }, { - "epoch": 1.6167664670658684, - "grad_norm": 0.12249935790486885, - "learning_rate": 9.2488601762404e-06, - "loss": 0.0012, + "epoch": 1.728, + "grad_norm": 0.011446814949245822, + "learning_rate": 8.042741521933071e-06, + "loss": 0.0001, "step": 1080 }, { - "epoch": 1.6182634730538923, - "grad_norm": 0.08869067151066333, - "learning_rate": 9.23273806932973e-06, - "loss": 0.0006, + "epoch": 1.7296, + "grad_norm": 0.005402631192077296, + "learning_rate": 8.025798192977482e-06, + "loss": 0.0001, "step": 1081 }, { - "epoch": 1.6197604790419162, - "grad_norm": 0.04713954076892878, - "learning_rate": 9.216617968265587e-06, - "loss": 0.0004, + "epoch": 1.7311999999999999, + "grad_norm": 0.025991095533090636, + "learning_rate": 8.008860759288148e-06, + "loss": 0.0002, "step": 1082 }, { - "epoch": 1.6212574850299402, - "grad_norm": 0.022606337617157864, - "learning_rate": 9.200499915190609e-06, + "epoch": 1.7328000000000001, + "grad_norm": 0.01830933422716461, + "learning_rate": 7.991929271442817e-06, "loss": 0.0002, "step": 1083 }, { - "epoch": 1.622754491017964, - "grad_norm": 0.044196202525303686, - "learning_rate": 9.184383952242095e-06, - "loss": 0.0004, + "epoch": 1.7344, + "grad_norm": 0.0021080792693244223, + "learning_rate": 7.975003780001486e-06, + "loss": 0.0001, "step": 1084 }, { - "epoch": 1.624251497005988, - "grad_norm": 0.04585085088431671, - "learning_rate": 9.16827012155187e-06, - "loss": 0.0003, + "epoch": 1.736, + "grad_norm": 0.013074921500412782, + "learning_rate": 7.958084335506239e-06, + "loss": 0.0001, "step": 1085 }, { - "epoch": 1.625748502994012, - "grad_norm": 0.1099988952217994, - "learning_rate": 9.152158465246192e-06, - "loss": 0.0012, + "epoch": 1.7376, + "grad_norm": 0.0079255343223726, + "learning_rate": 7.941170988481108e-06, + "loss": 0.0001, "step": 1086 }, { - "epoch": 1.627245508982036, - "grad_norm": 0.21437678680661543, - "learning_rate": 9.136049025445633e-06, - "loss": 0.0024, + "epoch": 1.7391999999999999, + "grad_norm": 0.004299069808370429, + "learning_rate": 7.924263789431913e-06, + "loss": 0.0001, "step": 1087 }, { - "epoch": 1.6287425149700598, - "grad_norm": 0.07403926272991478, - "learning_rate": 9.11994184426496e-06, - "loss": 0.0005, + "epoch": 1.7408000000000001, + "grad_norm": 0.0027531772000943576, + "learning_rate": 7.907362788846116e-06, + "loss": 0.0001, "step": 1088 }, { - "epoch": 1.6302395209580838, - "grad_norm": 0.059002757939724, - "learning_rate": 9.10383696381305e-06, - "loss": 0.0007, + "epoch": 1.7424, + "grad_norm": 0.007412058248624506, + "learning_rate": 7.89046803719267e-06, + "loss": 0.0001, "step": 1089 }, { - "epoch": 1.6317365269461077, - "grad_norm": 0.12675376681424935, - "learning_rate": 9.087734426192753e-06, - "loss": 0.001, + "epoch": 1.744, + "grad_norm": 0.014519619506843367, + "learning_rate": 7.873579584921869e-06, + "loss": 0.0001, "step": 1090 }, { - "epoch": 1.6332335329341316, - "grad_norm": 0.13626773883445795, - "learning_rate": 9.07163427350081e-06, - "loss": 0.0011, + "epoch": 1.7456, + "grad_norm": 0.005921967451427256, + "learning_rate": 7.856697482465195e-06, + "loss": 0.0001, "step": 1091 }, { - "epoch": 1.6347305389221556, - "grad_norm": 0.09619385565164779, - "learning_rate": 9.055536547827708e-06, - "loss": 0.0012, + "epoch": 1.7471999999999999, + "grad_norm": 0.007296203326684224, + "learning_rate": 7.839821780235168e-06, + "loss": 0.0001, "step": 1092 }, { - "epoch": 1.6362275449101795, - "grad_norm": 0.07077546546688278, - "learning_rate": 9.0394412912576e-06, - "loss": 0.0008, + "epoch": 1.7488000000000001, + "grad_norm": 0.005280268273475599, + "learning_rate": 7.822952528625192e-06, + "loss": 0.0001, "step": 1093 }, { - "epoch": 1.6377245508982035, - "grad_norm": 0.11742541713396401, - "learning_rate": 9.023348545868187e-06, - "loss": 0.0017, + "epoch": 1.7504, + "grad_norm": 0.004460160305069306, + "learning_rate": 7.806089778009421e-06, + "loss": 0.0001, "step": 1094 }, { - "epoch": 1.6392215568862274, - "grad_norm": 0.05392358126836917, - "learning_rate": 9.007258353730594e-06, - "loss": 0.0005, + "epoch": 1.752, + "grad_norm": 0.002065351387446571, + "learning_rate": 7.789233578742583e-06, + "loss": 0.0001, "step": 1095 }, { - "epoch": 1.6407185628742516, - "grad_norm": 0.05286986458798405, - "learning_rate": 8.99117075690928e-06, - "loss": 0.0004, + "epoch": 1.7536, + "grad_norm": 0.016340239360333648, + "learning_rate": 7.77238398115985e-06, + "loss": 0.0001, "step": 1096 }, { - "epoch": 1.6422155688622755, - "grad_norm": 0.1037025844104848, - "learning_rate": 8.975085797461915e-06, - "loss": 0.0009, + "epoch": 1.7551999999999999, + "grad_norm": 0.0057828584962989395, + "learning_rate": 7.755541035576677e-06, + "loss": 0.0001, "step": 1097 }, { - "epoch": 1.6437125748502994, - "grad_norm": 0.026409960690047918, - "learning_rate": 8.959003517439282e-06, - "loss": 0.0003, + "epoch": 1.7568000000000001, + "grad_norm": 0.0027678901722254182, + "learning_rate": 7.738704792288654e-06, + "loss": 0.0001, "step": 1098 }, { - "epoch": 1.6452095808383234, - "grad_norm": 0.06352856914344773, - "learning_rate": 8.942923958885142e-06, - "loss": 0.0003, + "epoch": 1.7584, + "grad_norm": 0.09580771102289547, + "learning_rate": 7.721875301571359e-06, + "loss": 0.0023, "step": 1099 }, { - "epoch": 1.6467065868263473, - "grad_norm": 0.061764806505025756, - "learning_rate": 8.92684716383616e-06, - "loss": 0.0008, + "epoch": 1.76, + "grad_norm": 0.0038614718887813606, + "learning_rate": 7.705052613680212e-06, + "loss": 0.0001, "step": 1100 }, { - "epoch": 1.6482035928143712, - "grad_norm": 0.05291688814800548, - "learning_rate": 8.910773174321765e-06, - "loss": 0.0007, + "epoch": 1.7616, + "grad_norm": 0.0023181148268299626, + "learning_rate": 7.688236778850307e-06, + "loss": 0.0, "step": 1101 }, { - "epoch": 1.6497005988023952, - "grad_norm": 0.07133556607963744, - "learning_rate": 8.894702032364053e-06, - "loss": 0.0011, + "epoch": 1.7631999999999999, + "grad_norm": 0.15826582745285653, + "learning_rate": 7.671427847296274e-06, + "loss": 0.0028, "step": 1102 }, { - "epoch": 1.6511976047904193, - "grad_norm": 0.09968542393075754, - "learning_rate": 8.878633779977686e-06, - "loss": 0.0019, + "epoch": 1.7648000000000001, + "grad_norm": 0.024941432115363694, + "learning_rate": 7.654625869212147e-06, + "loss": 0.0002, "step": 1103 }, { - "epoch": 1.6526946107784433, - "grad_norm": 0.03251881336576024, - "learning_rate": 8.862568459169755e-06, - "loss": 0.0003, + "epoch": 1.7664, + "grad_norm": 0.010491559643191137, + "learning_rate": 7.637830894771176e-06, + "loss": 0.0001, "step": 1104 }, { - "epoch": 1.6541916167664672, - "grad_norm": 0.06370247682649403, - "learning_rate": 8.846506111939699e-06, + "epoch": 1.768, + "grad_norm": 0.10207548229257392, + "learning_rate": 7.621042974125701e-06, "loss": 0.0005, "step": 1105 }, { - "epoch": 1.6556886227544911, - "grad_norm": 0.06213918678886638, - "learning_rate": 8.830446780279175e-06, - "loss": 0.0005, + "epoch": 1.7696, + "grad_norm": 0.005440385147590804, + "learning_rate": 7.604262157407008e-06, + "loss": 0.0001, "step": 1106 }, { - "epoch": 1.657185628742515, - "grad_norm": 0.10415475247431645, - "learning_rate": 8.814390506171967e-06, - "loss": 0.0013, + "epoch": 1.7711999999999999, + "grad_norm": 0.3752625791888844, + "learning_rate": 7.587488494725157e-06, + "loss": 0.0022, "step": 1107 }, { - "epoch": 1.658682634730539, - "grad_norm": 0.07744622474255668, - "learning_rate": 8.79833733159386e-06, - "loss": 0.002, + "epoch": 1.7728000000000002, + "grad_norm": 0.009505820387901138, + "learning_rate": 7.570722036168855e-06, + "loss": 0.0001, "step": 1108 }, { - "epoch": 1.660179640718563, - "grad_norm": 0.04324172934252314, - "learning_rate": 8.78228729851253e-06, - "loss": 0.0004, + "epoch": 1.7744, + "grad_norm": 0.005630646707161456, + "learning_rate": 7.553962831805291e-06, + "loss": 0.0001, "step": 1109 }, { - "epoch": 1.6616766467065869, - "grad_norm": 0.1796309695730341, - "learning_rate": 8.766240448887454e-06, - "loss": 0.002, + "epoch": 1.776, + "grad_norm": 0.0027598416824326363, + "learning_rate": 7.537210931679988e-06, + "loss": 0.0001, "step": 1110 }, { - "epoch": 1.6631736526946108, - "grad_norm": 0.049210623724537356, - "learning_rate": 8.750196824669765e-06, - "loss": 0.0008, + "epoch": 1.7776, + "grad_norm": 0.07417331973262634, + "learning_rate": 7.520466385816672e-06, + "loss": 0.0004, "step": 1111 }, { - "epoch": 1.6646706586826348, - "grad_norm": 0.06374661903865328, - "learning_rate": 8.73415646780219e-06, - "loss": 0.0005, + "epoch": 1.7792, + "grad_norm": 0.07266576078156141, + "learning_rate": 7.5037292442170865e-06, + "loss": 0.0004, "step": 1112 }, { - "epoch": 1.6661676646706587, - "grad_norm": 0.0678542988687008, - "learning_rate": 8.718119420218894e-06, - "loss": 0.0006, + "epoch": 1.7808000000000002, + "grad_norm": 0.1274908087744277, + "learning_rate": 7.48699955686089e-06, + "loss": 0.0021, "step": 1113 }, { - "epoch": 1.6676646706586826, - "grad_norm": 0.05131894267995365, - "learning_rate": 8.702085723845402e-06, - "loss": 0.0005, + "epoch": 1.7824, + "grad_norm": 0.19971373425751254, + "learning_rate": 7.470277373705461e-06, + "loss": 0.0012, "step": 1114 }, { - "epoch": 1.6691616766467066, - "grad_norm": 0.11138493277856698, - "learning_rate": 8.686055420598474e-06, - "loss": 0.0011, + "epoch": 1.784, + "grad_norm": 0.027723381814621518, + "learning_rate": 7.453562744685779e-06, + "loss": 0.0001, "step": 1115 }, { - "epoch": 1.6706586826347305, - "grad_norm": 0.04388393803892101, - "learning_rate": 8.670028552385994e-06, - "loss": 0.0004, + "epoch": 1.7856, + "grad_norm": 0.0185271416119447, + "learning_rate": 7.4368557197142596e-06, + "loss": 0.0002, "step": 1116 }, { - "epoch": 1.6721556886227544, - "grad_norm": 0.11972182187525221, - "learning_rate": 8.654005161106877e-06, - "loss": 0.0011, + "epoch": 1.7872, + "grad_norm": 0.021960034189195798, + "learning_rate": 7.420156348680621e-06, + "loss": 0.0001, "step": 1117 }, { - "epoch": 1.6736526946107784, - "grad_norm": 0.126003813560071, - "learning_rate": 8.637985288650939e-06, - "loss": 0.0009, + "epoch": 1.7888, + "grad_norm": 0.10064763080851377, + "learning_rate": 7.4034646814517155e-06, + "loss": 0.0004, "step": 1118 }, { - "epoch": 1.6751497005988023, - "grad_norm": 0.10943077524662276, - "learning_rate": 8.621968976898804e-06, - "loss": 0.0019, + "epoch": 1.7904, + "grad_norm": 0.004508512269411402, + "learning_rate": 7.3867807678713965e-06, + "loss": 0.0001, "step": 1119 }, { - "epoch": 1.6766467065868262, - "grad_norm": 0.06513537092099632, - "learning_rate": 8.605956267721778e-06, - "loss": 0.0007, + "epoch": 1.792, + "grad_norm": 0.027868998430493137, + "learning_rate": 7.3701046577603605e-06, + "loss": 0.0001, "step": 1120 }, { - "epoch": 1.6781437125748502, - "grad_norm": 0.13419673873932153, - "learning_rate": 8.589947202981755e-06, - "loss": 0.0008, + "epoch": 1.7936, + "grad_norm": 0.01681347007094859, + "learning_rate": 7.353436400916006e-06, + "loss": 0.0001, "step": 1121 }, { - "epoch": 1.6796407185628741, - "grad_norm": 0.04079284884695222, - "learning_rate": 8.573941824531109e-06, - "loss": 0.0003, + "epoch": 1.7952, + "grad_norm": 0.03894472881247136, + "learning_rate": 7.336776047112277e-06, + "loss": 0.0002, "step": 1122 }, { - "epoch": 1.681137724550898, - "grad_norm": 0.0368419755710961, - "learning_rate": 8.557940174212555e-06, - "loss": 0.0003, + "epoch": 1.7968, + "grad_norm": 0.101783856638485, + "learning_rate": 7.32012364609952e-06, + "loss": 0.0016, "step": 1123 }, { - "epoch": 1.6826347305389222, - "grad_norm": 0.11915015080619738, - "learning_rate": 8.541942293859086e-06, - "loss": 0.0015, + "epoch": 1.7984, + "grad_norm": 0.002825888683908071, + "learning_rate": 7.303479247604333e-06, + "loss": 0.0001, "step": 1124 }, { - "epoch": 1.6841317365269461, - "grad_norm": 0.11280338061106095, - "learning_rate": 8.52594822529382e-06, - "loss": 0.001, + "epoch": 1.8, + "grad_norm": 0.006401141464077191, + "learning_rate": 7.286842901329413e-06, + "loss": 0.0001, "step": 1125 }, { - "epoch": 1.68562874251497, - "grad_norm": 0.11129477072458856, - "learning_rate": 8.509958010329931e-06, - "loss": 0.0009, + "epoch": 1.8016, + "grad_norm": 0.05910438388349573, + "learning_rate": 7.270214656953415e-06, + "loss": 0.0003, "step": 1126 }, { - "epoch": 1.687125748502994, - "grad_norm": 0.11817747887791782, - "learning_rate": 8.49397169077049e-06, - "loss": 0.0011, + "epoch": 1.8032, + "grad_norm": 0.003062913108399674, + "learning_rate": 7.253594564130804e-06, + "loss": 0.0001, "step": 1127 }, { - "epoch": 1.688622754491018, - "grad_norm": 0.09555692890154081, - "learning_rate": 8.477989308408409e-06, - "loss": 0.0014, + "epoch": 1.8048, + "grad_norm": 0.015153952472531125, + "learning_rate": 7.236982672491699e-06, + "loss": 0.0001, "step": 1128 }, { - "epoch": 1.6901197604790419, - "grad_norm": 0.06104674622378712, - "learning_rate": 8.462010905026298e-06, - "loss": 0.0004, + "epoch": 1.8064, + "grad_norm": 0.011823113691139737, + "learning_rate": 7.22037903164173e-06, + "loss": 0.0001, "step": 1129 }, { - "epoch": 1.6916167664670658, - "grad_norm": 0.11401940154366785, - "learning_rate": 8.44603652239636e-06, - "loss": 0.0009, + "epoch": 1.808, + "grad_norm": 0.003639482658779054, + "learning_rate": 7.203783691161883e-06, + "loss": 0.0001, "step": 1130 }, { - "epoch": 1.69311377245509, - "grad_norm": 0.1359921086438187, - "learning_rate": 8.430066202280302e-06, - "loss": 0.0009, + "epoch": 1.8096, + "grad_norm": 0.04199695493257313, + "learning_rate": 7.187196700608373e-06, + "loss": 0.0002, "step": 1131 }, { - "epoch": 1.694610778443114, - "grad_norm": 0.07677903643974247, - "learning_rate": 8.414099986429188e-06, - "loss": 0.0005, + "epoch": 1.8112, + "grad_norm": 0.16656344026191952, + "learning_rate": 7.170618109512465e-06, + "loss": 0.0014, "step": 1132 }, { - "epoch": 1.6961077844311379, - "grad_norm": 0.04839849020100479, - "learning_rate": 8.398137916583371e-06, - "loss": 0.0003, + "epoch": 1.8128, + "grad_norm": 0.017200741298937835, + "learning_rate": 7.154047967380353e-06, + "loss": 0.0001, "step": 1133 }, { - "epoch": 1.6976047904191618, - "grad_norm": 0.12381943481528089, - "learning_rate": 8.382180034472353e-06, - "loss": 0.0008, + "epoch": 1.8144, + "grad_norm": 0.015394194260956718, + "learning_rate": 7.137486323692994e-06, + "loss": 0.0001, "step": 1134 }, { - "epoch": 1.6991017964071857, - "grad_norm": 0.07469322717802963, - "learning_rate": 8.366226381814698e-06, - "loss": 0.0007, + "epoch": 1.8159999999999998, + "grad_norm": 0.0393247070778465, + "learning_rate": 7.120933227905971e-06, + "loss": 0.0002, "step": 1135 }, { - "epoch": 1.7005988023952097, - "grad_norm": 0.06504097124135295, - "learning_rate": 8.350277000317911e-06, - "loss": 0.0006, + "epoch": 1.8176, + "grad_norm": 0.004208374779893613, + "learning_rate": 7.104388729449338e-06, + "loss": 0.0001, "step": 1136 }, { - "epoch": 1.7020958083832336, - "grad_norm": 0.05289705787630766, - "learning_rate": 8.334331931678317e-06, - "loss": 0.0005, + "epoch": 1.8192, + "grad_norm": 0.004585551477784112, + "learning_rate": 7.0878528777274814e-06, + "loss": 0.0001, "step": 1137 }, { - "epoch": 1.7035928143712575, - "grad_norm": 0.08661338942838477, - "learning_rate": 8.31839121758099e-06, - "loss": 0.0015, + "epoch": 1.8208, + "grad_norm": 0.14808830235231965, + "learning_rate": 7.0713257221189635e-06, + "loss": 0.0008, "step": 1138 }, { - "epoch": 1.7050898203592815, - "grad_norm": 0.08767555555868468, - "learning_rate": 8.302454899699597e-06, - "loss": 0.0006, + "epoch": 1.8224, + "grad_norm": 0.1436161753324329, + "learning_rate": 7.05480731197638e-06, + "loss": 0.0004, "step": 1139 }, { - "epoch": 1.7065868263473054, - "grad_norm": 0.09256232285744476, - "learning_rate": 8.286523019696327e-06, - "loss": 0.001, + "epoch": 1.8239999999999998, + "grad_norm": 0.0030378585652498986, + "learning_rate": 7.0382976966262065e-06, + "loss": 0.0, "step": 1140 }, { - "epoch": 1.7080838323353293, - "grad_norm": 0.053608939965644864, - "learning_rate": 8.270595619221762e-06, - "loss": 0.0005, + "epoch": 1.8256000000000001, + "grad_norm": 0.006585475630816729, + "learning_rate": 7.021796925368667e-06, + "loss": 0.0001, "step": 1141 }, { - "epoch": 1.7095808383233533, - "grad_norm": 0.03018588325618429, - "learning_rate": 8.254672739914771e-06, - "loss": 0.0005, + "epoch": 1.8272, + "grad_norm": 0.22474168720057988, + "learning_rate": 7.005305047477566e-06, + "loss": 0.0029, "step": 1142 }, { - "epoch": 1.7110778443113772, - "grad_norm": 0.01313607644598902, - "learning_rate": 8.238754423402411e-06, + "epoch": 1.8288, + "grad_norm": 0.0301613637954848, + "learning_rate": 6.988822112200157e-06, "loss": 0.0001, "step": 1143 }, { - "epoch": 1.7125748502994012, - "grad_norm": 0.12588799971374123, - "learning_rate": 8.222840711299798e-06, - "loss": 0.0008, + "epoch": 1.8304, + "grad_norm": 0.004656344661469704, + "learning_rate": 6.9723481687569836e-06, + "loss": 0.0001, "step": 1144 }, { - "epoch": 1.714071856287425, - "grad_norm": 0.08395380803153646, - "learning_rate": 8.206931645210025e-06, - "loss": 0.0007, + "epoch": 1.8319999999999999, + "grad_norm": 0.020491245770373548, + "learning_rate": 6.955883266341741e-06, + "loss": 0.0001, "step": 1145 }, { - "epoch": 1.715568862275449, - "grad_norm": 0.05648989201497437, - "learning_rate": 8.191027266724026e-06, - "loss": 0.0005, + "epoch": 1.8336000000000001, + "grad_norm": 0.0033176458831527247, + "learning_rate": 6.939427454121128e-06, + "loss": 0.0001, "step": 1146 }, { - "epoch": 1.717065868263473, - "grad_norm": 0.299079726997726, - "learning_rate": 8.175127617420489e-06, - "loss": 0.0038, + "epoch": 1.8352, + "grad_norm": 0.0012845644986009292, + "learning_rate": 6.9229807812346985e-06, + "loss": 0.0, "step": 1147 }, { - "epoch": 1.718562874251497, - "grad_norm": 0.2211219838190909, - "learning_rate": 8.15923273886574e-06, - "loss": 0.0017, + "epoch": 1.8368, + "grad_norm": 0.004031855277477734, + "learning_rate": 6.9065432967947145e-06, + "loss": 0.0001, "step": 1148 }, { - "epoch": 1.7200598802395208, - "grad_norm": 0.13423753642877773, - "learning_rate": 8.14334267261362e-06, - "loss": 0.0013, + "epoch": 1.8384, + "grad_norm": 0.10942866351129674, + "learning_rate": 6.890115049885995e-06, + "loss": 0.001, "step": 1149 }, { - "epoch": 1.7215568862275448, - "grad_norm": 0.021323410927144678, - "learning_rate": 8.127457460205403e-06, - "loss": 0.0002, + "epoch": 1.8399999999999999, + "grad_norm": 0.034646498478789645, + "learning_rate": 6.8736960895657854e-06, + "loss": 0.0001, "step": 1150 }, { - "epoch": 1.7230538922155687, - "grad_norm": 0.10537068848194503, - "learning_rate": 8.111577143169667e-06, - "loss": 0.0009, + "epoch": 1.8416000000000001, + "grad_norm": 0.004328068081161076, + "learning_rate": 6.85728646486359e-06, + "loss": 0.0001, "step": 1151 }, { - "epoch": 1.7245508982035929, - "grad_norm": 0.16469004106932947, - "learning_rate": 8.095701763022197e-06, - "loss": 0.0013, + "epoch": 1.8432, + "grad_norm": 0.0045096309471346265, + "learning_rate": 6.840886224781039e-06, + "loss": 0.0001, "step": 1152 }, { - "epoch": 1.7260479041916168, - "grad_norm": 0.17092342034188415, - "learning_rate": 8.079831361265862e-06, - "loss": 0.0017, + "epoch": 1.8448, + "grad_norm": 0.14523052171148082, + "learning_rate": 6.824495418291741e-06, + "loss": 0.0007, "step": 1153 }, { - "epoch": 1.7275449101796407, - "grad_norm": 0.06779177154424461, - "learning_rate": 8.063965979390526e-06, - "loss": 0.0006, + "epoch": 1.8464, + "grad_norm": 0.00296018643526749, + "learning_rate": 6.8081140943411296e-06, + "loss": 0.0001, "step": 1154 }, { - "epoch": 1.7290419161676647, - "grad_norm": 0.1236163086259948, - "learning_rate": 8.048105658872928e-06, - "loss": 0.0019, + "epoch": 1.8479999999999999, + "grad_norm": 0.03137236634318416, + "learning_rate": 6.791742301846325e-06, + "loss": 0.0002, "step": 1155 }, { - "epoch": 1.7305389221556886, - "grad_norm": 0.12610975535688002, - "learning_rate": 8.032250441176566e-06, - "loss": 0.0028, + "epoch": 1.8496000000000001, + "grad_norm": 0.0012996113765978107, + "learning_rate": 6.775380089695986e-06, + "loss": 0.0, "step": 1156 }, { - "epoch": 1.7320359281437125, - "grad_norm": 0.18941444937914406, - "learning_rate": 8.01640036775161e-06, - "loss": 0.0012, + "epoch": 1.8512, + "grad_norm": 0.004411509861164856, + "learning_rate": 6.759027506750159e-06, + "loss": 0.0001, "step": 1157 }, { - "epoch": 1.7335329341317365, - "grad_norm": 0.0831881723747311, - "learning_rate": 8.000555480034774e-06, - "loss": 0.0006, + "epoch": 1.8528, + "grad_norm": 0.014057183121189027, + "learning_rate": 6.742684601840142e-06, + "loss": 0.0001, "step": 1158 }, { - "epoch": 1.7350299401197606, - "grad_norm": 0.11838944511546944, - "learning_rate": 7.984715819449219e-06, - "loss": 0.0009, + "epoch": 1.8544, + "grad_norm": 0.0038426129938542576, + "learning_rate": 6.726351423768323e-06, + "loss": 0.0001, "step": 1159 }, { - "epoch": 1.7365269461077846, - "grad_norm": 0.03888943100450595, - "learning_rate": 7.968881427404433e-06, - "loss": 0.0004, + "epoch": 1.8559999999999999, + "grad_norm": 0.03527688711230369, + "learning_rate": 6.710028021308061e-06, + "loss": 0.0001, "step": 1160 }, { - "epoch": 1.7380239520958085, - "grad_norm": 0.04784679025928149, - "learning_rate": 7.953052345296141e-06, - "loss": 0.0004, + "epoch": 1.8576000000000001, + "grad_norm": 0.014309876719334954, + "learning_rate": 6.693714443203507e-06, + "loss": 0.0001, "step": 1161 }, { - "epoch": 1.7395209580838324, - "grad_norm": 0.09465522756842437, - "learning_rate": 7.937228614506183e-06, - "loss": 0.0011, + "epoch": 1.8592, + "grad_norm": 0.003078997197109562, + "learning_rate": 6.677410738169485e-06, + "loss": 0.0, "step": 1162 }, { - "epoch": 1.7410179640718564, - "grad_norm": 0.02839056527145619, - "learning_rate": 7.921410276402403e-06, - "loss": 0.0002, + "epoch": 1.8608, + "grad_norm": 0.08391880562324879, + "learning_rate": 6.661116954891329e-06, + "loss": 0.0004, "step": 1163 }, { - "epoch": 1.7425149700598803, - "grad_norm": 0.11897825771964533, - "learning_rate": 7.905597372338558e-06, - "loss": 0.0015, + "epoch": 1.8624, + "grad_norm": 0.01101824013877054, + "learning_rate": 6.644833142024752e-06, + "loss": 0.0001, "step": 1164 }, { - "epoch": 1.7440119760479043, - "grad_norm": 0.13433046182833272, - "learning_rate": 7.889789943654185e-06, - "loss": 0.0014, + "epoch": 1.8639999999999999, + "grad_norm": 0.002168915003192342, + "learning_rate": 6.62855934819569e-06, + "loss": 0.0, "step": 1165 }, { - "epoch": 1.7455089820359282, - "grad_norm": 0.042192664781619316, - "learning_rate": 7.87398803167452e-06, - "loss": 0.0003, + "epoch": 1.8656000000000001, + "grad_norm": 0.03937365515123593, + "learning_rate": 6.612295622000162e-06, + "loss": 0.0002, "step": 1166 }, { - "epoch": 1.7470059880239521, - "grad_norm": 0.09786003136143633, - "learning_rate": 7.858191677710364e-06, - "loss": 0.0018, + "epoch": 1.8672, + "grad_norm": 0.005212065805285967, + "learning_rate": 6.59604201200412e-06, + "loss": 0.0001, "step": 1167 }, { - "epoch": 1.748502994011976, - "grad_norm": 0.1278807783300935, - "learning_rate": 7.842400923058e-06, - "loss": 0.0012, + "epoch": 1.8688, + "grad_norm": 0.009421370870975533, + "learning_rate": 6.579798566743314e-06, + "loss": 0.0001, "step": 1168 }, { - "epoch": 1.75, - "grad_norm": 0.18490520684949707, - "learning_rate": 7.826615808999066e-06, - "loss": 0.0015, + "epoch": 1.8704, + "grad_norm": 0.037703987007789874, + "learning_rate": 6.563565334723134e-06, + "loss": 0.0002, "step": 1169 }, { - "epoch": 1.751497005988024, - "grad_norm": 0.10074807032478746, - "learning_rate": 7.810836376800455e-06, - "loss": 0.0007, + "epoch": 1.8719999999999999, + "grad_norm": 0.01482398063639901, + "learning_rate": 6.547342364418482e-06, + "loss": 0.0001, "step": 1170 }, { - "epoch": 1.7529940119760479, - "grad_norm": 0.06251541594987227, - "learning_rate": 7.795062667714211e-06, - "loss": 0.0005, + "epoch": 1.8736000000000002, + "grad_norm": 0.008285397672775559, + "learning_rate": 6.5311297042736046e-06, + "loss": 0.0001, "step": 1171 }, { - "epoch": 1.7544910179640718, - "grad_norm": 0.05781097111200272, - "learning_rate": 7.7792947229774e-06, - "loss": 0.0002, + "epoch": 1.8752, + "grad_norm": 0.0020608071734066684, + "learning_rate": 6.514927402701965e-06, + "loss": 0.0001, "step": 1172 }, { - "epoch": 1.7559880239520957, - "grad_norm": 0.2185198392929054, - "learning_rate": 7.763532583812042e-06, - "loss": 0.0018, + "epoch": 1.8768, + "grad_norm": 0.004513862742587086, + "learning_rate": 6.498735508086094e-06, + "loss": 0.0001, "step": 1173 }, { - "epoch": 1.7574850299401197, - "grad_norm": 0.064144359442749, - "learning_rate": 7.747776291424955e-06, - "loss": 0.0004, + "epoch": 1.8784, + "grad_norm": 0.0009014925985797817, + "learning_rate": 6.482554068777451e-06, + "loss": 0.0, "step": 1174 }, { - "epoch": 1.7589820359281436, - "grad_norm": 0.08498960587781633, - "learning_rate": 7.732025887007693e-06, - "loss": 0.0008, + "epoch": 1.88, + "grad_norm": 0.002032352895805396, + "learning_rate": 6.466383133096268e-06, + "loss": 0.0, "step": 1175 }, { - "epoch": 1.7604790419161676, - "grad_norm": 0.1922473098948509, - "learning_rate": 7.716281411736411e-06, - "loss": 0.0016, + "epoch": 1.8816000000000002, + "grad_norm": 0.0022593537150606435, + "learning_rate": 6.450222749331414e-06, + "loss": 0.0001, "step": 1176 }, { - "epoch": 1.7619760479041915, - "grad_norm": 0.0661949673802176, - "learning_rate": 7.700542906771747e-06, - "loss": 0.0004, + "epoch": 1.8832, + "grad_norm": 0.0033082404611576863, + "learning_rate": 6.4340729657402424e-06, + "loss": 0.0, "step": 1177 }, { - "epoch": 1.7634730538922154, - "grad_norm": 0.10775727570874787, - "learning_rate": 7.684810413258756e-06, - "loss": 0.0015, + "epoch": 1.8848, + "grad_norm": 0.0030771403624746036, + "learning_rate": 6.4179338305484675e-06, + "loss": 0.0001, "step": 1178 }, { - "epoch": 1.7649700598802394, - "grad_norm": 0.041135301287911916, - "learning_rate": 7.669083972326761e-06, - "loss": 0.0004, + "epoch": 1.8864, + "grad_norm": 0.009938184338915211, + "learning_rate": 6.40180539194999e-06, + "loss": 0.0001, "step": 1179 }, { - "epoch": 1.7664670658682635, - "grad_norm": 0.07967634346867503, - "learning_rate": 7.653363625089272e-06, - "loss": 0.0009, + "epoch": 1.888, + "grad_norm": 0.045496591526426336, + "learning_rate": 6.385687698106781e-06, + "loss": 0.0002, "step": 1180 }, { - "epoch": 1.7679640718562875, - "grad_norm": 0.06632082900120188, - "learning_rate": 7.637649412643852e-06, - "loss": 0.0015, + "epoch": 1.8896, + "grad_norm": 0.011906474987920825, + "learning_rate": 6.3695807971487175e-06, + "loss": 0.0001, "step": 1181 }, { - "epoch": 1.7694610778443114, - "grad_norm": 0.0597435675787796, - "learning_rate": 7.621941376072045e-06, - "loss": 0.0005, + "epoch": 1.8912, + "grad_norm": 0.0009566349700188584, + "learning_rate": 6.35348473717345e-06, + "loss": 0.0, "step": 1182 }, { - "epoch": 1.7709580838323353, - "grad_norm": 0.10680414994888268, - "learning_rate": 7.606239556439243e-06, - "loss": 0.0008, + "epoch": 1.8928, + "grad_norm": 0.024164789957125666, + "learning_rate": 6.337399566246257e-06, + "loss": 0.0001, "step": 1183 }, { - "epoch": 1.7724550898203593, - "grad_norm": 0.08131544743100391, - "learning_rate": 7.590543994794575e-06, - "loss": 0.0005, + "epoch": 1.8944, + "grad_norm": 0.007994688357821257, + "learning_rate": 6.321325332399904e-06, + "loss": 0.0001, "step": 1184 }, { - "epoch": 1.7739520958083832, - "grad_norm": 0.14796670864528785, - "learning_rate": 7.574854732170824e-06, - "loss": 0.0011, + "epoch": 1.896, + "grad_norm": 0.004354701970615443, + "learning_rate": 6.305262083634488e-06, + "loss": 0.0001, "step": 1185 }, { - "epoch": 1.7754491017964071, - "grad_norm": 0.05808741649463366, - "learning_rate": 7.559171809584298e-06, - "loss": 0.0004, + "epoch": 1.8976, + "grad_norm": 0.09316375812988109, + "learning_rate": 6.289209867917312e-06, + "loss": 0.0026, "step": 1186 }, { - "epoch": 1.7769461077844313, - "grad_norm": 0.0791406942247839, - "learning_rate": 7.543495268034735e-06, - "loss": 0.0009, + "epoch": 1.8992, + "grad_norm": 0.00831804802774526, + "learning_rate": 6.2731687331827214e-06, + "loss": 0.0001, "step": 1187 }, { - "epoch": 1.7784431137724552, - "grad_norm": 0.047886021234621046, - "learning_rate": 7.527825148505181e-06, - "loss": 0.0003, + "epoch": 1.9008, + "grad_norm": 0.0043503339187173605, + "learning_rate": 6.2571387273319905e-06, + "loss": 0.0001, "step": 1188 }, { - "epoch": 1.7799401197604792, - "grad_norm": 0.1335212254194146, - "learning_rate": 7.512161491961903e-06, - "loss": 0.001, + "epoch": 1.9024, + "grad_norm": 0.15915503150998686, + "learning_rate": 6.2411198982331435e-06, + "loss": 0.0005, "step": 1189 }, { - "epoch": 1.781437125748503, - "grad_norm": 0.03913458823023904, - "learning_rate": 7.496504339354274e-06, - "loss": 0.0006, + "epoch": 1.904, + "grad_norm": 0.005591245871832945, + "learning_rate": 6.225112293720836e-06, + "loss": 0.0001, "step": 1190 }, { - "epoch": 1.782934131736527, - "grad_norm": 0.08672922100866931, - "learning_rate": 7.48085373161465e-06, - "loss": 0.0011, + "epoch": 1.9056, + "grad_norm": 0.04492041883237681, + "learning_rate": 6.209115961596208e-06, + "loss": 0.0003, "step": 1191 }, { - "epoch": 1.784431137724551, - "grad_norm": 0.11931210383133138, - "learning_rate": 7.465209709658294e-06, - "loss": 0.0011, + "epoch": 1.9072, + "grad_norm": 0.06192810990131338, + "learning_rate": 6.193130949626731e-06, + "loss": 0.0003, "step": 1192 }, { - "epoch": 1.785928143712575, - "grad_norm": 0.05929701873982781, - "learning_rate": 7.449572314383237e-06, - "loss": 0.0006, + "epoch": 1.9088, + "grad_norm": 0.008427562962625855, + "learning_rate": 6.177157305546077e-06, + "loss": 0.0001, "step": 1193 }, { - "epoch": 1.7874251497005988, - "grad_norm": 0.04505152324525866, - "learning_rate": 7.4339415866701945e-06, - "loss": 0.0003, + "epoch": 1.9104, + "grad_norm": 0.007826368970274592, + "learning_rate": 6.1611950770539766e-06, + "loss": 0.0001, "step": 1194 }, { - "epoch": 1.7889221556886228, - "grad_norm": 0.12017810117570427, - "learning_rate": 7.418317567382447e-06, - "loss": 0.0008, + "epoch": 1.912, + "grad_norm": 0.06847520283496732, + "learning_rate": 6.145244311816063e-06, + "loss": 0.0002, "step": 1195 }, { - "epoch": 1.7904191616766467, - "grad_norm": 0.09158715938134543, - "learning_rate": 7.402700297365741e-06, - "loss": 0.0007, + "epoch": 1.9136, + "grad_norm": 0.0173206162455603, + "learning_rate": 6.129305057463741e-06, + "loss": 0.0001, "step": 1196 }, { - "epoch": 1.7919161676646707, - "grad_norm": 0.1014983459541966, - "learning_rate": 7.38708981744818e-06, - "loss": 0.0006, + "epoch": 1.9152, + "grad_norm": 0.001479780943036349, + "learning_rate": 6.113377361594048e-06, + "loss": 0.0, "step": 1197 }, { - "epoch": 1.7934131736526946, - "grad_norm": 0.08269083942405764, - "learning_rate": 7.3714861684401025e-06, - "loss": 0.0004, + "epoch": 1.9167999999999998, + "grad_norm": 0.12552314051053987, + "learning_rate": 6.0974612717695e-06, + "loss": 0.0005, "step": 1198 }, { - "epoch": 1.7949101796407185, - "grad_norm": 0.06644722617806384, - "learning_rate": 7.355889391134011e-06, - "loss": 0.0006, + "epoch": 1.9184, + "grad_norm": 0.0019474675781217055, + "learning_rate": 6.081556835517955e-06, + "loss": 0.0, "step": 1199 }, { - "epoch": 1.7964071856287425, - "grad_norm": 0.05523988911350223, - "learning_rate": 7.340299526304422e-06, - "loss": 0.0004, + "epoch": 1.92, + "grad_norm": 0.03905313180296663, + "learning_rate": 6.065664100332478e-06, + "loss": 0.0002, "step": 1200 }, { - "epoch": 1.7979041916167664, - "grad_norm": 0.08351184232628797, - "learning_rate": 7.324716614707794e-06, - "loss": 0.0009, + "epoch": 1.9216, + "grad_norm": 0.0015254580692129185, + "learning_rate": 6.049783113671184e-06, + "loss": 0.0, "step": 1201 }, { - "epoch": 1.7994011976047903, - "grad_norm": 0.006774670042731655, - "learning_rate": 7.309140697082402e-06, + "epoch": 1.9232, + "grad_norm": 0.004929310744791136, + "learning_rate": 6.033913922957112e-06, "loss": 0.0001, "step": 1202 }, { - "epoch": 1.8008982035928143, - "grad_norm": 0.04928550988537502, - "learning_rate": 7.293571814148241e-06, - "loss": 0.0003, + "epoch": 1.9247999999999998, + "grad_norm": 0.12635043606836968, + "learning_rate": 6.018056575578075e-06, + "loss": 0.0005, "step": 1203 }, { - "epoch": 1.8023952095808382, - "grad_norm": 0.06584370826534322, - "learning_rate": 7.2780100066069174e-06, - "loss": 0.0003, + "epoch": 1.9264000000000001, + "grad_norm": 0.0019222745708322344, + "learning_rate": 6.002211118886514e-06, + "loss": 0.0, "step": 1204 }, { - "epoch": 1.8038922155688621, - "grad_norm": 0.18345445829638807, - "learning_rate": 7.262455315141526e-06, - "loss": 0.001, + "epoch": 1.928, + "grad_norm": 0.014047020016825352, + "learning_rate": 5.986377600199371e-06, + "loss": 0.0001, "step": 1205 }, { - "epoch": 1.805389221556886, - "grad_norm": 0.07244929387533793, - "learning_rate": 7.246907780416574e-06, - "loss": 0.0005, + "epoch": 1.9296, + "grad_norm": 0.0031841005373336863, + "learning_rate": 5.970556066797941e-06, + "loss": 0.0, "step": 1206 }, { - "epoch": 1.80688622754491, - "grad_norm": 0.09817729337327456, - "learning_rate": 7.231367443077851e-06, - "loss": 0.0007, + "epoch": 1.9312, + "grad_norm": 0.0010223404299522473, + "learning_rate": 5.9547465659277215e-06, + "loss": 0.0, "step": 1207 }, { - "epoch": 1.8083832335329342, - "grad_norm": 0.05483140894713459, - "learning_rate": 7.215834343752337e-06, - "loss": 0.0004, + "epoch": 1.9327999999999999, + "grad_norm": 0.0035052486724118986, + "learning_rate": 5.93894914479828e-06, + "loss": 0.0, "step": 1208 }, { - "epoch": 1.8098802395209581, - "grad_norm": 0.03551972805679641, - "learning_rate": 7.2003085230480785e-06, - "loss": 0.0003, + "epoch": 1.9344000000000001, + "grad_norm": 0.002527851996072802, + "learning_rate": 5.923163850583114e-06, + "loss": 0.0001, "step": 1209 }, { - "epoch": 1.811377245508982, - "grad_norm": 0.06212947300716921, - "learning_rate": 7.184790021554102e-06, - "loss": 0.0004, + "epoch": 1.936, + "grad_norm": 0.013050221029306884, + "learning_rate": 5.907390730419506e-06, + "loss": 0.0001, "step": 1210 }, { - "epoch": 1.812874251497006, - "grad_norm": 0.11663130238724655, - "learning_rate": 7.169278879840304e-06, - "loss": 0.001, + "epoch": 1.9376, + "grad_norm": 0.08448027001276927, + "learning_rate": 5.891629831408392e-06, + "loss": 0.0003, "step": 1211 }, { - "epoch": 1.81437125748503, - "grad_norm": 0.1655611404625784, - "learning_rate": 7.153775138457326e-06, - "loss": 0.0014, + "epoch": 1.9392, + "grad_norm": 0.0021380535708301327, + "learning_rate": 5.875881200614208e-06, + "loss": 0.0, "step": 1212 }, { - "epoch": 1.8158682634730539, - "grad_norm": 0.016811102246245015, - "learning_rate": 7.138278837936475e-06, + "epoch": 1.9407999999999999, + "grad_norm": 0.004995336312321058, + "learning_rate": 5.8601448850647515e-06, "loss": 0.0001, "step": 1213 }, { - "epoch": 1.8173652694610778, - "grad_norm": 0.08174583277697046, - "learning_rate": 7.122790018789597e-06, - "loss": 0.0008, + "epoch": 1.9424000000000001, + "grad_norm": 0.008996039230761935, + "learning_rate": 5.8444209317510515e-06, + "loss": 0.0001, "step": 1214 }, { - "epoch": 1.818862275449102, - "grad_norm": 0.09229932047043227, - "learning_rate": 7.107308721508992e-06, - "loss": 0.0005, + "epoch": 1.944, + "grad_norm": 0.004159545890870004, + "learning_rate": 5.828709387627219e-06, + "loss": 0.0001, "step": 1215 }, { - "epoch": 1.8203592814371259, - "grad_norm": 0.05703431462629337, - "learning_rate": 7.091834986567278e-06, - "loss": 0.0003, + "epoch": 1.9456, + "grad_norm": 0.02845200728681492, + "learning_rate": 5.813010299610313e-06, + "loss": 0.0001, "step": 1216 }, { - "epoch": 1.8218562874251498, - "grad_norm": 0.045543500277733254, - "learning_rate": 7.076368854417319e-06, - "loss": 0.0004, + "epoch": 1.9472, + "grad_norm": 0.10556405252712346, + "learning_rate": 5.797323714580192e-06, + "loss": 0.0005, "step": 1217 }, { - "epoch": 1.8233532934131738, - "grad_norm": 0.04621091298397737, - "learning_rate": 7.0609103654920955e-06, - "loss": 0.0006, + "epoch": 1.9487999999999999, + "grad_norm": 0.05178852241859, + "learning_rate": 5.781649679379379e-06, + "loss": 0.0003, "step": 1218 }, { - "epoch": 1.8248502994011977, - "grad_norm": 0.12194672605605335, - "learning_rate": 7.045459560204604e-06, - "loss": 0.0008, + "epoch": 1.9504000000000001, + "grad_norm": 0.0019259899274537585, + "learning_rate": 5.7659882408129204e-06, + "loss": 0.0, "step": 1219 }, { - "epoch": 1.8263473053892216, - "grad_norm": 0.04569140798030763, - "learning_rate": 7.030016478947763e-06, - "loss": 0.0003, + "epoch": 1.952, + "grad_norm": 0.16739476698823094, + "learning_rate": 5.750339445648252e-06, + "loss": 0.0013, "step": 1220 }, { - "epoch": 1.8278443113772456, - "grad_norm": 0.09081360546587813, - "learning_rate": 7.014581162094284e-06, - "loss": 0.0003, + "epoch": 1.9536, + "grad_norm": 0.03504969820142291, + "learning_rate": 5.7347033406150494e-06, + "loss": 0.0001, "step": 1221 }, { - "epoch": 1.8293413173652695, - "grad_norm": 0.07508995179575473, - "learning_rate": 6.999153649996595e-06, - "loss": 0.0007, + "epoch": 1.9552, + "grad_norm": 0.029114280813010592, + "learning_rate": 5.7190799724050924e-06, + "loss": 0.0001, "step": 1222 }, { - "epoch": 1.8308383233532934, - "grad_norm": 0.0236647586499013, - "learning_rate": 6.983733982986709e-06, - "loss": 0.0002, + "epoch": 1.9567999999999999, + "grad_norm": 0.007452639771015537, + "learning_rate": 5.703469387672138e-06, + "loss": 0.0001, "step": 1223 }, { - "epoch": 1.8323353293413174, - "grad_norm": 0.0348458963052325, - "learning_rate": 6.968322201376136e-06, - "loss": 0.0003, + "epoch": 1.9584000000000001, + "grad_norm": 0.049837640333634387, + "learning_rate": 5.687871633031754e-06, + "loss": 0.0002, "step": 1224 }, { - "epoch": 1.8338323353293413, - "grad_norm": 0.06740266996280521, - "learning_rate": 6.952918345455772e-06, - "loss": 0.0005, + "epoch": 1.96, + "grad_norm": 0.0024424688157115458, + "learning_rate": 5.672286755061212e-06, + "loss": 0.0, "step": 1225 }, { - "epoch": 1.8353293413173652, - "grad_norm": 0.1410919073179155, - "learning_rate": 6.937522455495787e-06, - "loss": 0.0025, + "epoch": 1.9616, + "grad_norm": 0.014652874845066959, + "learning_rate": 5.656714800299317e-06, + "loss": 0.0001, "step": 1226 }, { - "epoch": 1.8368263473053892, - "grad_norm": 0.050967621048530726, - "learning_rate": 6.92213457174553e-06, - "loss": 0.0003, + "epoch": 1.9632, + "grad_norm": 0.005720054051263878, + "learning_rate": 5.64115581524629e-06, + "loss": 0.0001, "step": 1227 }, { - "epoch": 1.8383233532934131, - "grad_norm": 0.054601006610630415, - "learning_rate": 6.906754734433414e-06, - "loss": 0.0006, + "epoch": 1.9647999999999999, + "grad_norm": 0.03706288619215235, + "learning_rate": 5.625609846363622e-06, + "loss": 0.0002, "step": 1228 }, { - "epoch": 1.839820359281437, - "grad_norm": 0.12698206601810638, - "learning_rate": 6.891382983766828e-06, - "loss": 0.0013, + "epoch": 1.9664000000000001, + "grad_norm": 0.003003921877286788, + "learning_rate": 5.610076940073939e-06, + "loss": 0.0001, "step": 1229 }, { - "epoch": 1.841317365269461, - "grad_norm": 0.08636527463657546, - "learning_rate": 6.876019359932004e-06, - "loss": 0.0012, + "epoch": 1.968, + "grad_norm": 0.0028651414160785287, + "learning_rate": 5.594557142760853e-06, + "loss": 0.0001, "step": 1230 }, { - "epoch": 1.842814371257485, - "grad_norm": 0.05071079998230555, - "learning_rate": 6.860663903093943e-06, - "loss": 0.0003, + "epoch": 1.9696, + "grad_norm": 0.0027324425467368193, + "learning_rate": 5.579050500768837e-06, + "loss": 0.0, "step": 1231 }, { - "epoch": 1.8443113772455089, - "grad_norm": 0.018134041251772544, - "learning_rate": 6.8453166533962925e-06, + "epoch": 1.9712, + "grad_norm": 0.0024127431551241845, + "learning_rate": 5.563557060403071e-06, "loss": 0.0001, "step": 1232 }, { - "epoch": 1.8458083832335328, - "grad_norm": 0.10446778142724229, - "learning_rate": 6.829977650961229e-06, - "loss": 0.0016, + "epoch": 1.9727999999999999, + "grad_norm": 0.0026731181344807624, + "learning_rate": 5.548076867929331e-06, + "loss": 0.0001, "step": 1233 }, { - "epoch": 1.8473053892215567, - "grad_norm": 0.055625311823021856, - "learning_rate": 6.814646935889389e-06, - "loss": 0.0004, + "epoch": 1.9744000000000002, + "grad_norm": 0.018776996113024025, + "learning_rate": 5.53260996957381e-06, + "loss": 0.0001, "step": 1234 }, { - "epoch": 1.8488023952095807, - "grad_norm": 0.07096498187159628, - "learning_rate": 6.79932454825973e-06, - "loss": 0.0004, + "epoch": 1.976, + "grad_norm": 0.029136136943676304, + "learning_rate": 5.517156411523026e-06, + "loss": 0.0001, "step": 1235 }, { - "epoch": 1.8502994011976048, - "grad_norm": 0.07863020430436107, - "learning_rate": 6.7840105281294485e-06, - "loss": 0.0004, + "epoch": 1.9776, + "grad_norm": 0.009965281255625782, + "learning_rate": 5.501716239923642e-06, + "loss": 0.0001, "step": 1236 }, { - "epoch": 1.8517964071856288, - "grad_norm": 0.044616260463696586, - "learning_rate": 6.768704915533852e-06, - "loss": 0.0004, + "epoch": 1.9792, + "grad_norm": 0.0016554955923781512, + "learning_rate": 5.486289500882355e-06, + "loss": 0.0, "step": 1237 }, { - "epoch": 1.8532934131736527, - "grad_norm": 0.12621641545858503, - "learning_rate": 6.753407750486284e-06, - "loss": 0.0017, + "epoch": 1.9808, + "grad_norm": 0.11607324819037852, + "learning_rate": 5.47087624046575e-06, + "loss": 0.0007, "step": 1238 }, { - "epoch": 1.8547904191616766, - "grad_norm": 0.060596994339849504, - "learning_rate": 6.738119072977996e-06, - "loss": 0.0006, + "epoch": 1.9824000000000002, + "grad_norm": 0.055443812103154765, + "learning_rate": 5.455476504700161e-06, + "loss": 0.0004, "step": 1239 }, { - "epoch": 1.8562874251497006, - "grad_norm": 0.014807449760783308, - "learning_rate": 6.722838922978046e-06, - "loss": 0.0002, + "epoch": 1.984, + "grad_norm": 0.20593686476304568, + "learning_rate": 5.440090339571537e-06, + "loss": 0.0021, "step": 1240 }, { - "epoch": 1.8577844311377245, - "grad_norm": 0.0663517266563627, - "learning_rate": 6.707567340433212e-06, - "loss": 0.0006, + "epoch": 1.9856, + "grad_norm": 0.0030729637895142103, + "learning_rate": 5.424717791025302e-06, + "loss": 0.0001, "step": 1241 }, { - "epoch": 1.8592814371257484, - "grad_norm": 0.1209263861163919, - "learning_rate": 6.692304365267859e-06, - "loss": 0.0008, + "epoch": 1.9872, + "grad_norm": 0.010897666599821741, + "learning_rate": 5.4093589049662175e-06, + "loss": 0.0001, "step": 1242 }, { - "epoch": 1.8607784431137726, - "grad_norm": 0.027554525474874584, - "learning_rate": 6.677050037383858e-06, - "loss": 0.0003, + "epoch": 1.9888, + "grad_norm": 0.011043396569230967, + "learning_rate": 5.3940137272582534e-06, + "loss": 0.0001, "step": 1243 }, { - "epoch": 1.8622754491017965, - "grad_norm": 0.04775088764673739, - "learning_rate": 6.661804396660479e-06, - "loss": 0.0005, + "epoch": 1.9904, + "grad_norm": 0.029818851963151396, + "learning_rate": 5.378682303724435e-06, + "loss": 0.0001, "step": 1244 }, { - "epoch": 1.8637724550898205, - "grad_norm": 0.056886026682939766, - "learning_rate": 6.646567482954263e-06, - "loss": 0.0003, + "epoch": 1.992, + "grad_norm": 0.018070519093172074, + "learning_rate": 5.3633646801467255e-06, + "loss": 0.0002, "step": 1245 }, { - "epoch": 1.8652694610778444, - "grad_norm": 0.13424552882664162, - "learning_rate": 6.631339336098956e-06, - "loss": 0.0013, + "epoch": 1.9936, + "grad_norm": 0.1723775065387246, + "learning_rate": 5.348060902265871e-06, + "loss": 0.0008, "step": 1246 }, { - "epoch": 1.8667664670658684, - "grad_norm": 0.12731628473965262, - "learning_rate": 6.616119995905375e-06, - "loss": 0.0012, + "epoch": 1.9952, + "grad_norm": 0.011930808129548384, + "learning_rate": 5.332771015781275e-06, + "loss": 0.0001, "step": 1247 }, { - "epoch": 1.8682634730538923, - "grad_norm": 0.05387273939125406, - "learning_rate": 6.600909502161315e-06, + "epoch": 1.9968, + "grad_norm": 0.021970927811545714, + "learning_rate": 5.31749506635086e-06, "loss": 0.0003, "step": 1248 }, { - "epoch": 1.8697604790419162, - "grad_norm": 0.11655593860565898, - "learning_rate": 6.585707894631441e-06, - "loss": 0.0008, + "epoch": 1.9984, + "grad_norm": 0.159920156495135, + "learning_rate": 5.302233099590928e-06, + "loss": 0.0006, "step": 1249 }, { - "epoch": 1.8712574850299402, - "grad_norm": 0.03130014477728136, - "learning_rate": 6.570515213057192e-06, - "loss": 0.0002, + "epoch": 2.0, + "grad_norm": 0.007002186599926069, + "learning_rate": 5.286985161076029e-06, + "loss": 0.0001, "step": 1250 }, { - "epoch": 1.872754491017964, - "grad_norm": 0.06789407097061075, - "learning_rate": 6.555331497156671e-06, - "loss": 0.0006, + "epoch": 2.0016, + "grad_norm": 0.03948487381609094, + "learning_rate": 5.271751296338823e-06, + "loss": 0.0001, "step": 1251 }, { - "epoch": 1.874251497005988, - "grad_norm": 0.1053736315966829, - "learning_rate": 6.5401567866245405e-06, - "loss": 0.0006, + "epoch": 2.0032, + "grad_norm": 0.012853692816418372, + "learning_rate": 5.2565315508699374e-06, + "loss": 0.0001, "step": 1252 }, { - "epoch": 1.875748502994012, - "grad_norm": 0.042293639897476594, - "learning_rate": 6.5249911211319225e-06, - "loss": 0.0003, + "epoch": 2.0048, + "grad_norm": 0.04693735985976921, + "learning_rate": 5.241325970117851e-06, + "loss": 0.0002, "step": 1253 }, { - "epoch": 1.877245508982036, - "grad_norm": 0.08285464714326594, - "learning_rate": 6.509834540326285e-06, - "loss": 0.0004, + "epoch": 2.0064, + "grad_norm": 0.0010564428981468033, + "learning_rate": 5.226134599488728e-06, + "loss": 0.0, "step": 1254 }, { - "epoch": 1.8787425149700598, - "grad_norm": 0.11049018140607483, - "learning_rate": 6.494687083831362e-06, - "loss": 0.0009, + "epoch": 2.008, + "grad_norm": 0.16120154231483275, + "learning_rate": 5.210957484346314e-06, + "loss": 0.0007, "step": 1255 }, { - "epoch": 1.8802395209580838, - "grad_norm": 0.01829057968863813, - "learning_rate": 6.47954879124701e-06, + "epoch": 2.0096, + "grad_norm": 0.0060265765842624055, + "learning_rate": 5.195794670011775e-06, "loss": 0.0001, "step": 1256 }, { - "epoch": 1.8817365269461077, - "grad_norm": 0.033052338927285374, - "learning_rate": 6.4644197021491515e-06, - "loss": 0.0003, + "epoch": 2.0112, + "grad_norm": 0.0032434068815901823, + "learning_rate": 5.1806462017635775e-06, + "loss": 0.0001, "step": 1257 }, { - "epoch": 1.8832335329341316, - "grad_norm": 0.13174251290530642, - "learning_rate": 6.4492998560896395e-06, - "loss": 0.0013, + "epoch": 2.0128, + "grad_norm": 0.03108346973796724, + "learning_rate": 5.165512124837344e-06, + "loss": 0.0002, "step": 1258 }, { - "epoch": 1.8847305389221556, - "grad_norm": 0.11307650321461288, - "learning_rate": 6.434189292596158e-06, - "loss": 0.0009, + "epoch": 2.0144, + "grad_norm": 0.07334038179465477, + "learning_rate": 5.150392484425728e-06, + "loss": 0.0003, "step": 1259 }, { - "epoch": 1.8862275449101795, - "grad_norm": 0.039672940708096134, - "learning_rate": 6.419088051172133e-06, - "loss": 0.0003, + "epoch": 2.016, + "grad_norm": 0.027860109850781214, + "learning_rate": 5.135287325678271e-06, + "loss": 0.0001, "step": 1260 }, { - "epoch": 1.8877245508982035, - "grad_norm": 0.0947573552544148, - "learning_rate": 6.40399617129661e-06, - "loss": 0.001, + "epoch": 2.0176, + "grad_norm": 0.003642680967855806, + "learning_rate": 5.120196693701267e-06, + "loss": 0.0001, "step": 1261 }, { - "epoch": 1.8892215568862274, - "grad_norm": 0.018111227856452812, - "learning_rate": 6.3889136924241704e-06, - "loss": 0.0002, + "epoch": 2.0192, + "grad_norm": 0.0015039372448527133, + "learning_rate": 5.105120633557634e-06, + "loss": 0.0, "step": 1262 }, { - "epoch": 1.8907185628742516, - "grad_norm": 0.06351616622351396, - "learning_rate": 6.373840653984811e-06, - "loss": 0.0005, + "epoch": 2.0208, + "grad_norm": 0.001065057755579274, + "learning_rate": 5.090059190266779e-06, + "loss": 0.0, "step": 1263 }, { - "epoch": 1.8922155688622755, - "grad_norm": 0.08247922342515682, - "learning_rate": 6.35877709538385e-06, - "loss": 0.0006, + "epoch": 2.0224, + "grad_norm": 0.0018136533690353725, + "learning_rate": 5.075012408804458e-06, + "loss": 0.0, "step": 1264 }, { - "epoch": 1.8937125748502994, - "grad_norm": 0.1613355922056432, - "learning_rate": 6.3437230560018335e-06, - "loss": 0.0011, + "epoch": 2.024, + "grad_norm": 0.05887000158742606, + "learning_rate": 5.059980334102637e-06, + "loss": 0.0002, "step": 1265 }, { - "epoch": 1.8952095808383234, - "grad_norm": 0.17460284441167057, - "learning_rate": 6.3286785751944e-06, - "loss": 0.0033, + "epoch": 2.0256, + "grad_norm": 0.0038137736873319044, + "learning_rate": 5.044963011049384e-06, + "loss": 0.0001, "step": 1266 }, { - "epoch": 1.8967065868263473, - "grad_norm": 0.06534489203227545, - "learning_rate": 6.313643692292219e-06, - "loss": 0.0005, + "epoch": 2.0272, + "grad_norm": 0.0019694602635275628, + "learning_rate": 5.0299604844886985e-06, + "loss": 0.0001, "step": 1267 }, { - "epoch": 1.8982035928143712, - "grad_norm": 0.08598292764832548, - "learning_rate": 6.298618446600856e-06, - "loss": 0.0009, + "epoch": 2.0288, + "grad_norm": 0.004869063315621386, + "learning_rate": 5.0149727992204034e-06, + "loss": 0.0001, "step": 1268 }, { - "epoch": 1.8997005988023952, - "grad_norm": 0.07498272399084252, - "learning_rate": 6.2836028774006945e-06, + "epoch": 2.0304, + "grad_norm": 0.14391547248553516, + "learning_rate": 5.000000000000003e-06, "loss": 0.0005, "step": 1269 }, { - "epoch": 1.9011976047904193, - "grad_norm": 0.07854073646784032, - "learning_rate": 6.268597023946801e-06, - "loss": 0.001, + "epoch": 2.032, + "grad_norm": 0.18742481535002514, + "learning_rate": 4.985042131538545e-06, + "loss": 0.0009, "step": 1270 }, { - "epoch": 1.9026946107784433, - "grad_norm": 0.08325592956362902, - "learning_rate": 6.253600925468861e-06, - "loss": 0.0009, + "epoch": 2.0336, + "grad_norm": 0.005019826620883686, + "learning_rate": 4.970099238502494e-06, + "loss": 0.0001, "step": 1271 }, { - "epoch": 1.9041916167664672, - "grad_norm": 0.06261744481965902, - "learning_rate": 6.238614621171055e-06, - "loss": 0.0004, + "epoch": 2.0352, + "grad_norm": 0.08200275784421962, + "learning_rate": 4.955171365513603e-06, + "loss": 0.0002, "step": 1272 }, { - "epoch": 1.9056886227544911, - "grad_norm": 0.15054085004158702, - "learning_rate": 6.223638150231945e-06, - "loss": 0.0008, + "epoch": 2.0368, + "grad_norm": 0.005278096656013719, + "learning_rate": 4.940258557148765e-06, + "loss": 0.0001, "step": 1273 }, { - "epoch": 1.907185628742515, - "grad_norm": 0.15292361187679987, - "learning_rate": 6.208671551804404e-06, - "loss": 0.0009, + "epoch": 2.0384, + "grad_norm": 0.0065573406554249114, + "learning_rate": 4.925360857939886e-06, + "loss": 0.0001, "step": 1274 }, { - "epoch": 1.908682634730539, - "grad_norm": 0.06302399358176151, - "learning_rate": 6.193714865015479e-06, - "loss": 0.0007, + "epoch": 2.04, + "grad_norm": 0.021604013246013607, + "learning_rate": 4.910478312373757e-06, + "loss": 0.0001, "step": 1275 }, { - "epoch": 1.910179640718563, - "grad_norm": 0.08082819893731763, - "learning_rate": 6.178768128966319e-06, - "loss": 0.0011, + "epoch": 2.0416, + "grad_norm": 0.002484955128533375, + "learning_rate": 4.895610964891923e-06, + "loss": 0.0, "step": 1276 }, { - "epoch": 1.9116766467065869, - "grad_norm": 0.10017565699972938, - "learning_rate": 6.163831382732046e-06, - "loss": 0.0005, + "epoch": 2.0432, + "grad_norm": 0.0027986394634020926, + "learning_rate": 4.8807588598905364e-06, + "loss": 0.0001, "step": 1277 }, { - "epoch": 1.9131736526946108, - "grad_norm": 0.1603341983015068, - "learning_rate": 6.148904665361677e-06, - "loss": 0.0008, + "epoch": 2.0448, + "grad_norm": 0.0027430637359838494, + "learning_rate": 4.865922041720239e-06, + "loss": 0.0, "step": 1278 }, { - "epoch": 1.9146706586826348, - "grad_norm": 0.07977074549410447, - "learning_rate": 6.133988015878005e-06, - "loss": 0.0003, + "epoch": 2.0464, + "grad_norm": 0.002737074032174217, + "learning_rate": 4.8511005546860214e-06, + "loss": 0.0, "step": 1279 }, { - "epoch": 1.9161676646706587, - "grad_norm": 0.08604266347649189, - "learning_rate": 6.119081473277502e-06, - "loss": 0.0015, + "epoch": 2.048, + "grad_norm": 0.008016519753927618, + "learning_rate": 4.836294443047088e-06, + "loss": 0.0001, "step": 1280 }, { - "epoch": 1.9176646706586826, - "grad_norm": 0.07316072392265781, - "learning_rate": 6.104185076530224e-06, - "loss": 0.0019, + "epoch": 2.0496, + "grad_norm": 0.12372213692952685, + "learning_rate": 4.821503751016746e-06, + "loss": 0.0007, "step": 1281 }, { - "epoch": 1.9191616766467066, - "grad_norm": 0.07219162163636049, - "learning_rate": 6.0892988645796894e-06, - "loss": 0.0011, + "epoch": 2.0512, + "grad_norm": 0.0013784424610091336, + "learning_rate": 4.806728522762241e-06, + "loss": 0.0, "step": 1282 }, { - "epoch": 1.9206586826347305, - "grad_norm": 0.03498121695768265, - "learning_rate": 6.074422876342808e-06, - "loss": 0.0002, + "epoch": 2.0528, + "grad_norm": 0.11916867318013918, + "learning_rate": 4.791968802404648e-06, + "loss": 0.0004, "step": 1283 }, { - "epoch": 1.9221556886227544, - "grad_norm": 0.18083663339991857, - "learning_rate": 6.059557150709745e-06, - "loss": 0.0009, + "epoch": 2.0544, + "grad_norm": 0.011827412648333358, + "learning_rate": 4.777224634018732e-06, + "loss": 0.0002, "step": 1284 }, { - "epoch": 1.9236526946107784, - "grad_norm": 0.059187615256778826, - "learning_rate": 6.0447017265438464e-06, - "loss": 0.0004, + "epoch": 2.056, + "grad_norm": 0.010541327479658821, + "learning_rate": 4.762496061632814e-06, + "loss": 0.0001, "step": 1285 }, { - "epoch": 1.9251497005988023, - "grad_norm": 0.12966709587781838, - "learning_rate": 6.029856642681528e-06, - "loss": 0.001, + "epoch": 2.0576, + "grad_norm": 0.0059639958088174546, + "learning_rate": 4.7477831292286555e-06, + "loss": 0.0001, "step": 1286 }, { - "epoch": 1.9266467065868262, - "grad_norm": 0.03319551814066764, - "learning_rate": 6.015021937932166e-06, - "loss": 0.0003, + "epoch": 2.0592, + "grad_norm": 0.07898598055159177, + "learning_rate": 4.733085880741301e-06, + "loss": 0.0006, "step": 1287 }, { - "epoch": 1.9281437125748502, - "grad_norm": 0.08693549001284917, - "learning_rate": 6.000197651078012e-06, - "loss": 0.0002, + "epoch": 2.0608, + "grad_norm": 0.010649954432394063, + "learning_rate": 4.7184043600589655e-06, + "loss": 0.0001, "step": 1288 }, { - "epoch": 1.9296407185628741, - "grad_norm": 0.11435904368412542, - "learning_rate": 5.985383820874069e-06, - "loss": 0.0008, + "epoch": 2.0624, + "grad_norm": 0.01850359909026641, + "learning_rate": 4.703738611022899e-06, + "loss": 0.0001, "step": 1289 }, { - "epoch": 1.931137724550898, - "grad_norm": 0.03605212305635656, - "learning_rate": 5.970580486048016e-06, - "loss": 0.0002, + "epoch": 2.064, + "grad_norm": 0.013855878203025425, + "learning_rate": 4.689088677427249e-06, + "loss": 0.0001, "step": 1290 }, { - "epoch": 1.9326347305389222, - "grad_norm": 0.07980050042842693, - "learning_rate": 5.955787685300085e-06, - "loss": 0.0005, + "epoch": 2.0656, + "grad_norm": 0.0029063101645990727, + "learning_rate": 4.674454603018949e-06, + "loss": 0.0001, "step": 1291 }, { - "epoch": 1.9341317365269461, - "grad_norm": 0.11431738205279493, - "learning_rate": 5.941005457302975e-06, - "loss": 0.0007, + "epoch": 2.0672, + "grad_norm": 0.002771446021717218, + "learning_rate": 4.659836431497563e-06, + "loss": 0.0, "step": 1292 }, { - "epoch": 1.93562874251497, - "grad_norm": 0.2420720686661468, - "learning_rate": 5.926233840701747e-06, - "loss": 0.0018, + "epoch": 2.0688, + "grad_norm": 0.007558521815404343, + "learning_rate": 4.645234206515171e-06, + "loss": 0.0001, "step": 1293 }, { - "epoch": 1.937125748502994, - "grad_norm": 0.17499882775445486, - "learning_rate": 5.911472874113709e-06, - "loss": 0.0015, + "epoch": 2.0704, + "grad_norm": 0.0020190105256480957, + "learning_rate": 4.630647971676232e-06, + "loss": 0.0, "step": 1294 }, { - "epoch": 1.938622754491018, - "grad_norm": 0.08874284785143925, - "learning_rate": 5.896722596128337e-06, - "loss": 0.0004, + "epoch": 2.072, + "grad_norm": 0.011165801591369819, + "learning_rate": 4.616077770537453e-06, + "loss": 0.0001, "step": 1295 }, { - "epoch": 1.9401197604790419, - "grad_norm": 0.0714199007843593, - "learning_rate": 5.881983045307158e-06, - "loss": 0.0003, + "epoch": 2.0736, + "grad_norm": 0.0022416122666900945, + "learning_rate": 4.601523646607675e-06, + "loss": 0.0001, "step": 1296 }, { - "epoch": 1.9416167664670658, - "grad_norm": 0.011510865246892025, - "learning_rate": 5.867254260183665e-06, + "epoch": 2.0752, + "grad_norm": 0.007794185704477617, + "learning_rate": 4.586985643347716e-06, "loss": 0.0001, "step": 1297 }, { - "epoch": 1.94311377245509, - "grad_norm": 0.10193764772516307, - "learning_rate": 5.85253627926319e-06, - "loss": 0.0006, + "epoch": 2.0768, + "grad_norm": 0.0015124048307453561, + "learning_rate": 4.572463804170263e-06, + "loss": 0.0, "step": 1298 }, { - "epoch": 1.944610778443114, - "grad_norm": 0.15050408491671105, - "learning_rate": 5.837829141022831e-06, - "loss": 0.0007, + "epoch": 2.0784, + "grad_norm": 0.0030354832046186786, + "learning_rate": 4.557958172439726e-06, + "loss": 0.0001, "step": 1299 }, { - "epoch": 1.9461077844311379, - "grad_norm": 0.10898094115408612, - "learning_rate": 5.823132883911349e-06, - "loss": 0.0009, + "epoch": 2.08, + "grad_norm": 0.0020373494626846336, + "learning_rate": 4.543468791472131e-06, + "loss": 0.0, "step": 1300 }, { - "epoch": 1.9476047904191618, - "grad_norm": 0.13535481273196698, - "learning_rate": 5.80844754634903e-06, - "loss": 0.001, + "epoch": 2.0816, + "grad_norm": 0.001046926322424395, + "learning_rate": 4.5289957045349655e-06, + "loss": 0.0, "step": 1301 }, { - "epoch": 1.9491017964071857, - "grad_norm": 0.10884632784883908, - "learning_rate": 5.793773166727648e-06, - "loss": 0.0012, + "epoch": 2.0832, + "grad_norm": 0.0007689101693175983, + "learning_rate": 4.5145389548470645e-06, + "loss": 0.0, "step": 1302 }, { - "epoch": 1.9505988023952097, - "grad_norm": 0.06725195023294889, - "learning_rate": 5.779109783410295e-06, - "loss": 0.0004, + "epoch": 2.0848, + "grad_norm": 0.004590852983945126, + "learning_rate": 4.500098585578475e-06, + "loss": 0.0001, "step": 1303 }, { - "epoch": 1.9520958083832336, - "grad_norm": 0.03344699498459077, - "learning_rate": 5.764457434731345e-06, - "loss": 0.0003, + "epoch": 2.0864, + "grad_norm": 0.016623279739457517, + "learning_rate": 4.485674639850334e-06, + "loss": 0.0001, "step": 1304 }, { - "epoch": 1.9535928143712575, - "grad_norm": 0.07364922411512191, - "learning_rate": 5.749816158996309e-06, - "loss": 0.0012, + "epoch": 2.088, + "grad_norm": 0.12871540044523444, + "learning_rate": 4.471267160734731e-06, + "loss": 0.0006, "step": 1305 }, { - "epoch": 1.9550898203592815, - "grad_norm": 0.1706215633793, - "learning_rate": 5.735185994481748e-06, - "loss": 0.0015, + "epoch": 2.0896, + "grad_norm": 0.004518980147346147, + "learning_rate": 4.456876191254582e-06, + "loss": 0.0001, "step": 1306 }, { - "epoch": 1.9565868263473054, - "grad_norm": 0.16089826885989925, - "learning_rate": 5.720566979435193e-06, - "loss": 0.0014, + "epoch": 2.0912, + "grad_norm": 0.0023069709864382807, + "learning_rate": 4.4425017743835155e-06, + "loss": 0.0, "step": 1307 }, { - "epoch": 1.9580838323353293, - "grad_norm": 0.13197070265133728, - "learning_rate": 5.705959152074998e-06, - "loss": 0.0011, + "epoch": 2.0928, + "grad_norm": 0.011612607081832236, + "learning_rate": 4.4281439530457174e-06, + "loss": 0.0001, "step": 1308 }, { - "epoch": 1.9595808383233533, - "grad_norm": 0.10737878089775195, - "learning_rate": 5.6913625505902966e-06, - "loss": 0.0018, + "epoch": 2.0944, + "grad_norm": 0.004032416921738347, + "learning_rate": 4.413802770115816e-06, + "loss": 0.0001, "step": 1309 }, { - "epoch": 1.9610778443113772, - "grad_norm": 0.09551442058751701, - "learning_rate": 5.6767772131408606e-06, - "loss": 0.0006, + "epoch": 2.096, + "grad_norm": 0.020078465032642864, + "learning_rate": 4.399478268418771e-06, + "loss": 0.0001, "step": 1310 }, { - "epoch": 1.9625748502994012, - "grad_norm": 0.1495760754554237, - "learning_rate": 5.662203177857015e-06, - "loss": 0.0018, + "epoch": 2.0976, + "grad_norm": 0.009372946495378932, + "learning_rate": 4.385170490729712e-06, + "loss": 0.0001, "step": 1311 }, { - "epoch": 1.964071856287425, - "grad_norm": 0.030762250838208226, - "learning_rate": 5.647640482839543e-06, - "loss": 0.0003, + "epoch": 2.0992, + "grad_norm": 0.004075995720607409, + "learning_rate": 4.370879479773837e-06, + "loss": 0.0001, "step": 1312 }, { - "epoch": 1.965568862275449, - "grad_norm": 0.0627412308766797, - "learning_rate": 5.633089166159571e-06, - "loss": 0.0014, + "epoch": 2.1008, + "grad_norm": 0.005641261867447899, + "learning_rate": 4.356605278226274e-06, + "loss": 0.0001, "step": 1313 }, { - "epoch": 1.967065868263473, - "grad_norm": 0.08508365773836313, - "learning_rate": 5.618549265858499e-06, - "loss": 0.0008, + "epoch": 2.1024, + "grad_norm": 0.019357229759758513, + "learning_rate": 4.342347928711953e-06, + "loss": 0.0001, "step": 1314 }, { - "epoch": 1.968562874251497, - "grad_norm": 0.11170302861015123, - "learning_rate": 5.604020819947853e-06, - "loss": 0.0005, + "epoch": 2.104, + "grad_norm": 0.01434237461770615, + "learning_rate": 4.328107473805487e-06, + "loss": 0.0001, "step": 1315 }, { - "epoch": 1.9700598802395208, - "grad_norm": 0.13346911790654048, - "learning_rate": 5.589503866409238e-06, - "loss": 0.0017, + "epoch": 2.1056, + "grad_norm": 0.013157265563646457, + "learning_rate": 4.313883956031031e-06, + "loss": 0.0001, "step": 1316 }, { - "epoch": 1.9715568862275448, - "grad_norm": 0.048207609953502435, - "learning_rate": 5.574998443194206e-06, - "loss": 0.0003, + "epoch": 2.1072, + "grad_norm": 0.0026463734058776384, + "learning_rate": 4.299677417862174e-06, + "loss": 0.0, "step": 1317 }, { - "epoch": 1.9730538922155687, - "grad_norm": 0.1790415818027207, - "learning_rate": 5.560504588224162e-06, - "loss": 0.0022, + "epoch": 2.1088, + "grad_norm": 0.009144315666465027, + "learning_rate": 4.28548790172179e-06, + "loss": 0.0001, "step": 1318 }, { - "epoch": 1.9745508982035929, - "grad_norm": 0.053657875706085344, - "learning_rate": 5.546022339390271e-06, - "loss": 0.0007, + "epoch": 2.1104, + "grad_norm": 0.050698061673316835, + "learning_rate": 4.2713154499819345e-06, + "loss": 0.0003, "step": 1319 }, { - "epoch": 1.9760479041916168, - "grad_norm": 0.08409549221939727, - "learning_rate": 5.531551734553354e-06, - "loss": 0.0012, + "epoch": 2.112, + "grad_norm": 0.005293028607209848, + "learning_rate": 4.257160104963695e-06, + "loss": 0.0001, "step": 1320 }, { - "epoch": 1.9775449101796407, - "grad_norm": 0.10964353914589475, - "learning_rate": 5.517092811543801e-06, - "loss": 0.0007, + "epoch": 2.1136, + "grad_norm": 0.006772389868685981, + "learning_rate": 4.243021908937083e-06, + "loss": 0.0001, "step": 1321 }, { - "epoch": 1.9790419161676647, - "grad_norm": 0.15383349246769662, - "learning_rate": 5.502645608161451e-06, - "loss": 0.0008, + "epoch": 2.1152, + "grad_norm": 0.0022400613349784475, + "learning_rate": 4.228900904120895e-06, + "loss": 0.0001, "step": 1322 }, { - "epoch": 1.9805389221556886, - "grad_norm": 0.09629278498120927, - "learning_rate": 5.488210162175508e-06, - "loss": 0.0005, + "epoch": 2.1168, + "grad_norm": 0.004894576564314422, + "learning_rate": 4.214797132682597e-06, + "loss": 0.0001, "step": 1323 }, { - "epoch": 1.9820359281437125, - "grad_norm": 0.08906179596805544, - "learning_rate": 5.473786511324439e-06, - "loss": 0.0006, + "epoch": 2.1184, + "grad_norm": 0.0011541246411294337, + "learning_rate": 4.200710636738189e-06, + "loss": 0.0, "step": 1324 }, { - "epoch": 1.9835329341317365, - "grad_norm": 0.06933054825478269, - "learning_rate": 5.459374693315876e-06, - "loss": 0.0003, + "epoch": 2.12, + "grad_norm": 0.024462842828927446, + "learning_rate": 4.186641458352088e-06, + "loss": 0.0002, "step": 1325 }, { - "epoch": 1.9850299401197606, - "grad_norm": 0.035068650708494103, - "learning_rate": 5.44497474582651e-06, - "loss": 0.0002, + "epoch": 2.1216, + "grad_norm": 0.008815829602056808, + "learning_rate": 4.172589639536992e-06, + "loss": 0.0001, "step": 1326 }, { - "epoch": 1.9865269461077846, - "grad_norm": 0.09362975516235468, - "learning_rate": 5.430586706502014e-06, - "loss": 0.001, + "epoch": 2.1232, + "grad_norm": 0.0043029685313057, + "learning_rate": 4.158555222253772e-06, + "loss": 0.0001, "step": 1327 }, { - "epoch": 1.9880239520958085, - "grad_norm": 0.06835759304910806, - "learning_rate": 5.416210612956918e-06, - "loss": 0.0009, + "epoch": 2.1248, + "grad_norm": 0.00504839010368318, + "learning_rate": 4.144538248411321e-06, + "loss": 0.0001, "step": 1328 }, { - "epoch": 1.9895209580838324, - "grad_norm": 0.019786881306191252, - "learning_rate": 5.401846502774522e-06, + "epoch": 2.1264, + "grad_norm": 0.012591004904414362, + "learning_rate": 4.130538759866457e-06, "loss": 0.0001, "step": 1329 }, { - "epoch": 1.9910179640718564, - "grad_norm": 0.1299238026590814, - "learning_rate": 5.3874944135068e-06, - "loss": 0.0015, + "epoch": 2.128, + "grad_norm": 0.01673392343030602, + "learning_rate": 4.116556798423776e-06, + "loss": 0.0001, "step": 1330 }, { - "epoch": 1.9925149700598803, - "grad_norm": 0.14300823664119922, - "learning_rate": 5.373154382674301e-06, - "loss": 0.0009, + "epoch": 2.1296, + "grad_norm": 0.05585050121872357, + "learning_rate": 4.102592405835536e-06, + "loss": 0.0002, "step": 1331 }, { - "epoch": 1.9940119760479043, - "grad_norm": 0.09070831138488968, - "learning_rate": 5.3588264477660525e-06, - "loss": 0.0007, + "epoch": 2.1312, + "grad_norm": 0.02670201452769496, + "learning_rate": 4.088645623801534e-06, + "loss": 0.0002, "step": 1332 }, { - "epoch": 1.9955089820359282, - "grad_norm": 0.3279851722644398, - "learning_rate": 5.344510646239447e-06, - "loss": 0.0007, + "epoch": 2.1328, + "grad_norm": 0.0018710927812524058, + "learning_rate": 4.074716493968976e-06, + "loss": 0.0, "step": 1333 }, { - "epoch": 1.9970059880239521, - "grad_norm": 0.13371612237597238, - "learning_rate": 5.330207015520176e-06, - "loss": 0.0005, + "epoch": 2.1344, + "grad_norm": 0.0017912785338384182, + "learning_rate": 4.060805057932359e-06, + "loss": 0.0, "step": 1334 }, { - "epoch": 1.998502994011976, - "grad_norm": 0.05563881939066664, - "learning_rate": 5.3159155930021e-06, - "loss": 0.0004, + "epoch": 2.136, + "grad_norm": 0.00206502439748784, + "learning_rate": 4.046911357233343e-06, + "loss": 0.0, "step": 1335 }, { - "epoch": 2.0, - "grad_norm": 0.05524915070416573, - "learning_rate": 5.301636416047166e-06, - "loss": 0.0003, + "epoch": 2.1376, + "grad_norm": 0.004367991071358672, + "learning_rate": 4.033035433360624e-06, + "loss": 0.0, "step": 1336 }, { - "epoch": 2.001497005988024, - "grad_norm": 0.078998616135492, - "learning_rate": 5.287369521985308e-06, - "loss": 0.0005, + "epoch": 2.1391999999999998, + "grad_norm": 0.10915011758089804, + "learning_rate": 4.019177327749822e-06, + "loss": 0.0003, "step": 1337 }, { - "epoch": 2.002994011976048, - "grad_norm": 0.11087032843271219, - "learning_rate": 5.273114948114346e-06, - "loss": 0.0006, + "epoch": 2.1408, + "grad_norm": 0.011046214540442804, + "learning_rate": 4.00533708178334e-06, + "loss": 0.0001, "step": 1338 }, { - "epoch": 2.004491017964072, - "grad_norm": 0.01134812052606357, - "learning_rate": 5.258872731699907e-06, - "loss": 0.0002, + "epoch": 2.1424, + "grad_norm": 0.0008909985680504715, + "learning_rate": 3.991514736790259e-06, + "loss": 0.0, "step": 1339 }, { - "epoch": 2.0059880239520957, - "grad_norm": 0.053992660508964285, - "learning_rate": 5.244642909975282e-06, - "loss": 0.0005, + "epoch": 2.144, + "grad_norm": 0.0064867553974109725, + "learning_rate": 3.977710334046193e-06, + "loss": 0.0001, "step": 1340 }, { - "epoch": 2.0074850299401197, - "grad_norm": 0.06896338730150291, - "learning_rate": 5.23042552014139e-06, - "loss": 0.0003, + "epoch": 2.1456, + "grad_norm": 0.03821471376278644, + "learning_rate": 3.9639239147731865e-06, + "loss": 0.0002, "step": 1341 }, { - "epoch": 2.0089820359281436, - "grad_norm": 0.03331639781525915, - "learning_rate": 5.216220599366631e-06, - "loss": 0.0003, + "epoch": 2.1471999999999998, + "grad_norm": 0.10116537076929259, + "learning_rate": 3.950155520139581e-06, + "loss": 0.0002, "step": 1342 }, { - "epoch": 2.0104790419161676, - "grad_norm": 0.03048656266020426, - "learning_rate": 5.202028184786807e-06, - "loss": 0.0003, + "epoch": 2.1488, + "grad_norm": 0.0013363768944560507, + "learning_rate": 3.936405191259891e-06, + "loss": 0.0, "step": 1343 }, { - "epoch": 2.0119760479041915, - "grad_norm": 0.10036632313494923, - "learning_rate": 5.187848313505042e-06, - "loss": 0.0008, + "epoch": 2.1504, + "grad_norm": 0.008247262958237406, + "learning_rate": 3.9226729691946865e-06, + "loss": 0.0001, "step": 1344 }, { - "epoch": 2.0134730538922154, - "grad_norm": 0.04333086369466123, - "learning_rate": 5.173681022591643e-06, - "loss": 0.0004, + "epoch": 2.152, + "grad_norm": 0.07692749652969899, + "learning_rate": 3.908958894950465e-06, + "loss": 0.0022, "step": 1345 }, { - "epoch": 2.0149700598802394, - "grad_norm": 0.0999560008915293, - "learning_rate": 5.159526349084051e-06, - "loss": 0.0009, + "epoch": 2.1536, + "grad_norm": 0.0026683879579601478, + "learning_rate": 3.895263009479534e-06, + "loss": 0.0, "step": 1346 }, { - "epoch": 2.0164670658682633, - "grad_norm": 0.09927472161629344, - "learning_rate": 5.145384329986709e-06, - "loss": 0.0006, + "epoch": 2.1552, + "grad_norm": 0.0386498226958223, + "learning_rate": 3.881585353679891e-06, + "loss": 0.0002, "step": 1347 }, { - "epoch": 2.0179640718562872, - "grad_norm": 0.046997627545573885, - "learning_rate": 5.13125500227098e-06, - "loss": 0.0003, + "epoch": 2.1568, + "grad_norm": 0.00107301484913968, + "learning_rate": 3.867925968395085e-06, + "loss": 0.0, "step": 1348 }, { - "epoch": 2.019461077844311, - "grad_norm": 0.11233978950395591, - "learning_rate": 5.117138402875048e-06, - "loss": 0.0011, + "epoch": 2.1584, + "grad_norm": 0.0009612208926005227, + "learning_rate": 3.854284894414122e-06, + "loss": 0.0, "step": 1349 }, { - "epoch": 2.020958083832335, - "grad_norm": 0.04511729239488089, - "learning_rate": 5.103034568703819e-06, - "loss": 0.0002, + "epoch": 2.16, + "grad_norm": 0.08101908726754564, + "learning_rate": 3.840662172471315e-06, + "loss": 0.0003, "step": 1350 }, { - "epoch": 2.0224550898203595, - "grad_norm": 0.08572458482338798, - "learning_rate": 5.088943536628843e-06, - "loss": 0.0006, + "epoch": 2.1616, + "grad_norm": 0.007755857170339103, + "learning_rate": 3.827057843246181e-06, + "loss": 0.0001, "step": 1351 }, { - "epoch": 2.0239520958083834, - "grad_norm": 0.07982885735619828, - "learning_rate": 5.0748653434881735e-06, - "loss": 0.0009, + "epoch": 2.1632, + "grad_norm": 0.003052486130222796, + "learning_rate": 3.8134719473633098e-06, + "loss": 0.0001, "step": 1352 }, { - "epoch": 2.0254491017964074, - "grad_norm": 0.05293702214000303, - "learning_rate": 5.060800026086322e-06, + "epoch": 2.1648, + "grad_norm": 0.0633405164190523, + "learning_rate": 3.799904525392251e-06, "loss": 0.0004, "step": 1353 }, { - "epoch": 2.0269461077844313, - "grad_norm": 0.10137109013871722, - "learning_rate": 5.046747621194132e-06, - "loss": 0.0004, + "epoch": 2.1664, + "grad_norm": 0.026255366446261995, + "learning_rate": 3.786355617847385e-06, + "loss": 0.0001, "step": 1354 }, { - "epoch": 2.0284431137724552, - "grad_norm": 0.17093193553427508, - "learning_rate": 5.032708165548682e-06, - "loss": 0.0007, + "epoch": 2.168, + "grad_norm": 0.02745119083760212, + "learning_rate": 3.7728252651878018e-06, + "loss": 0.0001, "step": 1355 }, { - "epoch": 2.029940119760479, - "grad_norm": 0.05939541972194821, - "learning_rate": 5.018681695853219e-06, - "loss": 0.0003, + "epoch": 2.1696, + "grad_norm": 0.010410191546191274, + "learning_rate": 3.759313507817196e-06, + "loss": 0.0001, "step": 1356 }, { - "epoch": 2.031437125748503, - "grad_norm": 0.05500060420421118, - "learning_rate": 5.00466824877701e-06, - "loss": 0.0003, + "epoch": 2.1712, + "grad_norm": 0.01186546330776542, + "learning_rate": 3.745820386083724e-06, + "loss": 0.0001, "step": 1357 }, { - "epoch": 2.032934131736527, - "grad_norm": 0.04529500584356061, - "learning_rate": 4.99066786095531e-06, - "loss": 0.0004, + "epoch": 2.1728, + "grad_norm": 0.004319934470598465, + "learning_rate": 3.7323459402798936e-06, + "loss": 0.0001, "step": 1358 }, { - "epoch": 2.034431137724551, - "grad_norm": 0.1107914946140763, - "learning_rate": 4.976680568989203e-06, - "loss": 0.0008, + "epoch": 2.1744, + "grad_norm": 0.002464210021865245, + "learning_rate": 3.718890210642442e-06, + "loss": 0.0, "step": 1359 }, { - "epoch": 2.035928143712575, - "grad_norm": 0.13392674083255784, - "learning_rate": 4.9627064094455594e-06, - "loss": 0.0009, + "epoch": 2.176, + "grad_norm": 0.005163126523659202, + "learning_rate": 3.705453237352227e-06, + "loss": 0.0001, "step": 1360 }, { - "epoch": 2.037425149700599, - "grad_norm": 0.10073833115439051, - "learning_rate": 4.948745418856908e-06, - "loss": 0.0009, + "epoch": 2.1776, + "grad_norm": 0.0055654021392102775, + "learning_rate": 3.6920350605340883e-06, + "loss": 0.0001, "step": 1361 }, { - "epoch": 2.038922155688623, - "grad_norm": 0.06806623307714545, - "learning_rate": 4.934797633721343e-06, - "loss": 0.0003, + "epoch": 2.1792, + "grad_norm": 0.005168560794998334, + "learning_rate": 3.6786357202567367e-06, + "loss": 0.0001, "step": 1362 }, { - "epoch": 2.0404191616766467, - "grad_norm": 0.08373463162355546, - "learning_rate": 4.92086309050246e-06, - "loss": 0.001, + "epoch": 2.1808, + "grad_norm": 0.0028929438154425002, + "learning_rate": 3.6652552565326382e-06, + "loss": 0.0, "step": 1363 }, { - "epoch": 2.0419161676646707, - "grad_norm": 0.06828569633730956, - "learning_rate": 4.906941825629203e-06, - "loss": 0.0005, + "epoch": 2.1824, + "grad_norm": 0.016388761955282143, + "learning_rate": 3.6518937093178873e-06, + "loss": 0.0001, "step": 1364 }, { - "epoch": 2.0434131736526946, - "grad_norm": 0.030176625061175792, - "learning_rate": 4.893033875495831e-06, - "loss": 0.0002, + "epoch": 2.184, + "grad_norm": 0.0022084420735664547, + "learning_rate": 3.638551118512089e-06, + "loss": 0.0, "step": 1365 }, { - "epoch": 2.0449101796407185, - "grad_norm": 0.13266862164504664, - "learning_rate": 4.879139276461779e-06, - "loss": 0.0009, + "epoch": 2.1856, + "grad_norm": 0.005667837762482932, + "learning_rate": 3.6252275239582522e-06, + "loss": 0.0001, "step": 1366 }, { - "epoch": 2.0464071856287425, - "grad_norm": 0.039122073400303665, - "learning_rate": 4.865258064851579e-06, - "loss": 0.0003, + "epoch": 2.1872, + "grad_norm": 0.0037123820490449293, + "learning_rate": 3.611922965442648e-06, + "loss": 0.0001, "step": 1367 }, { - "epoch": 2.0479041916167664, - "grad_norm": 0.08056457623508984, - "learning_rate": 4.851390276954779e-06, - "loss": 0.001, + "epoch": 2.1888, + "grad_norm": 0.01350224261078892, + "learning_rate": 3.5986374826947067e-06, + "loss": 0.0001, "step": 1368 }, { - "epoch": 2.0494011976047903, - "grad_norm": 0.12580066367234927, - "learning_rate": 4.837535949025807e-06, - "loss": 0.0007, + "epoch": 2.1904, + "grad_norm": 0.0035749356635064347, + "learning_rate": 3.5853711153868962e-06, + "loss": 0.0001, "step": 1369 }, { - "epoch": 2.0508982035928143, - "grad_norm": 0.1074019479468816, - "learning_rate": 4.823695117283929e-06, - "loss": 0.0007, + "epoch": 2.192, + "grad_norm": 0.0021972056937981984, + "learning_rate": 3.5721239031346067e-06, + "loss": 0.0, "step": 1370 }, { - "epoch": 2.052395209580838, - "grad_norm": 0.14807294780861768, - "learning_rate": 4.809867817913114e-06, - "loss": 0.0012, + "epoch": 2.1936, + "grad_norm": 0.0013664134799337483, + "learning_rate": 3.558895885496023e-06, + "loss": 0.0, "step": 1371 }, { - "epoch": 2.053892215568862, - "grad_norm": 0.04323366086908764, - "learning_rate": 4.796054087061955e-06, - "loss": 0.0003, + "epoch": 2.1952, + "grad_norm": 0.038637633743094854, + "learning_rate": 3.545687101972013e-06, + "loss": 0.0002, "step": 1372 }, { - "epoch": 2.055389221556886, - "grad_norm": 0.11724138562817155, - "learning_rate": 4.782253960843577e-06, - "loss": 0.0007, + "epoch": 2.1968, + "grad_norm": 0.019984493497667098, + "learning_rate": 3.53249759200601e-06, + "loss": 0.0002, "step": 1373 }, { - "epoch": 2.05688622754491, - "grad_norm": 0.0943855227419094, - "learning_rate": 4.768467475335531e-06, - "loss": 0.0007, + "epoch": 2.1984, + "grad_norm": 0.004848472861931771, + "learning_rate": 3.519327394983888e-06, + "loss": 0.0001, "step": 1374 }, { - "epoch": 2.058383233532934, - "grad_norm": 0.03896694150333076, - "learning_rate": 4.7546946665797215e-06, + "epoch": 2.2, + "grad_norm": 0.06446766626275639, + "learning_rate": 3.506176550233863e-06, "loss": 0.0003, "step": 1375 }, { - "epoch": 2.059880239520958, - "grad_norm": 0.058883121619071395, - "learning_rate": 4.740935570582276e-06, - "loss": 0.0004, + "epoch": 2.2016, + "grad_norm": 0.005012667702005252, + "learning_rate": 3.4930450970263485e-06, + "loss": 0.0001, "step": 1376 }, { - "epoch": 2.061377245508982, - "grad_norm": 0.11793570437876384, - "learning_rate": 4.7271902233134946e-06, - "loss": 0.001, + "epoch": 2.2032, + "grad_norm": 0.002424923752008124, + "learning_rate": 3.479933074573858e-06, + "loss": 0.0, "step": 1377 }, { - "epoch": 2.062874251497006, - "grad_norm": 0.15311600066554015, - "learning_rate": 4.713458660707723e-06, - "loss": 0.0006, + "epoch": 2.2048, + "grad_norm": 0.025725036875343558, + "learning_rate": 3.4668405220308797e-06, + "loss": 0.0001, "step": 1378 }, { - "epoch": 2.06437125748503, - "grad_norm": 0.04019764610619817, - "learning_rate": 4.699740918663271e-06, - "loss": 0.0002, + "epoch": 2.2064, + "grad_norm": 0.0015935682070169054, + "learning_rate": 3.453767478493761e-06, + "loss": 0.0, "step": 1379 }, { - "epoch": 2.065868263473054, - "grad_norm": 0.1371190868854585, - "learning_rate": 4.686037033042319e-06, - "loss": 0.0009, + "epoch": 2.208, + "grad_norm": 0.012371608443721404, + "learning_rate": 3.440713983000601e-06, + "loss": 0.0001, "step": 1380 }, { - "epoch": 2.067365269461078, - "grad_norm": 0.19257550530901746, - "learning_rate": 4.672347039670817e-06, - "loss": 0.0022, + "epoch": 2.2096, + "grad_norm": 0.0060595021828897914, + "learning_rate": 3.4276800745311135e-06, + "loss": 0.0001, "step": 1381 }, { - "epoch": 2.068862275449102, - "grad_norm": 0.19645409716014256, - "learning_rate": 4.65867097433841e-06, - "loss": 0.0014, + "epoch": 2.2112, + "grad_norm": 0.0018400367957329426, + "learning_rate": 3.4146657920065286e-06, + "loss": 0.0001, "step": 1382 }, { - "epoch": 2.070359281437126, - "grad_norm": 0.05481103486692159, - "learning_rate": 4.645008872798318e-06, - "loss": 0.0002, + "epoch": 2.2128, + "grad_norm": 0.03072977747731515, + "learning_rate": 3.401671174289469e-06, + "loss": 0.0001, "step": 1383 }, { - "epoch": 2.07185628742515, - "grad_norm": 0.05062409226086992, - "learning_rate": 4.63136077076726e-06, - "loss": 0.0004, + "epoch": 2.2144, + "grad_norm": 0.022035702869944843, + "learning_rate": 3.3886962601838327e-06, + "loss": 0.0002, "step": 1384 }, { - "epoch": 2.0733532934131738, - "grad_norm": 0.06611132196602185, - "learning_rate": 4.617726703925357e-06, - "loss": 0.0004, + "epoch": 2.216, + "grad_norm": 0.11692361168448522, + "learning_rate": 3.37574108843469e-06, + "loss": 0.0009, "step": 1385 }, { - "epoch": 2.0748502994011977, - "grad_norm": 0.12704112517705748, - "learning_rate": 4.604106707916038e-06, - "loss": 0.0007, + "epoch": 2.2176, + "grad_norm": 0.13711198884703038, + "learning_rate": 3.3628056977281456e-06, + "loss": 0.0003, "step": 1386 }, { - "epoch": 2.0763473053892216, - "grad_norm": 0.11551006878341365, - "learning_rate": 4.590500818345943e-06, - "loss": 0.0012, + "epoch": 2.2192, + "grad_norm": 0.012664165920126235, + "learning_rate": 3.3498901266912397e-06, + "loss": 0.0001, "step": 1387 }, { - "epoch": 2.0778443113772456, - "grad_norm": 0.21126725853949585, - "learning_rate": 4.576909070784836e-06, - "loss": 0.001, + "epoch": 2.2208, + "grad_norm": 0.004337854227471113, + "learning_rate": 3.3369944138918286e-06, + "loss": 0.0001, "step": 1388 }, { - "epoch": 2.0793413173652695, - "grad_norm": 0.04271579782342062, - "learning_rate": 4.5633315007655165e-06, - "loss": 0.0004, + "epoch": 2.2224, + "grad_norm": 0.0011994567469251882, + "learning_rate": 3.3241185978384636e-06, + "loss": 0.0, "step": 1389 }, { - "epoch": 2.0808383233532934, - "grad_norm": 0.035707781270513814, - "learning_rate": 4.549768143783709e-06, - "loss": 0.0002, + "epoch": 2.224, + "grad_norm": 0.020569906279623306, + "learning_rate": 3.3112627169802948e-06, + "loss": 0.0001, "step": 1390 }, { - "epoch": 2.0823353293413174, - "grad_norm": 0.08987053581533556, - "learning_rate": 4.5362190352979875e-06, - "loss": 0.0006, + "epoch": 2.2256, + "grad_norm": 0.029085218592862228, + "learning_rate": 3.2984268097069284e-06, + "loss": 0.0001, "step": 1391 }, { - "epoch": 2.0838323353293413, - "grad_norm": 0.022527899659366517, - "learning_rate": 4.522684210729673e-06, - "loss": 0.0002, + "epoch": 2.2272, + "grad_norm": 0.030632534886056245, + "learning_rate": 3.2856109143483316e-06, + "loss": 0.0001, "step": 1392 }, { - "epoch": 2.0853293413173652, - "grad_norm": 0.09811043078353192, - "learning_rate": 4.509163705462746e-06, - "loss": 0.0006, + "epoch": 2.2288, + "grad_norm": 0.04838872988537248, + "learning_rate": 3.2728150691747117e-06, + "loss": 0.0002, "step": 1393 }, { - "epoch": 2.086826347305389, - "grad_norm": 0.10474502941427184, - "learning_rate": 4.495657554843747e-06, - "loss": 0.0012, + "epoch": 2.2304, + "grad_norm": 0.020080576934613407, + "learning_rate": 3.2600393123964114e-06, + "loss": 0.0001, "step": 1394 }, { - "epoch": 2.088323353293413, - "grad_norm": 0.05740099228948984, - "learning_rate": 4.482165794181702e-06, - "loss": 0.0003, + "epoch": 2.232, + "grad_norm": 0.019423872222649144, + "learning_rate": 3.2472836821637744e-06, + "loss": 0.0001, "step": 1395 }, { - "epoch": 2.089820359281437, - "grad_norm": 0.07473234668055903, - "learning_rate": 4.468688458748006e-06, - "loss": 0.0005, + "epoch": 2.2336, + "grad_norm": 0.0018257495221678708, + "learning_rate": 3.2345482165670493e-06, + "loss": 0.0, "step": 1396 }, { - "epoch": 2.091317365269461, - "grad_norm": 0.034739985538301855, - "learning_rate": 4.455225583776346e-06, - "loss": 0.0003, + "epoch": 2.2352, + "grad_norm": 0.002354558151618293, + "learning_rate": 3.22183295363627e-06, + "loss": 0.0, "step": 1397 }, { - "epoch": 2.092814371257485, - "grad_norm": 0.06386517981095234, - "learning_rate": 4.441777204462603e-06, - "loss": 0.0004, + "epoch": 2.2368, + "grad_norm": 0.03718599597546754, + "learning_rate": 3.209137931341143e-06, + "loss": 0.0001, "step": 1398 }, { - "epoch": 2.094311377245509, - "grad_norm": 0.02542788250908643, - "learning_rate": 4.4283433559647615e-06, - "loss": 0.0002, + "epoch": 2.2384, + "grad_norm": 0.001302556047675094, + "learning_rate": 3.196463187590929e-06, + "loss": 0.0, "step": 1399 }, { - "epoch": 2.095808383233533, - "grad_norm": 0.05524457421603561, - "learning_rate": 4.4149240734028305e-06, - "loss": 0.0007, + "epoch": 2.24, + "grad_norm": 0.012379528967152616, + "learning_rate": 3.183808760234335e-06, + "loss": 0.0001, "step": 1400 }, { - "epoch": 2.0973053892215567, - "grad_norm": 0.08145735954485667, - "learning_rate": 4.401519391858716e-06, - "loss": 0.0005, + "epoch": 2.2416, + "grad_norm": 0.007406360902967913, + "learning_rate": 3.1711746870594083e-06, + "loss": 0.0001, "step": 1401 }, { - "epoch": 2.0988023952095807, - "grad_norm": 0.13001843472521485, - "learning_rate": 4.388129346376177e-06, - "loss": 0.0009, + "epoch": 2.2432, + "grad_norm": 0.011146375592944902, + "learning_rate": 3.1585610057934022e-06, + "loss": 0.0001, "step": 1402 }, { - "epoch": 2.1002994011976046, - "grad_norm": 0.04328997396656813, - "learning_rate": 4.374753971960695e-06, - "loss": 0.0004, + "epoch": 2.2448, + "grad_norm": 0.0031344599212710265, + "learning_rate": 3.145967754102691e-06, + "loss": 0.0001, "step": 1403 }, { - "epoch": 2.1017964071856285, - "grad_norm": 0.050812707704053536, - "learning_rate": 4.3613933035794e-06, - "loss": 0.0003, + "epoch": 2.2464, + "grad_norm": 0.0027151041816300832, + "learning_rate": 3.1333949695926323e-06, + "loss": 0.0, "step": 1404 }, { - "epoch": 2.1032934131736525, - "grad_norm": 0.12459385212805167, - "learning_rate": 4.348047376160977e-06, - "loss": 0.0011, + "epoch": 2.248, + "grad_norm": 0.04285351348573601, + "learning_rate": 3.1208426898074685e-06, + "loss": 0.0003, "step": 1405 }, { - "epoch": 2.1047904191616764, - "grad_norm": 0.09492623023746591, - "learning_rate": 4.334716224595571e-06, - "loss": 0.0006, + "epoch": 2.2496, + "grad_norm": 0.0039732391753378405, + "learning_rate": 3.1083109522302124e-06, + "loss": 0.0001, "step": 1406 }, { - "epoch": 2.106287425149701, - "grad_norm": 0.07989452008235769, - "learning_rate": 4.321399883734712e-06, - "loss": 0.0005, + "epoch": 2.2512, + "grad_norm": 0.04832900212033141, + "learning_rate": 3.0957997942825337e-06, + "loss": 0.0002, "step": 1407 }, { - "epoch": 2.1077844311377247, - "grad_norm": 0.07205507467214367, - "learning_rate": 4.308098388391184e-06, - "loss": 0.0004, + "epoch": 2.2528, + "grad_norm": 0.1470983997960054, + "learning_rate": 3.083309253324651e-06, + "loss": 0.0007, "step": 1408 }, { - "epoch": 2.1092814371257487, - "grad_norm": 0.017970693243257126, - "learning_rate": 4.2948117733389896e-06, - "loss": 0.0002, + "epoch": 2.2544, + "grad_norm": 0.08966233019627902, + "learning_rate": 3.070839366655215e-06, + "loss": 0.0003, "step": 1409 }, { - "epoch": 2.1107784431137726, - "grad_norm": 0.08565640149022209, - "learning_rate": 4.28154007331321e-06, - "loss": 0.0006, + "epoch": 2.2560000000000002, + "grad_norm": 0.0217570675497833, + "learning_rate": 3.0583901715111965e-06, + "loss": 0.0001, "step": 1410 }, { - "epoch": 2.1122754491017965, - "grad_norm": 0.014993195128250375, - "learning_rate": 4.268283323009941e-06, - "loss": 0.0002, + "epoch": 2.2576, + "grad_norm": 0.0023984671122352836, + "learning_rate": 3.045961705067787e-06, + "loss": 0.0, "step": 1411 }, { - "epoch": 2.1137724550898205, - "grad_norm": 0.04396523431843643, - "learning_rate": 4.255041557086202e-06, - "loss": 0.0005, + "epoch": 2.2592, + "grad_norm": 0.03646686408219286, + "learning_rate": 3.0335540044382693e-06, + "loss": 0.0002, "step": 1412 }, { - "epoch": 2.1152694610778444, - "grad_norm": 0.07072673143683754, - "learning_rate": 4.2418148101598215e-06, - "loss": 0.0004, + "epoch": 2.2608, + "grad_norm": 0.019759144087226437, + "learning_rate": 3.021167106673928e-06, + "loss": 0.0001, "step": 1413 }, { - "epoch": 2.1167664670658684, - "grad_norm": 0.055515949400535856, - "learning_rate": 4.228603116809382e-06, - "loss": 0.0006, + "epoch": 2.2624, + "grad_norm": 0.0013618437184803305, + "learning_rate": 3.008801048763914e-06, + "loss": 0.0, "step": 1414 }, { - "epoch": 2.1182634730538923, - "grad_norm": 0.05168252557459792, - "learning_rate": 4.215406511574104e-06, - "loss": 0.0005, + "epoch": 2.2640000000000002, + "grad_norm": 0.019446950152418513, + "learning_rate": 2.996455867635155e-06, + "loss": 0.0001, "step": 1415 }, { - "epoch": 2.1197604790419162, - "grad_norm": 0.11254802035543135, - "learning_rate": 4.202225028953758e-06, - "loss": 0.0013, + "epoch": 2.2656, + "grad_norm": 0.15992625903637472, + "learning_rate": 2.9841316001522345e-06, + "loss": 0.0011, "step": 1416 }, { - "epoch": 2.12125748502994, - "grad_norm": 0.032013853884326086, - "learning_rate": 4.189058703408596e-06, + "epoch": 2.2672, + "grad_norm": 0.033021831659926115, + "learning_rate": 2.9718282831172885e-06, "loss": 0.0002, "step": 1417 }, { - "epoch": 2.122754491017964, - "grad_norm": 0.0976156994347326, - "learning_rate": 4.175907569359219e-06, - "loss": 0.0007, + "epoch": 2.2688, + "grad_norm": 0.0025513788859806105, + "learning_rate": 2.9595459532698854e-06, + "loss": 0.0, "step": 1418 }, { - "epoch": 2.124251497005988, - "grad_norm": 0.05995922146986086, - "learning_rate": 4.162771661186544e-06, - "loss": 0.0004, + "epoch": 2.2704, + "grad_norm": 0.0010193538052206404, + "learning_rate": 2.94728464728693e-06, + "loss": 0.0, "step": 1419 }, { - "epoch": 2.125748502994012, - "grad_norm": 0.1156120014758531, - "learning_rate": 4.149651013231651e-06, - "loss": 0.0007, + "epoch": 2.2720000000000002, + "grad_norm": 0.03361224556996153, + "learning_rate": 2.9350444017825385e-06, + "loss": 0.0002, "step": 1420 }, { - "epoch": 2.127245508982036, - "grad_norm": 0.0728234683105191, - "learning_rate": 4.136545659795754e-06, - "loss": 0.0003, + "epoch": 2.2736, + "grad_norm": 0.0018094678979295072, + "learning_rate": 2.922825253307947e-06, + "loss": 0.0, "step": 1421 }, { - "epoch": 2.12874251497006, - "grad_norm": 0.04225258816552858, - "learning_rate": 4.123455635140066e-06, - "loss": 0.0002, + "epoch": 2.2752, + "grad_norm": 0.006488564779726169, + "learning_rate": 2.910627238351383e-06, + "loss": 0.0001, "step": 1422 }, { - "epoch": 2.1302395209580838, - "grad_norm": 0.07510640021056275, - "learning_rate": 4.110380973485726e-06, + "epoch": 2.2768, + "grad_norm": 0.16412732998683924, + "learning_rate": 2.898450393337977e-06, "loss": 0.0007, "step": 1423 }, { - "epoch": 2.1317365269461077, - "grad_norm": 0.11472823224242246, - "learning_rate": 4.097321709013725e-06, - "loss": 0.0009, + "epoch": 2.2784, + "grad_norm": 0.009226294554457398, + "learning_rate": 2.886294754629632e-06, + "loss": 0.0001, "step": 1424 }, { - "epoch": 2.1332335329341316, - "grad_norm": 0.04334119016243774, - "learning_rate": 4.084277875864776e-06, - "loss": 0.0002, + "epoch": 2.2800000000000002, + "grad_norm": 0.0016057887977407946, + "learning_rate": 2.8741603585249312e-06, + "loss": 0.0, "step": 1425 }, { - "epoch": 2.1347305389221556, - "grad_norm": 0.03141663452074846, - "learning_rate": 4.071249508139275e-06, - "loss": 0.0003, + "epoch": 2.2816, + "grad_norm": 0.025719950014896933, + "learning_rate": 2.8620472412590227e-06, + "loss": 0.0002, "step": 1426 }, { - "epoch": 2.1362275449101795, - "grad_norm": 0.06277111693317153, - "learning_rate": 4.058236639897171e-06, - "loss": 0.0004, + "epoch": 2.2832, + "grad_norm": 0.09835938693598928, + "learning_rate": 2.8499554390035144e-06, + "loss": 0.0013, "step": 1427 }, { - "epoch": 2.1377245508982035, - "grad_norm": 0.016403479959175616, - "learning_rate": 4.0452393051579e-06, + "epoch": 2.2848, + "grad_norm": 0.002955627209750324, + "learning_rate": 2.837884987866363e-06, "loss": 0.0001, "step": 1428 }, { - "epoch": 2.1392215568862274, - "grad_norm": 0.07804687214286263, - "learning_rate": 4.032257537900286e-06, - "loss": 0.0006, + "epoch": 2.2864, + "grad_norm": 0.0016515335057602617, + "learning_rate": 2.8258359238917665e-06, + "loss": 0.0, "step": 1429 }, { - "epoch": 2.1407185628742513, - "grad_norm": 0.07304022000926429, - "learning_rate": 4.019291372062452e-06, - "loss": 0.0009, + "epoch": 2.288, + "grad_norm": 0.006663918000619816, + "learning_rate": 2.8138082830600556e-06, + "loss": 0.0, "step": 1430 }, { - "epoch": 2.1422155688622753, - "grad_norm": 0.04838062003714075, - "learning_rate": 4.006340841541748e-06, - "loss": 0.0004, + "epoch": 2.2896, + "grad_norm": 0.032230510931899596, + "learning_rate": 2.8018021012875994e-06, + "loss": 0.0002, "step": 1431 }, { - "epoch": 2.143712574850299, - "grad_norm": 0.012920730564890766, - "learning_rate": 3.993405980194627e-06, + "epoch": 2.2912, + "grad_norm": 0.025771170503503277, + "learning_rate": 2.789817414426673e-06, "loss": 0.0001, "step": 1432 }, { - "epoch": 2.1452095808383236, - "grad_norm": 0.02060350725486784, - "learning_rate": 3.980486821836599e-06, - "loss": 0.0002, + "epoch": 2.2928, + "grad_norm": 0.0012859432517395015, + "learning_rate": 2.7778542582653746e-06, + "loss": 0.0, "step": 1433 }, { - "epoch": 2.1467065868263475, - "grad_norm": 0.1090178395185067, - "learning_rate": 3.967583400242109e-06, - "loss": 0.0012, + "epoch": 2.2944, + "grad_norm": 0.01411440299705994, + "learning_rate": 2.7659126685275028e-06, + "loss": 0.0001, "step": 1434 }, { - "epoch": 2.1482035928143715, - "grad_norm": 0.08624476103626773, - "learning_rate": 3.954695749144466e-06, - "loss": 0.0004, + "epoch": 2.296, + "grad_norm": 0.07607062065238816, + "learning_rate": 2.753992680872457e-06, + "loss": 0.001, "step": 1435 }, { - "epoch": 2.1497005988023954, - "grad_norm": 0.02414965108037553, - "learning_rate": 3.941823902235751e-06, - "loss": 0.0002, + "epoch": 2.2976, + "grad_norm": 0.09799565332045054, + "learning_rate": 2.7420943308951287e-06, + "loss": 0.0008, "step": 1436 }, { - "epoch": 2.1511976047904193, - "grad_norm": 0.01602205069339523, - "learning_rate": 3.928967893166721e-06, + "epoch": 2.2992, + "grad_norm": 0.017320946061261017, + "learning_rate": 2.7302176541257984e-06, "loss": 0.0001, "step": 1437 }, { - "epoch": 2.1526946107784433, - "grad_norm": 0.15078451246023045, - "learning_rate": 3.916127755546743e-06, - "loss": 0.0008, + "epoch": 2.3008, + "grad_norm": 0.010239412224088713, + "learning_rate": 2.718362686030025e-06, + "loss": 0.0001, "step": 1438 }, { - "epoch": 2.154191616766467, - "grad_norm": 0.09937561764681273, - "learning_rate": 3.903303522943679e-06, - "loss": 0.0005, + "epoch": 2.3024, + "grad_norm": 0.03367078960024932, + "learning_rate": 2.7065294620085425e-06, + "loss": 0.0001, "step": 1439 }, { - "epoch": 2.155688622754491, - "grad_norm": 0.2778830186344834, - "learning_rate": 3.890495228883814e-06, - "loss": 0.0013, + "epoch": 2.304, + "grad_norm": 0.036310253948025895, + "learning_rate": 2.694718017397151e-06, + "loss": 0.0002, "step": 1440 }, { - "epoch": 2.157185628742515, - "grad_norm": 0.1014703272227765, - "learning_rate": 3.877702906851765e-06, - "loss": 0.0011, + "epoch": 2.3056, + "grad_norm": 0.08174961939548375, + "learning_rate": 2.6829283874666236e-06, + "loss": 0.0006, "step": 1441 }, { - "epoch": 2.158682634730539, - "grad_norm": 0.03910173601850433, - "learning_rate": 3.8649265902903935e-06, - "loss": 0.0003, + "epoch": 2.3072, + "grad_norm": 0.0020749643937138526, + "learning_rate": 2.6711606074225783e-06, + "loss": 0.0, "step": 1442 }, { - "epoch": 2.160179640718563, - "grad_norm": 0.0758231387962015, - "learning_rate": 3.852166312600715e-06, - "loss": 0.0008, + "epoch": 2.3088, + "grad_norm": 0.0008781990810354615, + "learning_rate": 2.6594147124053983e-06, + "loss": 0.0, "step": 1443 }, { - "epoch": 2.161676646706587, - "grad_norm": 0.0307677226066544, - "learning_rate": 3.839422107141826e-06, - "loss": 0.0002, + "epoch": 2.3104, + "grad_norm": 0.020486784780718033, + "learning_rate": 2.6476907374901062e-06, + "loss": 0.0001, "step": 1444 }, { - "epoch": 2.163173652694611, - "grad_norm": 0.09752284917524724, - "learning_rate": 3.826694007230792e-06, - "loss": 0.0008, + "epoch": 2.312, + "grad_norm": 0.013170158375038647, + "learning_rate": 2.635988717686272e-06, + "loss": 0.0001, "step": 1445 }, { - "epoch": 2.1646706586826348, - "grad_norm": 0.05457238739954349, - "learning_rate": 3.813982046142581e-06, - "loss": 0.0004, + "epoch": 2.3136, + "grad_norm": 0.0024200822912347746, + "learning_rate": 2.6243086879379e-06, + "loss": 0.0, "step": 1446 }, { - "epoch": 2.1661676646706587, - "grad_norm": 0.07303152617434286, - "learning_rate": 3.8012862571099673e-06, - "loss": 0.0005, + "epoch": 2.3152, + "grad_norm": 0.008108913518453966, + "learning_rate": 2.6126506831233343e-06, + "loss": 0.0001, "step": 1447 }, { - "epoch": 2.1676646706586826, - "grad_norm": 0.016540715207413653, - "learning_rate": 3.788606673323447e-06, - "loss": 0.0002, + "epoch": 2.3168, + "grad_norm": 0.10868201803231775, + "learning_rate": 2.6010147380551474e-06, + "loss": 0.0005, "step": 1448 }, { - "epoch": 2.1691616766467066, - "grad_norm": 0.0802709349097072, - "learning_rate": 3.7759433279311487e-06, - "loss": 0.0005, + "epoch": 2.3184, + "grad_norm": 0.004058238469458349, + "learning_rate": 2.5894008874800323e-06, + "loss": 0.0001, "step": 1449 }, { - "epoch": 2.1706586826347305, - "grad_norm": 0.034645966560659984, - "learning_rate": 3.7632962540387575e-06, - "loss": 0.0003, + "epoch": 2.32, + "grad_norm": 0.002981692857772985, + "learning_rate": 2.577809166078716e-06, + "loss": 0.0001, "step": 1450 }, { - "epoch": 2.1721556886227544, - "grad_norm": 0.008870689066133116, - "learning_rate": 3.7506654847094105e-06, - "loss": 0.0002, + "epoch": 2.3216, + "grad_norm": 0.019086869293865865, + "learning_rate": 2.5662396084658383e-06, + "loss": 0.0001, "step": 1451 }, { - "epoch": 2.1736526946107784, - "grad_norm": 0.07045440620146966, - "learning_rate": 3.738051052963624e-06, - "loss": 0.0004, + "epoch": 2.3232, + "grad_norm": 0.04338204063553248, + "learning_rate": 2.5546922491898497e-06, + "loss": 0.0001, "step": 1452 }, { - "epoch": 2.1751497005988023, - "grad_norm": 0.028807941828131546, - "learning_rate": 3.7254529917792025e-06, - "loss": 0.0002, + "epoch": 2.3247999999999998, + "grad_norm": 0.004913806069794926, + "learning_rate": 2.543167122732918e-06, + "loss": 0.0001, "step": 1453 }, { - "epoch": 2.1766467065868262, - "grad_norm": 0.04049065453847004, - "learning_rate": 3.712871334091154e-06, - "loss": 0.0004, + "epoch": 2.3264, + "grad_norm": 0.0030636921144399165, + "learning_rate": 2.5316642635108247e-06, + "loss": 0.0, "step": 1454 }, { - "epoch": 2.17814371257485, - "grad_norm": 0.07895027308565043, - "learning_rate": 3.7003061127915973e-06, - "loss": 0.0005, + "epoch": 2.328, + "grad_norm": 0.0076995128467695325, + "learning_rate": 2.5201837058728506e-06, + "loss": 0.0001, "step": 1455 }, { - "epoch": 2.179640718562874, - "grad_norm": 0.07783525776749904, - "learning_rate": 3.6877573607296935e-06, - "loss": 0.0007, + "epoch": 2.3296, + "grad_norm": 0.07273840684610987, + "learning_rate": 2.508725484101684e-06, + "loss": 0.0004, "step": 1456 }, { - "epoch": 2.181137724550898, - "grad_norm": 0.05173489235073667, - "learning_rate": 3.67522511071154e-06, - "loss": 0.0005, + "epoch": 2.3312, + "grad_norm": 0.11186530685574775, + "learning_rate": 2.4972896324133143e-06, + "loss": 0.0007, "step": 1457 }, { - "epoch": 2.182634730538922, - "grad_norm": 0.04498118384069647, - "learning_rate": 3.6627093955000916e-06, - "loss": 0.0006, + "epoch": 2.3327999999999998, + "grad_norm": 0.0020329487891239295, + "learning_rate": 2.485876184956928e-06, + "loss": 0.0, "step": 1458 }, { - "epoch": 2.184131736526946, - "grad_norm": 0.09963326809445305, - "learning_rate": 3.6502102478150814e-06, - "loss": 0.0006, + "epoch": 2.3344, + "grad_norm": 0.1810254816293864, + "learning_rate": 2.474485175814816e-06, + "loss": 0.0009, "step": 1459 }, { - "epoch": 2.18562874251497, - "grad_norm": 0.020623105889192212, - "learning_rate": 3.6377277003329238e-06, - "loss": 0.0002, + "epoch": 2.336, + "grad_norm": 0.008796881961622925, + "learning_rate": 2.4631166390022574e-06, + "loss": 0.0001, "step": 1460 }, { - "epoch": 2.187125748502994, - "grad_norm": 0.0842055840763743, - "learning_rate": 3.625261785686649e-06, - "loss": 0.0006, + "epoch": 2.3376, + "grad_norm": 0.028234386011551076, + "learning_rate": 2.451770608467432e-06, + "loss": 0.0002, "step": 1461 }, { - "epoch": 2.1886227544910177, - "grad_norm": 0.05511784114857564, - "learning_rate": 3.6128125364657806e-06, - "loss": 0.0003, + "epoch": 2.3392, + "grad_norm": 0.0009550241744107475, + "learning_rate": 2.440447118091306e-06, + "loss": 0.0, "step": 1462 }, { - "epoch": 2.190119760479042, - "grad_norm": 0.09680779392796715, - "learning_rate": 3.6003799852163e-06, - "loss": 0.0006, + "epoch": 2.3407999999999998, + "grad_norm": 0.06985935816975702, + "learning_rate": 2.429146201687538e-06, + "loss": 0.0004, "step": 1463 }, { - "epoch": 2.191616766467066, - "grad_norm": 0.3290860685579756, - "learning_rate": 3.5879641644405183e-06, - "loss": 0.0016, + "epoch": 2.3424, + "grad_norm": 0.001863839046838397, + "learning_rate": 2.417867893002387e-06, + "loss": 0.0, "step": 1464 }, { - "epoch": 2.19311377245509, - "grad_norm": 0.010421333234893785, - "learning_rate": 3.575565106597014e-06, + "epoch": 2.344, + "grad_norm": 0.011244576439991636, + "learning_rate": 2.4066122257145898e-06, "loss": 0.0001, "step": 1465 }, { - "epoch": 2.194610778443114, - "grad_norm": 0.15309336269175572, - "learning_rate": 3.563182844100541e-06, - "loss": 0.0011, + "epoch": 2.3456, + "grad_norm": 0.010636737740380322, + "learning_rate": 2.3953792334352787e-06, + "loss": 0.0001, "step": 1466 }, { - "epoch": 2.196107784431138, - "grad_norm": 0.02091267883568856, - "learning_rate": 3.5508174093219427e-06, - "loss": 0.0001, + "epoch": 2.3472, + "grad_norm": 0.0014849518550035968, + "learning_rate": 2.3841689497078746e-06, + "loss": 0.0, "step": 1467 }, { - "epoch": 2.197604790419162, - "grad_norm": 0.06446281945069611, - "learning_rate": 3.5384688345880826e-06, - "loss": 0.0004, + "epoch": 2.3487999999999998, + "grad_norm": 0.001440058625093697, + "learning_rate": 2.3729814080079815e-06, + "loss": 0.0, "step": 1468 }, { - "epoch": 2.1991017964071857, - "grad_norm": 0.052567869194541594, - "learning_rate": 3.5261371521817247e-06, - "loss": 0.0004, + "epoch": 2.3504, + "grad_norm": 0.007474250959261391, + "learning_rate": 2.361816641743303e-06, + "loss": 0.0001, "step": 1469 }, { - "epoch": 2.2005988023952097, - "grad_norm": 0.049578551613729545, - "learning_rate": 3.513822394341493e-06, - "loss": 0.0006, + "epoch": 2.352, + "grad_norm": 0.0012983438990565295, + "learning_rate": 2.3506746842535244e-06, + "loss": 0.0, "step": 1470 }, { - "epoch": 2.2020958083832336, - "grad_norm": 0.033783781354055624, - "learning_rate": 3.501524593261756e-06, - "loss": 0.0002, + "epoch": 2.3536, + "grad_norm": 0.006703399425770209, + "learning_rate": 2.339555568810221e-06, + "loss": 0.0001, "step": 1471 }, { - "epoch": 2.2035928143712575, - "grad_norm": 0.11505653536880667, - "learning_rate": 3.4892437810925484e-06, - "loss": 0.0008, + "epoch": 2.3552, + "grad_norm": 0.002723319057583775, + "learning_rate": 2.328459328616759e-06, + "loss": 0.0, "step": 1472 }, { - "epoch": 2.2050898203592815, - "grad_norm": 0.05951967201522422, - "learning_rate": 3.476979989939506e-06, - "loss": 0.0002, + "epoch": 2.3568, + "grad_norm": 0.003221205372001595, + "learning_rate": 2.317385996808195e-06, + "loss": 0.0, "step": 1473 }, { - "epoch": 2.2065868263473054, - "grad_norm": 0.1056924616624737, - "learning_rate": 3.4647332518637454e-06, - "loss": 0.0005, + "epoch": 2.3584, + "grad_norm": 0.022913595669498688, + "learning_rate": 2.306335606451181e-06, + "loss": 0.0001, "step": 1474 }, { - "epoch": 2.2080838323353293, - "grad_norm": 0.06831981985885952, - "learning_rate": 3.4525035988818232e-06, - "loss": 0.0007, + "epoch": 2.36, + "grad_norm": 0.0014718472698621076, + "learning_rate": 2.295308190543859e-06, + "loss": 0.0, "step": 1475 }, { - "epoch": 2.2095808383233533, - "grad_norm": 0.08416246640850832, - "learning_rate": 3.440291062965608e-06, - "loss": 0.0004, + "epoch": 2.3616, + "grad_norm": 0.003057361648101914, + "learning_rate": 2.2843037820157678e-06, + "loss": 0.0001, "step": 1476 }, { - "epoch": 2.211077844311377, - "grad_norm": 0.009399176800184218, - "learning_rate": 3.428095676042242e-06, + "epoch": 2.3632, + "grad_norm": 0.01739809318878192, + "learning_rate": 2.2733224137277366e-06, "loss": 0.0001, "step": 1477 }, { - "epoch": 2.212574850299401, - "grad_norm": 0.06148287830535551, - "learning_rate": 3.41591746999402e-06, - "loss": 0.0005, + "epoch": 2.3648, + "grad_norm": 0.004763148347921701, + "learning_rate": 2.2623641184718048e-06, + "loss": 0.0001, "step": 1478 }, { - "epoch": 2.214071856287425, - "grad_norm": 0.15044803640835408, - "learning_rate": 3.403756476658323e-06, - "loss": 0.0006, + "epoch": 2.3664, + "grad_norm": 0.004518028811779055, + "learning_rate": 2.251428928971102e-06, + "loss": 0.0, "step": 1479 }, { - "epoch": 2.215568862275449, - "grad_norm": 0.039856311845892195, - "learning_rate": 3.3916127278275444e-06, - "loss": 0.0003, + "epoch": 2.368, + "grad_norm": 0.003053904000256683, + "learning_rate": 2.240516877879765e-06, + "loss": 0.0, "step": 1480 }, { - "epoch": 2.217065868263473, - "grad_norm": 0.03298698632561797, - "learning_rate": 3.379486255248975e-06, - "loss": 0.0004, + "epoch": 2.3696, + "grad_norm": 0.269895598557991, + "learning_rate": 2.229627997782834e-06, + "loss": 0.0009, "step": 1481 }, { - "epoch": 2.218562874251497, - "grad_norm": 0.05989471368709794, - "learning_rate": 3.3673770906247595e-06, - "loss": 0.0003, + "epoch": 2.3712, + "grad_norm": 0.0076793204496350435, + "learning_rate": 2.218762321196156e-06, + "loss": 0.0001, "step": 1482 }, { - "epoch": 2.220059880239521, - "grad_norm": 0.05031206889739225, - "learning_rate": 3.355285265611784e-06, - "loss": 0.0006, + "epoch": 2.3728, + "grad_norm": 0.0033461779993014668, + "learning_rate": 2.2079198805662917e-06, + "loss": 0.0, "step": 1483 }, { - "epoch": 2.2215568862275448, - "grad_norm": 0.039637111322910513, - "learning_rate": 3.3432108118216046e-06, - "loss": 0.0004, + "epoch": 2.3744, + "grad_norm": 0.001427115516108656, + "learning_rate": 2.1971007082704167e-06, + "loss": 0.0, "step": 1484 }, { - "epoch": 2.2230538922155687, - "grad_norm": 0.02790884081359175, - "learning_rate": 3.3311537608203747e-06, - "loss": 0.0002, + "epoch": 2.376, + "grad_norm": 0.002586368385462468, + "learning_rate": 2.186304836616221e-06, + "loss": 0.0, "step": 1485 }, { - "epoch": 2.2245508982035926, - "grad_norm": 0.03455227197831783, - "learning_rate": 3.31911414412873e-06, - "loss": 0.0003, + "epoch": 2.3776, + "grad_norm": 0.017664811900103907, + "learning_rate": 2.1755322978418134e-06, + "loss": 0.0001, "step": 1486 }, { - "epoch": 2.2260479041916166, - "grad_norm": 0.06635476538309946, - "learning_rate": 3.3070919932217517e-06, - "loss": 0.0003, + "epoch": 2.3792, + "grad_norm": 0.01694283336066184, + "learning_rate": 2.1647831241156304e-06, + "loss": 0.0001, "step": 1487 }, { - "epoch": 2.2275449101796405, - "grad_norm": 0.14675486412500174, - "learning_rate": 3.2950873395288454e-06, - "loss": 0.0014, + "epoch": 2.3808, + "grad_norm": 0.032297864205079956, + "learning_rate": 2.1540573475363402e-06, + "loss": 0.0002, "step": 1488 }, { - "epoch": 2.229041916167665, - "grad_norm": 0.1360738382194748, - "learning_rate": 3.2831002144336776e-06, - "loss": 0.0007, + "epoch": 2.3824, + "grad_norm": 0.0018598696691681533, + "learning_rate": 2.1433550001327376e-06, + "loss": 0.0, "step": 1489 }, { - "epoch": 2.230538922155689, - "grad_norm": 0.05674441854621314, - "learning_rate": 3.2711306492740914e-06, - "loss": 0.0004, + "epoch": 2.384, + "grad_norm": 0.002102643984560031, + "learning_rate": 2.1326761138636555e-06, + "loss": 0.0, "step": 1490 }, { - "epoch": 2.2320359281437128, - "grad_norm": 0.022436227700209574, - "learning_rate": 3.2591786753420187e-06, - "loss": 0.0001, + "epoch": 2.3856, + "grad_norm": 0.0018286896919109657, + "learning_rate": 2.122020720617869e-06, + "loss": 0.0, "step": 1491 }, { - "epoch": 2.2335329341317367, - "grad_norm": 0.03489642218056514, - "learning_rate": 3.247244323883415e-06, - "loss": 0.0003, + "epoch": 2.3872, + "grad_norm": 0.008289093464070325, + "learning_rate": 2.111388852214001e-06, + "loss": 0.0001, "step": 1492 }, { - "epoch": 2.2350299401197606, - "grad_norm": 0.2050412352519987, - "learning_rate": 3.2353276260981447e-06, - "loss": 0.0014, + "epoch": 2.3888, + "grad_norm": 0.0023336863343605095, + "learning_rate": 2.1007805404004247e-06, + "loss": 0.0, "step": 1493 }, { - "epoch": 2.2365269461077846, - "grad_norm": 0.04952916174910589, - "learning_rate": 3.2234286131399415e-06, - "loss": 0.0003, + "epoch": 2.3904, + "grad_norm": 0.0028441181808218953, + "learning_rate": 2.090195816855164e-06, + "loss": 0.0, "step": 1494 }, { - "epoch": 2.2380239520958085, - "grad_norm": 0.15678094109556026, - "learning_rate": 3.2115473161162936e-06, - "loss": 0.0011, + "epoch": 2.392, + "grad_norm": 0.001212807742393933, + "learning_rate": 2.0796347131858187e-06, + "loss": 0.0, "step": 1495 }, { - "epoch": 2.2395209580838324, - "grad_norm": 0.0359162039363321, - "learning_rate": 3.1996837660883763e-06, - "loss": 0.0003, + "epoch": 2.3936, + "grad_norm": 0.005296837578609509, + "learning_rate": 2.069097260929439e-06, + "loss": 0.0001, "step": 1496 }, { - "epoch": 2.2410179640718564, - "grad_norm": 0.12068999773483105, - "learning_rate": 3.1878379940709726e-06, - "loss": 0.0007, + "epoch": 2.3952, + "grad_norm": 0.0017896475593978372, + "learning_rate": 2.058583491552465e-06, + "loss": 0.0, "step": 1497 }, { - "epoch": 2.2425149700598803, - "grad_norm": 0.07153246649925311, - "learning_rate": 3.1760100310323815e-06, - "loss": 0.0013, + "epoch": 2.3968, + "grad_norm": 0.0011797322701871146, + "learning_rate": 2.048093436450603e-06, + "loss": 0.0, "step": 1498 }, { - "epoch": 2.2440119760479043, - "grad_norm": 0.02898441751748169, - "learning_rate": 3.1641999078943566e-06, - "loss": 0.0002, + "epoch": 2.3984, + "grad_norm": 0.07918396984152237, + "learning_rate": 2.037627126948751e-06, + "loss": 0.0006, "step": 1499 }, { - "epoch": 2.245508982035928, - "grad_norm": 0.11063959172977524, - "learning_rate": 3.1524076555320007e-06, - "loss": 0.0007, + "epoch": 2.4, + "grad_norm": 0.0041036247525871, + "learning_rate": 2.0271845943008984e-06, + "loss": 0.0001, "step": 1500 }, { - "epoch": 2.247005988023952, - "grad_norm": 0.10311560949120008, - "learning_rate": 3.1406333047737012e-06, - "loss": 0.0003, + "epoch": 2.4016, + "grad_norm": 0.009007184596335682, + "learning_rate": 2.0167658696900317e-06, + "loss": 0.0001, "step": 1501 }, { - "epoch": 2.248502994011976, - "grad_norm": 0.09788152701017722, - "learning_rate": 3.128876886401049e-06, - "loss": 0.0003, + "epoch": 2.4032, + "grad_norm": 0.01675758427096426, + "learning_rate": 2.006370984228043e-06, + "loss": 0.0001, "step": 1502 }, { - "epoch": 2.25, - "grad_norm": 0.15031384544339235, - "learning_rate": 3.1171384311487487e-06, - "loss": 0.0012, + "epoch": 2.4048, + "grad_norm": 0.005890697530268193, + "learning_rate": 1.9959999689556407e-06, + "loss": 0.0001, "step": 1503 }, { - "epoch": 2.251497005988024, - "grad_norm": 0.12304740690296884, - "learning_rate": 3.105417969704544e-06, - "loss": 0.0008, + "epoch": 2.4064, + "grad_norm": 0.002924527914154743, + "learning_rate": 1.985652854842247e-06, + "loss": 0.0, "step": 1504 }, { - "epoch": 2.252994011976048, - "grad_norm": 0.10203703250907652, - "learning_rate": 3.093715532709147e-06, - "loss": 0.0005, + "epoch": 2.408, + "grad_norm": 0.003731462856825938, + "learning_rate": 1.9753296727859195e-06, + "loss": 0.0, "step": 1505 }, { - "epoch": 2.254491017964072, - "grad_norm": 0.10315509055548146, - "learning_rate": 3.0820311507561395e-06, - "loss": 0.0007, + "epoch": 2.4096, + "grad_norm": 0.01630410409142845, + "learning_rate": 1.9650304536132426e-06, + "loss": 0.0001, "step": 1506 }, { - "epoch": 2.2559880239520957, - "grad_norm": 0.1340732142733853, - "learning_rate": 3.0703648543919053e-06, - "loss": 0.0006, + "epoch": 2.4112, + "grad_norm": 0.013784772819053917, + "learning_rate": 1.9547552280792528e-06, + "loss": 0.0001, "step": 1507 }, { - "epoch": 2.2574850299401197, - "grad_norm": 0.05868722622806031, - "learning_rate": 3.058716674115544e-06, - "loss": 0.0002, + "epoch": 2.4128, + "grad_norm": 0.0021612071817767383, + "learning_rate": 1.9445040268673297e-06, + "loss": 0.0, "step": 1508 }, { - "epoch": 2.2589820359281436, - "grad_norm": 0.03557542415759387, - "learning_rate": 3.047086640378798e-06, - "loss": 0.0002, + "epoch": 2.4144, + "grad_norm": 0.10756611202533951, + "learning_rate": 1.9342768805891176e-06, + "loss": 0.0003, "step": 1509 }, { - "epoch": 2.2604790419161676, - "grad_norm": 0.07662241536677104, - "learning_rate": 3.0354747835859686e-06, - "loss": 0.0004, + "epoch": 2.416, + "grad_norm": 0.003626199791092706, + "learning_rate": 1.924073819784428e-06, + "loss": 0.0001, "step": 1510 }, { - "epoch": 2.2619760479041915, - "grad_norm": 0.04174555140373921, - "learning_rate": 3.0238811340938347e-06, - "loss": 0.0004, + "epoch": 2.4176, + "grad_norm": 0.00467415340632512, + "learning_rate": 1.9138948749211473e-06, + "loss": 0.0001, "step": 1511 }, { - "epoch": 2.2634730538922154, - "grad_norm": 0.11334824278824039, - "learning_rate": 3.0123057222115835e-06, - "loss": 0.001, + "epoch": 2.4192, + "grad_norm": 0.002556602627541502, + "learning_rate": 1.9037400763951508e-06, + "loss": 0.0, "step": 1512 }, { - "epoch": 2.2649700598802394, - "grad_norm": 0.07557692002090421, - "learning_rate": 3.000748578200716e-06, - "loss": 0.0007, + "epoch": 2.4208, + "grad_norm": 0.021045982566475125, + "learning_rate": 1.8936094545302098e-06, + "loss": 0.0001, "step": 1513 }, { - "epoch": 2.2664670658682633, - "grad_norm": 0.07097266734420278, - "learning_rate": 2.989209732274979e-06, - "loss": 0.0003, + "epoch": 2.4224, + "grad_norm": 0.0016611376914392635, + "learning_rate": 1.8835030395778941e-06, + "loss": 0.0, "step": 1514 }, { - "epoch": 2.2679640718562872, - "grad_norm": 0.09527091457006696, - "learning_rate": 2.97768921460028e-06, - "loss": 0.0005, + "epoch": 2.424, + "grad_norm": 0.0017170133170774445, + "learning_rate": 1.8734208617174986e-06, + "loss": 0.0, "step": 1515 }, { - "epoch": 2.269461077844311, - "grad_norm": 0.0815127360640386, - "learning_rate": 2.9661870552946114e-06, - "loss": 0.0005, + "epoch": 2.4256, + "grad_norm": 0.0014993770241049835, + "learning_rate": 1.8633629510559315e-06, + "loss": 0.0, "step": 1516 }, { - "epoch": 2.270958083832335, - "grad_norm": 0.0802974444453441, - "learning_rate": 2.9547032844279824e-06, - "loss": 0.0005, + "epoch": 2.4272, + "grad_norm": 0.0015271567701904886, + "learning_rate": 1.8533293376276473e-06, + "loss": 0.0, "step": 1517 }, { - "epoch": 2.272455089820359, - "grad_norm": 0.04150750081228997, - "learning_rate": 2.943237932022307e-06, - "loss": 0.0002, + "epoch": 2.4288, + "grad_norm": 0.0013171104723947522, + "learning_rate": 1.8433200513945338e-06, + "loss": 0.0, "step": 1518 }, { - "epoch": 2.2739520958083834, - "grad_norm": 0.031089773582056647, - "learning_rate": 2.9317910280513705e-06, - "loss": 0.0002, + "epoch": 2.4304, + "grad_norm": 0.0015319013883092294, + "learning_rate": 1.8333351222458407e-06, + "loss": 0.0, "step": 1519 }, { - "epoch": 2.2754491017964074, - "grad_norm": 0.06361121560474334, - "learning_rate": 2.9203626024407185e-06, - "loss": 0.0004, + "epoch": 2.432, + "grad_norm": 0.07619118696699975, + "learning_rate": 1.8233745799980818e-06, + "loss": 0.0002, "step": 1520 }, { - "epoch": 2.2769461077844313, - "grad_norm": 0.05424769651819266, - "learning_rate": 2.9089526850675875e-06, - "loss": 0.0004, + "epoch": 2.4336, + "grad_norm": 0.0016675929742683841, + "learning_rate": 1.813438454394948e-06, + "loss": 0.0, "step": 1521 }, { - "epoch": 2.2784431137724552, - "grad_norm": 0.048908206558848324, - "learning_rate": 2.8975613057608297e-06, - "loss": 0.0008, + "epoch": 2.4352, + "grad_norm": 0.02036471538115778, + "learning_rate": 1.8035267751072172e-06, + "loss": 0.0001, "step": 1522 }, { - "epoch": 2.279940119760479, - "grad_norm": 0.12535647237158476, - "learning_rate": 2.8861884943008335e-06, - "loss": 0.0005, + "epoch": 2.4368, + "grad_norm": 0.0019359518892397129, + "learning_rate": 1.7936395717326705e-06, + "loss": 0.0, "step": 1523 }, { - "epoch": 2.281437125748503, - "grad_norm": 0.13595998922503674, - "learning_rate": 2.874834280419452e-06, - "loss": 0.0012, + "epoch": 2.4384, + "grad_norm": 0.018370237499706017, + "learning_rate": 1.7837768737959937e-06, + "loss": 0.0001, "step": 1524 }, { - "epoch": 2.282934131736527, - "grad_norm": 0.0193676454435138, - "learning_rate": 2.863498693799901e-06, - "loss": 0.0002, + "epoch": 2.44, + "grad_norm": 0.0035952528928215655, + "learning_rate": 1.773938710748706e-06, + "loss": 0.0001, "step": 1525 }, { - "epoch": 2.284431137724551, - "grad_norm": 0.09642186052650302, - "learning_rate": 2.8521817640767225e-06, - "loss": 0.0004, + "epoch": 2.4416, + "grad_norm": 0.02360457405560866, + "learning_rate": 1.7641251119690505e-06, + "loss": 0.0001, "step": 1526 }, { - "epoch": 2.285928143712575, - "grad_norm": 0.08690858061005316, - "learning_rate": 2.8408835208356667e-06, - "loss": 0.0006, + "epoch": 2.4432, + "grad_norm": 0.002167045765832743, + "learning_rate": 1.7543361067619269e-06, + "loss": 0.0001, "step": 1527 }, { - "epoch": 2.287425149700599, - "grad_norm": 0.09629388756889663, - "learning_rate": 2.8296039936136355e-06, - "loss": 0.0005, + "epoch": 2.4448, + "grad_norm": 0.027689086747607245, + "learning_rate": 1.7445717243587889e-06, + "loss": 0.0001, "step": 1528 }, { - "epoch": 2.288922155688623, - "grad_norm": 0.02465511736806249, - "learning_rate": 2.818343211898612e-06, - "loss": 0.0002, + "epoch": 2.4464, + "grad_norm": 0.1787294579533905, + "learning_rate": 1.734831993917564e-06, + "loss": 0.0006, "step": 1529 }, { - "epoch": 2.2904191616766467, - "grad_norm": 0.09994990956621684, - "learning_rate": 2.807101205129553e-06, - "loss": 0.0009, + "epoch": 2.448, + "grad_norm": 0.09937824016232794, + "learning_rate": 1.7251169445225658e-06, + "loss": 0.0017, "step": 1530 }, { - "epoch": 2.2919161676646707, - "grad_norm": 0.08966911947040915, - "learning_rate": 2.795878002696354e-06, - "loss": 0.0004, + "epoch": 2.4496, + "grad_norm": 0.0046358430248506985, + "learning_rate": 1.715426605184407e-06, + "loss": 0.0001, "step": 1531 }, { - "epoch": 2.2934131736526946, - "grad_norm": 0.008082694473274558, - "learning_rate": 2.7846736339397362e-06, - "loss": 0.0001, + "epoch": 2.4512, + "grad_norm": 0.002018934016152693, + "learning_rate": 1.705761004839911e-06, + "loss": 0.0, "step": 1532 }, { - "epoch": 2.2949101796407185, - "grad_norm": 0.037330647180128684, - "learning_rate": 2.773488128151186e-06, - "loss": 0.0002, + "epoch": 2.4528, + "grad_norm": 0.0009162379025401923, + "learning_rate": 1.6961201723520248e-06, + "loss": 0.0, "step": 1533 }, { - "epoch": 2.2964071856287425, - "grad_norm": 0.15808722242025455, - "learning_rate": 2.7623215145728866e-06, - "loss": 0.001, + "epoch": 2.4544, + "grad_norm": 0.03424925237246253, + "learning_rate": 1.6865041365097434e-06, + "loss": 0.0002, "step": 1534 }, { - "epoch": 2.2979041916167664, - "grad_norm": 0.10096871768612158, - "learning_rate": 2.7511738223976126e-06, - "loss": 0.0009, + "epoch": 2.456, + "grad_norm": 0.01168682482158326, + "learning_rate": 1.676912926028007e-06, + "loss": 0.0001, "step": 1535 }, { - "epoch": 2.2994011976047903, - "grad_norm": 0.06280335460620405, - "learning_rate": 2.740045080768694e-06, - "loss": 0.0003, + "epoch": 2.4576000000000002, + "grad_norm": 0.007125610436890888, + "learning_rate": 1.6673465695476233e-06, + "loss": 0.0001, "step": 1536 }, { - "epoch": 2.3008982035928143, - "grad_norm": 0.07328063719390991, - "learning_rate": 2.7289353187798982e-06, - "loss": 0.0004, + "epoch": 2.4592, + "grad_norm": 0.0023125107062913182, + "learning_rate": 1.6578050956351887e-06, + "loss": 0.0, "step": 1537 }, { - "epoch": 2.302395209580838, - "grad_norm": 0.07595297277452129, - "learning_rate": 2.7178445654753916e-06, - "loss": 0.0005, + "epoch": 2.4608, + "grad_norm": 0.018211054734330913, + "learning_rate": 1.6482885327829912e-06, + "loss": 0.0001, "step": 1538 }, { - "epoch": 2.303892215568862, - "grad_norm": 0.11974836266924484, - "learning_rate": 2.706772849849636e-06, - "loss": 0.0009, + "epoch": 2.4624, + "grad_norm": 0.0038736004464934217, + "learning_rate": 1.6387969094089318e-06, + "loss": 0.0001, "step": 1539 }, { - "epoch": 2.305389221556886, - "grad_norm": 0.0076400019711527585, - "learning_rate": 2.6957202008473238e-06, + "epoch": 2.464, + "grad_norm": 0.006647792105609032, + "learning_rate": 1.6293302538564381e-06, "loss": 0.0001, "step": 1540 }, { - "epoch": 2.30688622754491, - "grad_norm": 0.09483349610761937, - "learning_rate": 2.6846866473633126e-06, - "loss": 0.0005, + "epoch": 2.4656000000000002, + "grad_norm": 0.009903476509994772, + "learning_rate": 1.619888594394382e-06, + "loss": 0.0001, "step": 1541 }, { - "epoch": 2.308383233532934, - "grad_norm": 0.011742806672084745, - "learning_rate": 2.673672218242518e-06, + "epoch": 2.4672, + "grad_norm": 0.020716801874626822, + "learning_rate": 1.6104719592169905e-06, "loss": 0.0001, "step": 1542 }, { - "epoch": 2.309880239520958, - "grad_norm": 0.027406703176830523, - "learning_rate": 2.662676942279879e-06, - "loss": 0.0002, + "epoch": 2.4688, + "grad_norm": 0.0017791585271250928, + "learning_rate": 1.6010803764437633e-06, + "loss": 0.0, "step": 1543 }, { - "epoch": 2.3113772455089823, - "grad_norm": 0.04835437820138478, - "learning_rate": 2.651700848220253e-06, - "loss": 0.0003, + "epoch": 2.4704, + "grad_norm": 0.04399381417400462, + "learning_rate": 1.5917138741193972e-06, + "loss": 0.0002, "step": 1544 }, { - "epoch": 2.312874251497006, - "grad_norm": 0.06402404819188177, - "learning_rate": 2.640743964758349e-06, - "loss": 0.0003, + "epoch": 2.472, + "grad_norm": 0.0017668274917530136, + "learning_rate": 1.5823724802136863e-06, + "loss": 0.0, "step": 1545 }, { - "epoch": 2.31437125748503, - "grad_norm": 0.06160368088836378, - "learning_rate": 2.629806320538665e-06, - "loss": 0.0002, + "epoch": 2.4736000000000002, + "grad_norm": 0.008234676324611317, + "learning_rate": 1.5730562226214529e-06, + "loss": 0.0001, "step": 1546 }, { - "epoch": 2.315868263473054, - "grad_norm": 0.08575582684771083, - "learning_rate": 2.6188879441553818e-06, - "loss": 0.0005, + "epoch": 2.4752, + "grad_norm": 0.01419089057865367, + "learning_rate": 1.5637651291624522e-06, + "loss": 0.0001, "step": 1547 }, { - "epoch": 2.317365269461078, - "grad_norm": 0.06630142016737109, - "learning_rate": 2.60798886415233e-06, - "loss": 0.0004, + "epoch": 2.4768, + "grad_norm": 0.07111358162237315, + "learning_rate": 1.5544992275813053e-06, + "loss": 0.0003, "step": 1548 }, { - "epoch": 2.318862275449102, - "grad_norm": 0.12139207820757994, - "learning_rate": 2.5971091090228817e-06, - "loss": 0.0018, + "epoch": 2.4784, + "grad_norm": 0.0027156424970821223, + "learning_rate": 1.545258545547398e-06, + "loss": 0.0, "step": 1549 }, { - "epoch": 2.320359281437126, - "grad_norm": 0.06552332582410145, - "learning_rate": 2.5862487072098898e-06, - "loss": 0.0005, + "epoch": 2.48, + "grad_norm": 0.0024029200917369113, + "learning_rate": 1.536043110654809e-06, + "loss": 0.0, "step": 1550 }, { - "epoch": 2.32185628742515, - "grad_norm": 0.04409020071690396, - "learning_rate": 2.5754076871056144e-06, - "loss": 0.0002, + "epoch": 2.4816, + "grad_norm": 0.0010976426679610582, + "learning_rate": 1.5268529504222262e-06, + "loss": 0.0, "step": 1551 }, { - "epoch": 2.3233532934131738, - "grad_norm": 0.10364683798236486, - "learning_rate": 2.564586077051641e-06, - "loss": 0.0003, + "epoch": 2.4832, + "grad_norm": 0.05562937434060917, + "learning_rate": 1.5176880922928615e-06, + "loss": 0.0002, "step": 1552 }, { - "epoch": 2.3248502994011977, - "grad_norm": 0.155282637159344, - "learning_rate": 2.5537839053388225e-06, - "loss": 0.0009, + "epoch": 2.4848, + "grad_norm": 0.001756978753692583, + "learning_rate": 1.5085485636343755e-06, + "loss": 0.0, "step": 1553 }, { - "epoch": 2.3263473053892216, - "grad_norm": 0.06569277881888244, - "learning_rate": 2.543001200207178e-06, - "loss": 0.0003, + "epoch": 2.4864, + "grad_norm": 0.007200012050074666, + "learning_rate": 1.4994343917387854e-06, + "loss": 0.0001, "step": 1554 }, { - "epoch": 2.3278443113772456, - "grad_norm": 0.07710485032834485, - "learning_rate": 2.5322379898458527e-06, - "loss": 0.0005, + "epoch": 2.488, + "grad_norm": 0.00545621490829972, + "learning_rate": 1.4903456038223941e-06, + "loss": 0.0001, "step": 1555 }, { - "epoch": 2.3293413173652695, - "grad_norm": 0.07966793603491328, - "learning_rate": 2.5214943023930138e-06, - "loss": 0.0005, + "epoch": 2.4896, + "grad_norm": 0.0013280783981415845, + "learning_rate": 1.481282227025701e-06, + "loss": 0.0, "step": 1556 }, { - "epoch": 2.3308383233532934, - "grad_norm": 0.049815482522710106, - "learning_rate": 2.5107701659357963e-06, - "loss": 0.0002, + "epoch": 2.4912, + "grad_norm": 0.005087134285962172, + "learning_rate": 1.4722442884133214e-06, + "loss": 0.0001, "step": 1557 }, { - "epoch": 2.3323353293413174, - "grad_norm": 0.11800603839640363, - "learning_rate": 2.5000656085102215e-06, - "loss": 0.0006, + "epoch": 2.4928, + "grad_norm": 0.12036336528820847, + "learning_rate": 1.4632318149739177e-06, + "loss": 0.0021, "step": 1558 }, { - "epoch": 2.3338323353293413, - "grad_norm": 0.01937251400307602, - "learning_rate": 2.4893806581011215e-06, - "loss": 0.0002, + "epoch": 2.4944, + "grad_norm": 0.016016100224801787, + "learning_rate": 1.4542448336201021e-06, + "loss": 0.0001, "step": 1559 }, { - "epoch": 2.3353293413173652, - "grad_norm": 0.05368059532814563, - "learning_rate": 2.4787153426420794e-06, - "loss": 0.0003, + "epoch": 2.496, + "grad_norm": 0.01170453067098676, + "learning_rate": 1.4452833711883629e-06, + "loss": 0.0001, "step": 1560 }, { - "epoch": 2.336826347305389, - "grad_norm": 0.14182083918266286, - "learning_rate": 2.46806969001534e-06, - "loss": 0.0009, + "epoch": 2.4976, + "grad_norm": 0.017877091503009403, + "learning_rate": 1.4363474544389876e-06, + "loss": 0.0001, "step": 1561 }, { - "epoch": 2.338323353293413, - "grad_norm": 0.026485749870856008, - "learning_rate": 2.4574437280517437e-06, - "loss": 0.0003, + "epoch": 2.4992, + "grad_norm": 0.0023130446241290763, + "learning_rate": 1.4274371100559792e-06, + "loss": 0.0, "step": 1562 }, { - "epoch": 2.339820359281437, - "grad_norm": 0.08886356114902899, - "learning_rate": 2.446837484530655e-06, - "loss": 0.0005, + "epoch": 2.5008, + "grad_norm": 0.10763984336472367, + "learning_rate": 1.4185523646469822e-06, + "loss": 0.0003, "step": 1563 }, { - "epoch": 2.341317365269461, - "grad_norm": 0.02014395320911489, - "learning_rate": 2.436250987179889e-06, - "loss": 0.0001, + "epoch": 2.5023999999999997, + "grad_norm": 0.0013005506478659377, + "learning_rate": 1.409693244743192e-06, + "loss": 0.0, "step": 1564 }, { - "epoch": 2.342814371257485, - "grad_norm": 0.021864827664785275, - "learning_rate": 2.4256842636756372e-06, - "loss": 0.0002, + "epoch": 2.504, + "grad_norm": 0.020425218709959836, + "learning_rate": 1.4008597767992872e-06, + "loss": 0.0001, "step": 1565 }, { - "epoch": 2.344311377245509, - "grad_norm": 0.02967983647931681, - "learning_rate": 2.415137341642397e-06, - "loss": 0.0001, + "epoch": 2.5056000000000003, + "grad_norm": 0.08103328060955477, + "learning_rate": 1.3920519871933425e-06, + "loss": 0.0005, "step": 1566 }, { - "epoch": 2.345808383233533, - "grad_norm": 0.06117231385528192, - "learning_rate": 2.4046102486529033e-06, - "loss": 0.0004, + "epoch": 2.5072, + "grad_norm": 0.00386152032655592, + "learning_rate": 1.3832699022267516e-06, + "loss": 0.0, "step": 1567 }, { - "epoch": 2.3473053892215567, - "grad_norm": 0.019912926187557994, - "learning_rate": 2.394103012228045e-06, - "loss": 0.0002, + "epoch": 2.5088, + "grad_norm": 0.0034205593845012317, + "learning_rate": 1.3745135481241602e-06, + "loss": 0.0001, "step": 1568 }, { - "epoch": 2.3488023952095807, - "grad_norm": 0.024535582706188355, - "learning_rate": 2.3836156598368055e-06, - "loss": 0.0003, + "epoch": 2.5103999999999997, + "grad_norm": 0.01165295536569006, + "learning_rate": 1.3657829510333653e-06, + "loss": 0.0001, "step": 1569 }, { - "epoch": 2.3502994011976046, - "grad_norm": 0.03376993608094255, - "learning_rate": 2.373148218896182e-06, - "loss": 0.0003, + "epoch": 2.512, + "grad_norm": 0.010197076721640802, + "learning_rate": 1.3570781370252584e-06, + "loss": 0.0001, "step": 1570 }, { - "epoch": 2.3517964071856285, - "grad_norm": 0.0384213575217128, - "learning_rate": 2.362700716771118e-06, - "loss": 0.0005, + "epoch": 2.5136, + "grad_norm": 0.030098303958029184, + "learning_rate": 1.3483991320937307e-06, + "loss": 0.0002, "step": 1571 }, { - "epoch": 2.3532934131736525, - "grad_norm": 0.046833073744106835, - "learning_rate": 2.3522731807744317e-06, - "loss": 0.0005, + "epoch": 2.5152, + "grad_norm": 0.04392294653380117, + "learning_rate": 1.339745962155613e-06, + "loss": 0.0002, "step": 1572 }, { - "epoch": 2.3547904191616764, - "grad_norm": 0.015881510742633775, - "learning_rate": 2.341865638166747e-06, - "loss": 0.0001, + "epoch": 2.5168, + "grad_norm": 0.0021643913012770366, + "learning_rate": 1.3311186530505838e-06, + "loss": 0.0, "step": 1573 }, { - "epoch": 2.3562874251497004, - "grad_norm": 0.23245452164430105, - "learning_rate": 2.331478116156416e-06, - "loss": 0.0025, + "epoch": 2.5183999999999997, + "grad_norm": 0.002235451729477788, + "learning_rate": 1.322517230541096e-06, + "loss": 0.0, "step": 1574 }, { - "epoch": 2.3577844311377247, - "grad_norm": 0.04796842296752511, - "learning_rate": 2.3211106418994512e-06, - "loss": 0.0002, + "epoch": 2.52, + "grad_norm": 0.0018122429134230715, + "learning_rate": 1.313941720312303e-06, + "loss": 0.0, "step": 1575 }, { - "epoch": 2.3592814371257487, - "grad_norm": 0.11392590399503923, - "learning_rate": 2.3107632424994532e-06, - "loss": 0.0015, + "epoch": 2.5216, + "grad_norm": 0.017416784512229562, + "learning_rate": 1.30539214797198e-06, + "loss": 0.0001, "step": 1576 }, { - "epoch": 2.3607784431137726, - "grad_norm": 0.01691641893665238, - "learning_rate": 2.300435945007541e-06, + "epoch": 2.5232, + "grad_norm": 0.0067630498958140784, + "learning_rate": 1.2968685390504465e-06, "loss": 0.0001, "step": 1577 }, { - "epoch": 2.3622754491017965, - "grad_norm": 0.02404128261366343, - "learning_rate": 2.290128776422292e-06, - "loss": 0.0002, + "epoch": 2.5248, + "grad_norm": 0.006677237805470845, + "learning_rate": 1.2883709190004956e-06, + "loss": 0.0001, "step": 1578 }, { - "epoch": 2.3637724550898205, - "grad_norm": 0.06174460774186989, - "learning_rate": 2.2798417636896396e-06, - "loss": 0.0012, + "epoch": 2.5263999999999998, + "grad_norm": 0.0036461037673314223, + "learning_rate": 1.2798993131973093e-06, + "loss": 0.0, "step": 1579 }, { - "epoch": 2.3652694610778444, - "grad_norm": 0.08570446822273553, - "learning_rate": 2.269574933702846e-06, - "loss": 0.0004, + "epoch": 2.528, + "grad_norm": 0.0016567673129543978, + "learning_rate": 1.2714537469383858e-06, + "loss": 0.0, "step": 1580 }, { - "epoch": 2.3667664670658684, - "grad_norm": 0.0856077332705348, - "learning_rate": 2.259328313302395e-06, - "loss": 0.0005, + "epoch": 2.5296, + "grad_norm": 0.024221766064813827, + "learning_rate": 1.263034245443473e-06, + "loss": 0.0002, "step": 1581 }, { - "epoch": 2.3682634730538923, - "grad_norm": 0.07142666648166325, - "learning_rate": 2.249101929275944e-06, - "loss": 0.0009, + "epoch": 2.5312, + "grad_norm": 0.004007303578627002, + "learning_rate": 1.254640833854477e-06, + "loss": 0.0001, "step": 1582 }, { - "epoch": 2.3697604790419162, - "grad_norm": 0.025286357606488962, - "learning_rate": 2.2388958083582423e-06, - "loss": 0.0002, + "epoch": 2.5328, + "grad_norm": 0.003292892988464015, + "learning_rate": 1.2462735372353996e-06, + "loss": 0.0, "step": 1583 }, { - "epoch": 2.37125748502994, - "grad_norm": 0.045952040023591774, - "learning_rate": 2.228709977231065e-06, + "epoch": 2.5343999999999998, + "grad_norm": 0.06401474580874286, + "learning_rate": 1.2379323805722575e-06, "loss": 0.0003, "step": 1584 }, { - "epoch": 2.372754491017964, - "grad_norm": 0.10954972663341103, - "learning_rate": 2.2185444625231557e-06, - "loss": 0.0007, + "epoch": 2.536, + "grad_norm": 0.02788742726917249, + "learning_rate": 1.2296173887730122e-06, + "loss": 0.0001, "step": 1585 }, { - "epoch": 2.374251497005988, - "grad_norm": 0.10645468456591504, - "learning_rate": 2.2083992908101236e-06, - "loss": 0.001, + "epoch": 2.5376, + "grad_norm": 0.0015182760956827556, + "learning_rate": 1.2213285866674908e-06, + "loss": 0.0, "step": 1586 }, { - "epoch": 2.375748502994012, - "grad_norm": 0.030096418586315985, - "learning_rate": 2.1982744886144157e-06, - "loss": 0.0004, + "epoch": 2.5392, + "grad_norm": 0.0228623292459383, + "learning_rate": 1.2130659990073146e-06, + "loss": 0.0001, "step": 1587 }, { - "epoch": 2.377245508982036, - "grad_norm": 0.09053718049959521, - "learning_rate": 2.1881700824052157e-06, - "loss": 0.0004, + "epoch": 2.5408, + "grad_norm": 0.0011974631021759586, + "learning_rate": 1.2048296504658208e-06, + "loss": 0.0, "step": 1588 }, { - "epoch": 2.37874251497006, - "grad_norm": 0.04106460054597611, - "learning_rate": 2.1780860985983853e-06, - "loss": 0.0002, + "epoch": 2.5423999999999998, + "grad_norm": 0.002917571446504748, + "learning_rate": 1.196619565638003e-06, + "loss": 0.0001, "step": 1589 }, { - "epoch": 2.3802395209580838, - "grad_norm": 0.017813026852897713, - "learning_rate": 2.1680225635564113e-06, - "loss": 0.0002, + "epoch": 2.544, + "grad_norm": 0.0021043187714803947, + "learning_rate": 1.1884357690404157e-06, + "loss": 0.0, "step": 1590 }, { - "epoch": 2.3817365269461077, - "grad_norm": 0.03383373318851492, - "learning_rate": 2.1579795035882955e-06, - "loss": 0.0003, + "epoch": 2.5456, + "grad_norm": 0.005396413474978706, + "learning_rate": 1.1802782851111206e-06, + "loss": 0.0001, "step": 1591 }, { - "epoch": 2.3832335329341316, - "grad_norm": 0.05074488961782638, - "learning_rate": 2.1479569449495406e-06, - "loss": 0.0004, + "epoch": 2.5472, + "grad_norm": 0.021121082445202475, + "learning_rate": 1.1721471382096028e-06, + "loss": 0.0001, "step": 1592 }, { - "epoch": 2.3847305389221556, - "grad_norm": 0.01182856111698649, - "learning_rate": 2.1379549138420265e-06, - "loss": 0.0001, + "epoch": 2.5488, + "grad_norm": 0.0020481328118915473, + "learning_rate": 1.1640423526166987e-06, + "loss": 0.0, "step": 1593 }, { - "epoch": 2.3862275449101795, - "grad_norm": 0.26118958396749364, - "learning_rate": 2.127973436413989e-06, - "loss": 0.0018, + "epoch": 2.5504, + "grad_norm": 0.0013258107292440985, + "learning_rate": 1.1559639525345313e-06, + "loss": 0.0, "step": 1594 }, { - "epoch": 2.3877245508982035, - "grad_norm": 0.05278864289511616, - "learning_rate": 2.1180125387599193e-06, - "loss": 0.0003, + "epoch": 2.552, + "grad_norm": 0.09574298006311367, + "learning_rate": 1.1479119620864277e-06, + "loss": 0.0008, "step": 1595 }, { - "epoch": 2.3892215568862274, - "grad_norm": 0.030355323786991278, - "learning_rate": 2.108072246920506e-06, - "loss": 0.0002, + "epoch": 2.5536, + "grad_norm": 0.004026929915154443, + "learning_rate": 1.1398864053168534e-06, + "loss": 0.0001, "step": 1596 }, { - "epoch": 2.3907185628742513, - "grad_norm": 0.1099369494009969, - "learning_rate": 2.0981525868825815e-06, - "loss": 0.0006, + "epoch": 2.5552, + "grad_norm": 0.03389784693920091, + "learning_rate": 1.1318873061913405e-06, + "loss": 0.0002, "step": 1597 }, { - "epoch": 2.3922155688622753, - "grad_norm": 0.1203596546841281, - "learning_rate": 2.0882535845790188e-06, - "loss": 0.0007, + "epoch": 2.5568, + "grad_norm": 0.00514572526232494, + "learning_rate": 1.123914688596409e-06, + "loss": 0.0001, "step": 1598 }, { - "epoch": 2.3937125748502996, - "grad_norm": 0.1093149086475125, - "learning_rate": 2.078375265888707e-06, - "loss": 0.0011, + "epoch": 2.5584, + "grad_norm": 0.016099931278681145, + "learning_rate": 1.1159685763395113e-06, + "loss": 0.0001, "step": 1599 }, { - "epoch": 2.3952095808383236, - "grad_norm": 0.10583209779661093, - "learning_rate": 2.0685176566364507e-06, - "loss": 0.0006, + "epoch": 2.56, + "grad_norm": 0.034261385652090005, + "learning_rate": 1.108048993148939e-06, + "loss": 0.0002, "step": 1600 }, { - "epoch": 2.3967065868263475, - "grad_norm": 0.047336815910952124, - "learning_rate": 2.0586807825929123e-06, - "loss": 0.0002, + "epoch": 2.5616, + "grad_norm": 0.006428809226415149, + "learning_rate": 1.1001559626737757e-06, + "loss": 0.0001, "step": 1601 }, { - "epoch": 2.3982035928143715, - "grad_norm": 0.09904610970762708, - "learning_rate": 2.0488646694745583e-06, - "loss": 0.0012, + "epoch": 2.5632, + "grad_norm": 0.005330511861859042, + "learning_rate": 1.0922895084838036e-06, + "loss": 0.0001, "step": 1602 }, { - "epoch": 2.3997005988023954, - "grad_norm": 0.09513445282250857, - "learning_rate": 2.0390693429435626e-06, - "loss": 0.0004, + "epoch": 2.5648, + "grad_norm": 0.03328716363011227, + "learning_rate": 1.0844496540694515e-06, + "loss": 0.0001, "step": 1603 }, { - "epoch": 2.4011976047904193, - "grad_norm": 0.020422046778283895, - "learning_rate": 2.029294828607773e-06, - "loss": 0.0001, + "epoch": 2.5664, + "grad_norm": 0.002380265262180709, + "learning_rate": 1.0766364228417148e-06, + "loss": 0.0, "step": 1604 }, { - "epoch": 2.4026946107784433, - "grad_norm": 0.14061343688126954, - "learning_rate": 2.01954115202062e-06, - "loss": 0.0012, + "epoch": 2.568, + "grad_norm": 0.02310696101922282, + "learning_rate": 1.0688498381320855e-06, + "loss": 0.0001, "step": 1605 }, { - "epoch": 2.404191616766467, - "grad_norm": 0.04251232089992607, - "learning_rate": 2.0098083386810596e-06, - "loss": 0.0002, + "epoch": 2.5696, + "grad_norm": 0.03340335879563055, + "learning_rate": 1.0610899231924887e-06, + "loss": 0.0001, "step": 1606 }, { - "epoch": 2.405688622754491, - "grad_norm": 0.05855588269503358, - "learning_rate": 2.0000964140335044e-06, - "loss": 0.0004, + "epoch": 2.5712, + "grad_norm": 0.10027517187003131, + "learning_rate": 1.0533567011952094e-06, + "loss": 0.0012, "step": 1607 }, { - "epoch": 2.407185628742515, - "grad_norm": 0.04678849099056471, - "learning_rate": 1.990405403467757e-06, - "loss": 0.0002, + "epoch": 2.5728, + "grad_norm": 0.004624394220847377, + "learning_rate": 1.0456501952328191e-06, + "loss": 0.0001, "step": 1608 }, { - "epoch": 2.408682634730539, - "grad_norm": 0.15089209688583194, - "learning_rate": 1.980735332318954e-06, - "loss": 0.0005, + "epoch": 2.5744, + "grad_norm": 0.06351784277481294, + "learning_rate": 1.037970428318118e-06, + "loss": 0.0003, "step": 1609 }, { - "epoch": 2.410179640718563, - "grad_norm": 0.04533706932582074, - "learning_rate": 1.9710862258674714e-06, - "loss": 0.0002, + "epoch": 2.576, + "grad_norm": 0.0030415667014872553, + "learning_rate": 1.0303174233840529e-06, + "loss": 0.0, "step": 1610 }, { - "epoch": 2.411676646706587, - "grad_norm": 0.05696073871947218, - "learning_rate": 1.9614581093388964e-06, - "loss": 0.0004, + "epoch": 2.5776, + "grad_norm": 0.0023718507402067097, + "learning_rate": 1.022691203283661e-06, + "loss": 0.0, "step": 1611 }, { - "epoch": 2.413173652694611, - "grad_norm": 0.044373523736829394, - "learning_rate": 1.951851007903931e-06, + "epoch": 2.5792, + "grad_norm": 0.09427354766258768, + "learning_rate": 1.0150917907899926e-06, "loss": 0.0003, "step": 1612 }, { - "epoch": 2.4146706586826348, - "grad_norm": 0.1647299050829959, - "learning_rate": 1.9422649466783427e-06, - "loss": 0.0009, + "epoch": 2.5808, + "grad_norm": 0.05245903646176129, + "learning_rate": 1.0075192085960451e-06, + "loss": 0.0003, "step": 1613 }, { - "epoch": 2.4161676646706587, - "grad_norm": 0.08092462607166259, - "learning_rate": 1.9326999507228916e-06, - "loss": 0.0004, + "epoch": 2.5824, + "grad_norm": 0.014390080017914319, + "learning_rate": 9.999734793146998e-07, + "loss": 0.0001, "step": 1614 }, { - "epoch": 2.4176646706586826, - "grad_norm": 0.08726562462534888, - "learning_rate": 1.923156045043265e-06, - "loss": 0.0007, + "epoch": 2.584, + "grad_norm": 0.0019412655789413306, + "learning_rate": 9.924546254786493e-07, + "loss": 0.0, "step": 1615 }, { - "epoch": 2.4191616766467066, - "grad_norm": 0.0684185106844218, - "learning_rate": 1.9136332545900215e-06, - "loss": 0.0013, + "epoch": 2.5856, + "grad_norm": 0.0034950463726222372, + "learning_rate": 9.849626695403326e-07, + "loss": 0.0, "step": 1616 }, { - "epoch": 2.4206586826347305, - "grad_norm": 0.028069249948623098, - "learning_rate": 1.9041316042585113e-06, - "loss": 0.0002, + "epoch": 2.5872, + "grad_norm": 0.015087065616657028, + "learning_rate": 9.77497633871868e-07, + "loss": 0.0001, "step": 1617 }, { - "epoch": 2.4221556886227544, - "grad_norm": 0.024791281262007244, - "learning_rate": 1.8946511188888194e-06, + "epoch": 2.5888, + "grad_norm": 0.08578051961216712, + "learning_rate": 9.700595407649805e-07, "loss": 0.0002, "step": 1618 }, { - "epoch": 2.4236526946107784, - "grad_norm": 0.07988190689532107, - "learning_rate": 1.8851918232657007e-06, - "loss": 0.0005, + "epoch": 2.5904, + "grad_norm": 0.006834638154714175, + "learning_rate": 9.62648412430951e-07, + "loss": 0.0001, "step": 1619 }, { - "epoch": 2.4251497005988023, - "grad_norm": 0.027087754543380946, - "learning_rate": 1.8757537421185145e-06, - "loss": 0.0002, + "epoch": 2.592, + "grad_norm": 0.027796454328559976, + "learning_rate": 9.5526427100053e-07, + "loss": 0.0001, "step": 1620 }, { - "epoch": 2.4266467065868262, - "grad_norm": 0.030964818451742402, - "learning_rate": 1.8663369001211552e-06, - "loss": 0.0002, + "epoch": 2.5936, + "grad_norm": 0.03503244641418847, + "learning_rate": 9.479071385238892e-07, + "loss": 0.0001, "step": 1621 }, { - "epoch": 2.42814371257485, - "grad_norm": 0.042357471381083395, - "learning_rate": 1.8569413218919986e-06, - "loss": 0.0002, + "epoch": 2.5952, + "grad_norm": 0.01657949893006547, + "learning_rate": 9.40577036970538e-07, + "loss": 0.0001, "step": 1622 }, { - "epoch": 2.429640718562874, - "grad_norm": 0.050604316712189235, - "learning_rate": 1.8475670319938266e-06, - "loss": 0.0005, + "epoch": 2.5968, + "grad_norm": 0.011288009328992788, + "learning_rate": 9.332739882292752e-07, + "loss": 0.0001, "step": 1623 }, { - "epoch": 2.431137724550898, - "grad_norm": 0.05075746766744105, - "learning_rate": 1.8382140549337678e-06, - "loss": 0.0004, + "epoch": 2.5984, + "grad_norm": 0.0013096920311549136, + "learning_rate": 9.259980141081115e-07, + "loss": 0.0, "step": 1624 }, { - "epoch": 2.432634730538922, - "grad_norm": 0.06111785771892014, - "learning_rate": 1.8288824151632311e-06, - "loss": 0.0003, + "epoch": 2.6, + "grad_norm": 0.007070503131366707, + "learning_rate": 9.187491363342094e-07, + "loss": 0.0001, "step": 1625 }, { - "epoch": 2.434131736526946, - "grad_norm": 0.07080928006997747, - "learning_rate": 1.8195721370778474e-06, - "loss": 0.0007, + "epoch": 2.6016, + "grad_norm": 0.020044458861338762, + "learning_rate": 9.115273765538202e-07, + "loss": 0.0001, "step": 1626 }, { - "epoch": 2.43562874251497, - "grad_norm": 0.027992654625571187, - "learning_rate": 1.810283245017398e-06, - "loss": 0.0002, + "epoch": 2.6032, + "grad_norm": 0.0021069777638839694, + "learning_rate": 9.043327563322113e-07, + "loss": 0.0, "step": 1627 }, { - "epoch": 2.437125748502994, - "grad_norm": 0.03162118910496192, - "learning_rate": 1.8010157632657544e-06, - "loss": 0.0002, + "epoch": 2.6048, + "grad_norm": 0.0010715545791748374, + "learning_rate": 8.971652971536149e-07, + "loss": 0.0, "step": 1628 }, { - "epoch": 2.4386227544910177, - "grad_norm": 0.0662498938932939, - "learning_rate": 1.791769716050823e-06, - "loss": 0.0005, + "epoch": 2.6064, + "grad_norm": 0.1043762984750472, + "learning_rate": 8.900250204211513e-07, + "loss": 0.0008, "step": 1629 }, { - "epoch": 2.440119760479042, - "grad_norm": 0.06063032513396334, - "learning_rate": 1.782545127544466e-06, - "loss": 0.0015, + "epoch": 2.608, + "grad_norm": 0.08801440909859667, + "learning_rate": 8.829119474567672e-07, + "loss": 0.001, "step": 1630 }, { - "epoch": 2.441616766467066, - "grad_norm": 0.031943908561759334, - "learning_rate": 1.7733420218624486e-06, - "loss": 0.0002, + "epoch": 2.6096, + "grad_norm": 0.024913048320730432, + "learning_rate": 8.758260995011825e-07, + "loss": 0.0001, "step": 1631 }, { - "epoch": 2.44311377245509, - "grad_norm": 0.10126704114641819, - "learning_rate": 1.7641604230643738e-06, - "loss": 0.0011, + "epoch": 2.6112, + "grad_norm": 0.02846927487988488, + "learning_rate": 8.687674977138116e-07, + "loss": 0.0001, "step": 1632 }, { - "epoch": 2.444610778443114, - "grad_norm": 0.03485971575240973, - "learning_rate": 1.7550003551536177e-06, - "loss": 0.0003, + "epoch": 2.6128, + "grad_norm": 0.003911423188008586, + "learning_rate": 8.617361631727139e-07, + "loss": 0.0001, "step": 1633 }, { - "epoch": 2.446107784431138, - "grad_norm": 0.05038807899416904, - "learning_rate": 1.7458618420772777e-06, - "loss": 0.0004, + "epoch": 2.6144, + "grad_norm": 0.0020972048086177397, + "learning_rate": 8.547321168745192e-07, + "loss": 0.0, "step": 1634 }, { - "epoch": 2.447604790419162, - "grad_norm": 0.034427102319247505, - "learning_rate": 1.7367449077260823e-06, + "epoch": 2.616, + "grad_norm": 0.004155733735707268, + "learning_rate": 8.477553797343729e-07, "loss": 0.0001, "step": 1635 }, { - "epoch": 2.4491017964071857, - "grad_norm": 0.040602438643352785, - "learning_rate": 1.7276495759343647e-06, - "loss": 0.0003, + "epoch": 2.6176, + "grad_norm": 0.007412239826328842, + "learning_rate": 8.40805972585872e-07, + "loss": 0.0001, "step": 1636 }, { - "epoch": 2.4505988023952097, - "grad_norm": 0.0300674690331923, - "learning_rate": 1.7185758704799738e-06, - "loss": 0.0002, + "epoch": 2.6192, + "grad_norm": 0.005724175649760873, + "learning_rate": 8.338839161809997e-07, + "loss": 0.0001, "step": 1637 }, { - "epoch": 2.4520958083832336, - "grad_norm": 0.04412630684877554, - "learning_rate": 1.709523815084222e-06, - "loss": 0.0002, + "epoch": 2.6208, + "grad_norm": 0.012984937813147655, + "learning_rate": 8.269892311900696e-07, + "loss": 0.0001, "step": 1638 }, { - "epoch": 2.4535928143712575, - "grad_norm": 0.09352208849724938, - "learning_rate": 1.700493433411825e-06, - "loss": 0.0008, + "epoch": 2.6224, + "grad_norm": 0.0017638026481396388, + "learning_rate": 8.201219382016556e-07, + "loss": 0.0, "step": 1639 }, { - "epoch": 2.4550898203592815, - "grad_norm": 0.09616189622897968, - "learning_rate": 1.6914847490708296e-06, - "loss": 0.0006, + "epoch": 2.624, + "grad_norm": 0.057650553661999726, + "learning_rate": 8.132820577225386e-07, + "loss": 0.0002, "step": 1640 }, { - "epoch": 2.4565868263473054, - "grad_norm": 0.04164673607392628, - "learning_rate": 1.6824977856125745e-06, - "loss": 0.0003, + "epoch": 2.6256, + "grad_norm": 0.0011505844788008868, + "learning_rate": 8.06469610177636e-07, + "loss": 0.0, "step": 1641 }, { - "epoch": 2.4580838323353293, - "grad_norm": 0.02769372221761515, - "learning_rate": 1.6735325665315927e-06, - "loss": 0.0002, + "epoch": 2.6272, + "grad_norm": 0.001737434059617232, + "learning_rate": 7.996846159099558e-07, + "loss": 0.0, "step": 1642 }, { - "epoch": 2.4595808383233533, - "grad_norm": 0.03785938337316224, - "learning_rate": 1.6645891152655903e-06, - "loss": 0.0002, + "epoch": 2.6288, + "grad_norm": 0.0014599876484965232, + "learning_rate": 7.92927095180518e-07, + "loss": 0.0, "step": 1643 }, { - "epoch": 2.461077844311377, - "grad_norm": 0.07580082376824222, - "learning_rate": 1.6556674551953555e-06, - "loss": 0.0009, + "epoch": 2.6304, + "grad_norm": 0.01040258046892802, + "learning_rate": 7.861970681683051e-07, + "loss": 0.0001, "step": 1644 }, { - "epoch": 2.462574850299401, - "grad_norm": 0.03577911495005629, - "learning_rate": 1.6467676096447083e-06, + "epoch": 2.632, + "grad_norm": 0.004601741813038693, + "learning_rate": 7.794945549701993e-07, "loss": 0.0001, "step": 1645 }, { - "epoch": 2.464071856287425, - "grad_norm": 0.04888563503909771, - "learning_rate": 1.637889601880448e-06, - "loss": 0.0003, + "epoch": 2.6336, + "grad_norm": 0.005137280826948722, + "learning_rate": 7.728195756009204e-07, + "loss": 0.0001, "step": 1646 }, { - "epoch": 2.465568862275449, - "grad_norm": 0.028629948760254195, - "learning_rate": 1.6290334551122678e-06, - "loss": 0.0002, + "epoch": 2.6352, + "grad_norm": 0.009799188810212509, + "learning_rate": 7.661721499929753e-07, + "loss": 0.0001, "step": 1647 }, { - "epoch": 2.467065868263473, - "grad_norm": 0.09745710567212712, - "learning_rate": 1.6201991924927252e-06, - "loss": 0.0008, + "epoch": 2.6368, + "grad_norm": 0.001969839919556338, + "learning_rate": 7.595522979965819e-07, + "loss": 0.0, "step": 1648 }, { - "epoch": 2.468562874251497, - "grad_norm": 0.007786179248720061, - "learning_rate": 1.6113868371171592e-06, - "loss": 0.0001, + "epoch": 2.6384, + "grad_norm": 0.0018773395021817333, + "learning_rate": 7.529600393796232e-07, + "loss": 0.0, "step": 1649 }, { - "epoch": 2.470059880239521, - "grad_norm": 0.09307414131421253, - "learning_rate": 1.6025964120236337e-06, - "loss": 0.0003, + "epoch": 2.64, + "grad_norm": 0.024320439130425177, + "learning_rate": 7.463953938275859e-07, + "loss": 0.0002, "step": 1650 }, { - "epoch": 2.4715568862275448, - "grad_norm": 0.08000883258976127, - "learning_rate": 1.5938279401928925e-06, - "loss": 0.0007, + "epoch": 2.6416, + "grad_norm": 0.004900882592944951, + "learning_rate": 7.398583809434944e-07, + "loss": 0.0001, "step": 1651 }, { - "epoch": 2.4730538922155687, - "grad_norm": 0.08529740066647153, - "learning_rate": 1.58508144454827e-06, - "loss": 0.0006, + "epoch": 2.6432, + "grad_norm": 0.003407605039889639, + "learning_rate": 7.333490202478666e-07, + "loss": 0.0, "step": 1652 }, { - "epoch": 2.4745508982035926, - "grad_norm": 0.1732944945698624, - "learning_rate": 1.576356947955665e-06, - "loss": 0.0007, + "epoch": 2.6448, + "grad_norm": 0.00694630203245316, + "learning_rate": 7.268673311786378e-07, + "loss": 0.0001, "step": 1653 }, { - "epoch": 2.4760479041916166, - "grad_norm": 0.05910999084092352, - "learning_rate": 1.5676544732234477e-06, - "loss": 0.0004, + "epoch": 2.6464, + "grad_norm": 0.006897439205802054, + "learning_rate": 7.204133330911179e-07, + "loss": 0.0001, "step": 1654 }, { - "epoch": 2.477544910179641, - "grad_norm": 0.03736554597645004, - "learning_rate": 1.5589740431024347e-06, - "loss": 0.0003, + "epoch": 2.648, + "grad_norm": 0.0036013533467608777, + "learning_rate": 7.1398704525792e-07, + "loss": 0.0001, "step": 1655 }, { - "epoch": 2.479041916167665, - "grad_norm": 0.05815438383385881, - "learning_rate": 1.5503156802857998e-06, - "loss": 0.0003, + "epoch": 2.6496, + "grad_norm": 0.00605619321620667, + "learning_rate": 7.07588486868922e-07, + "loss": 0.0001, "step": 1656 }, { - "epoch": 2.480538922155689, - "grad_norm": 0.009680132499123993, - "learning_rate": 1.5416794074090258e-06, + "epoch": 2.6512000000000002, + "grad_norm": 0.012095497687233375, + "learning_rate": 7.012176770311863e-07, "loss": 0.0001, "step": 1657 }, { - "epoch": 2.4820359281437128, - "grad_norm": 0.043831899612723345, - "learning_rate": 1.5330652470498574e-06, - "loss": 0.0003, + "epoch": 2.6528, + "grad_norm": 0.04516819018819963, + "learning_rate": 6.948746347689184e-07, + "loss": 0.0001, "step": 1658 }, { - "epoch": 2.4835329341317367, - "grad_norm": 0.0208540727997247, - "learning_rate": 1.5244732217282132e-06, + "epoch": 2.6544, + "grad_norm": 0.011096821132557921, + "learning_rate": 6.885593790234057e-07, "loss": 0.0001, "step": 1659 }, { - "epoch": 2.4850299401197606, - "grad_norm": 0.02609799149850617, - "learning_rate": 1.5159033539061607e-06, - "loss": 0.0002, + "epoch": 2.656, + "grad_norm": 0.008804471813855973, + "learning_rate": 6.8227192865296e-07, + "loss": 0.0001, "step": 1660 }, { - "epoch": 2.4865269461077846, - "grad_norm": 0.012809457260665172, - "learning_rate": 1.507355665987833e-06, - "loss": 0.0001, + "epoch": 2.6576, + "grad_norm": 0.0012046966278960764, + "learning_rate": 6.760123024328624e-07, + "loss": 0.0, "step": 1661 }, { - "epoch": 2.4880239520958085, - "grad_norm": 0.03092643008061081, - "learning_rate": 1.4988301803193773e-06, - "loss": 0.0002, + "epoch": 2.6592000000000002, + "grad_norm": 0.003173321378690416, + "learning_rate": 6.697805190553086e-07, + "loss": 0.0001, "step": 1662 }, { - "epoch": 2.4895209580838324, - "grad_norm": 0.11021032771407564, - "learning_rate": 1.4903269191889068e-06, - "loss": 0.0008, + "epoch": 2.6608, + "grad_norm": 0.007078437592975865, + "learning_rate": 6.635765971293484e-07, + "loss": 0.0001, "step": 1663 }, { - "epoch": 2.4910179640718564, - "grad_norm": 0.14645604895626768, - "learning_rate": 1.4818459048264179e-06, - "loss": 0.0008, + "epoch": 2.6624, + "grad_norm": 0.0018172268038980372, + "learning_rate": 6.574005551808338e-07, + "loss": 0.0, "step": 1664 }, { - "epoch": 2.4925149700598803, - "grad_norm": 0.08473709608733745, - "learning_rate": 1.4733871594037629e-06, - "loss": 0.0005, + "epoch": 2.664, + "grad_norm": 0.003387870664722808, + "learning_rate": 6.512524116523633e-07, + "loss": 0.0, "step": 1665 }, { - "epoch": 2.4940119760479043, - "grad_norm": 0.28509040224007653, - "learning_rate": 1.464950705034568e-06, - "loss": 0.0012, + "epoch": 2.6656, + "grad_norm": 0.002542377727573644, + "learning_rate": 6.451321849032289e-07, + "loss": 0.0, "step": 1666 }, { - "epoch": 2.495508982035928, - "grad_norm": 0.024381609398717058, - "learning_rate": 1.4565365637741868e-06, + "epoch": 2.6672000000000002, + "grad_norm": 0.00496977538975312, + "learning_rate": 6.390398932093555e-07, "loss": 0.0001, "step": 1667 }, { - "epoch": 2.497005988023952, - "grad_norm": 0.09142786032282342, - "learning_rate": 1.4481447576196372e-06, - "loss": 0.0013, + "epoch": 2.6688, + "grad_norm": 0.004811840381794632, + "learning_rate": 6.329755547632499e-07, + "loss": 0.0001, "step": 1668 }, { - "epoch": 2.498502994011976, - "grad_norm": 0.09847239340356413, - "learning_rate": 1.439775308509549e-06, - "loss": 0.0007, + "epoch": 2.6704, + "grad_norm": 0.01883487701635383, + "learning_rate": 6.269391876739494e-07, + "loss": 0.0001, "step": 1669 }, { - "epoch": 2.5, - "grad_norm": 0.03477841977609638, - "learning_rate": 1.4314282383241097e-06, - "loss": 0.0002, + "epoch": 2.672, + "grad_norm": 0.006518415776134661, + "learning_rate": 6.209308099669598e-07, + "loss": 0.0001, "step": 1670 }, { - "epoch": 2.501497005988024, - "grad_norm": 0.04628201955871934, - "learning_rate": 1.423103568884986e-06, - "loss": 0.0003, + "epoch": 2.6736, + "grad_norm": 0.002339312583530842, + "learning_rate": 6.149504395842087e-07, + "loss": 0.0, "step": 1671 }, { - "epoch": 2.502994011976048, - "grad_norm": 0.09307706909626917, - "learning_rate": 1.4148013219552992e-06, - "loss": 0.0007, + "epoch": 2.6752000000000002, + "grad_norm": 0.009995307579150481, + "learning_rate": 6.089980943839924e-07, + "loss": 0.0001, "step": 1672 }, { - "epoch": 2.504491017964072, - "grad_norm": 0.07777395668753506, - "learning_rate": 1.406521519239543e-06, - "loss": 0.0004, + "epoch": 2.6768, + "grad_norm": 0.0052798775377979335, + "learning_rate": 6.030737921409169e-07, + "loss": 0.0, "step": 1673 }, { - "epoch": 2.5059880239520957, - "grad_norm": 0.038725094614758614, - "learning_rate": 1.3982641823835385e-06, - "loss": 0.0002, + "epoch": 2.6784, + "grad_norm": 0.00539784506601244, + "learning_rate": 5.971775505458444e-07, + "loss": 0.0001, "step": 1674 }, { - "epoch": 2.5074850299401197, - "grad_norm": 0.09347821247400721, - "learning_rate": 1.3900293329743707e-06, - "loss": 0.0008, + "epoch": 2.68, + "grad_norm": 0.022433981764194488, + "learning_rate": 5.913093872058528e-07, + "loss": 0.0001, "step": 1675 }, { - "epoch": 2.5089820359281436, - "grad_norm": 0.011973171492511713, - "learning_rate": 1.381816992540338e-06, + "epoch": 2.6816, + "grad_norm": 0.028818795412867394, + "learning_rate": 5.854693196441641e-07, "loss": 0.0001, "step": 1676 }, { - "epoch": 2.5104790419161676, - "grad_norm": 0.03349515304688116, - "learning_rate": 1.3736271825508973e-06, - "loss": 0.0002, + "epoch": 2.6832000000000003, + "grad_norm": 0.004945354649552436, + "learning_rate": 5.796573653001091e-07, + "loss": 0.0001, "step": 1677 }, { - "epoch": 2.5119760479041915, - "grad_norm": 0.01810852300179121, - "learning_rate": 1.3654599244166e-06, + "epoch": 2.6848, + "grad_norm": 0.0030640210420676706, + "learning_rate": 5.738735415290642e-07, "loss": 0.0001, "step": 1678 }, { - "epoch": 2.5134730538922154, - "grad_norm": 0.04446933710711642, - "learning_rate": 1.3573152394890398e-06, - "loss": 0.0003, + "epoch": 2.6864, + "grad_norm": 0.020235631967633456, + "learning_rate": 5.681178656024055e-07, + "loss": 0.0001, "step": 1679 }, { - "epoch": 2.5149700598802394, - "grad_norm": 0.05815871825675632, - "learning_rate": 1.3491931490608023e-06, - "loss": 0.0002, + "epoch": 2.6879999999999997, + "grad_norm": 0.006967025977732761, + "learning_rate": 5.62390354707455e-07, + "loss": 0.0001, "step": 1680 }, { - "epoch": 2.5164670658682633, - "grad_norm": 0.13415081479279004, - "learning_rate": 1.341093674365398e-06, - "loss": 0.0017, + "epoch": 2.6896, + "grad_norm": 0.0014463248530798556, + "learning_rate": 5.56691025947429e-07, + "loss": 0.0, "step": 1681 }, { - "epoch": 2.5179640718562872, - "grad_norm": 0.026418150113077105, - "learning_rate": 1.333016836577219e-06, - "loss": 0.0002, + "epoch": 2.6912000000000003, + "grad_norm": 0.033003970062304676, + "learning_rate": 5.510198963413882e-07, + "loss": 0.0001, "step": 1682 }, { - "epoch": 2.519461077844311, - "grad_norm": 0.05637402946123024, - "learning_rate": 1.3249626568114716e-06, - "loss": 0.0004, + "epoch": 2.6928, + "grad_norm": 0.005260609035935109, + "learning_rate": 5.453769828241872e-07, + "loss": 0.0001, "step": 1683 }, { - "epoch": 2.520958083832335, - "grad_norm": 0.03094599306924757, - "learning_rate": 1.3169311561241371e-06, - "loss": 0.0002, + "epoch": 2.6944, + "grad_norm": 0.002722753065014123, + "learning_rate": 5.397623022464227e-07, + "loss": 0.0, "step": 1684 }, { - "epoch": 2.522455089820359, - "grad_norm": 0.027335429413912742, - "learning_rate": 1.308922355511899e-06, - "loss": 0.0002, + "epoch": 2.6959999999999997, + "grad_norm": 0.01670882765259882, + "learning_rate": 5.341758713743828e-07, + "loss": 0.0001, "step": 1685 }, { - "epoch": 2.523952095808383, - "grad_norm": 0.1488861125220278, - "learning_rate": 1.300936275912098e-06, - "loss": 0.001, + "epoch": 2.6976, + "grad_norm": 0.002879707146007582, + "learning_rate": 5.286177068899989e-07, + "loss": 0.0, "step": 1686 }, { - "epoch": 2.525449101796407, - "grad_norm": 0.05818953193532018, - "learning_rate": 1.2929729382026757e-06, - "loss": 0.0004, + "epoch": 2.6992000000000003, + "grad_norm": 0.0020607402388883823, + "learning_rate": 5.230878253907911e-07, + "loss": 0.0, "step": 1687 }, { - "epoch": 2.5269461077844313, - "grad_norm": 0.07088482664758781, - "learning_rate": 1.285032363202121e-06, - "loss": 0.0007, + "epoch": 2.7008, + "grad_norm": 0.00563594549364586, + "learning_rate": 5.175862433898282e-07, + "loss": 0.0001, "step": 1688 }, { - "epoch": 2.5284431137724552, - "grad_norm": 0.09878385939626243, - "learning_rate": 1.277114571669411e-06, - "loss": 0.0005, + "epoch": 2.7024, + "grad_norm": 0.005836874900768736, + "learning_rate": 5.121129773156663e-07, + "loss": 0.0001, "step": 1689 }, { - "epoch": 2.529940119760479, - "grad_norm": 0.08834537093128993, - "learning_rate": 1.2692195843039668e-06, - "loss": 0.0005, + "epoch": 2.7039999999999997, + "grad_norm": 0.016843751964607703, + "learning_rate": 5.066680435123106e-07, + "loss": 0.0001, "step": 1690 }, { - "epoch": 2.531437125748503, - "grad_norm": 0.028045651170751838, - "learning_rate": 1.2613474217455856e-06, - "loss": 0.0004, + "epoch": 2.7056, + "grad_norm": 0.071560272626772, + "learning_rate": 5.012514582391592e-07, + "loss": 0.0003, "step": 1691 }, { - "epoch": 2.532934131736527, - "grad_norm": 0.08064194210729632, - "learning_rate": 1.2534981045743987e-06, - "loss": 0.0003, + "epoch": 2.7072000000000003, + "grad_norm": 0.022576956052241456, + "learning_rate": 4.95863237670956e-07, + "loss": 0.0001, "step": 1692 }, { - "epoch": 2.534431137724551, - "grad_norm": 0.06878912490099846, - "learning_rate": 1.2456716533108094e-06, - "loss": 0.0003, + "epoch": 2.7088, + "grad_norm": 0.0030571293977662457, + "learning_rate": 4.905033978977492e-07, + "loss": 0.0, "step": 1693 }, { - "epoch": 2.535928143712575, - "grad_norm": 0.09475527318363501, - "learning_rate": 1.2378680884154437e-06, - "loss": 0.0009, + "epoch": 2.7104, + "grad_norm": 0.01487082783820577, + "learning_rate": 4.851719549248301e-07, + "loss": 0.0001, "step": 1694 }, { - "epoch": 2.537425149700599, - "grad_norm": 0.06988351944817416, - "learning_rate": 1.230087430289103e-06, - "loss": 0.0005, + "epoch": 2.7119999999999997, + "grad_norm": 0.002060687324143398, + "learning_rate": 4.798689246727006e-07, + "loss": 0.0001, "step": 1695 }, { - "epoch": 2.538922155688623, - "grad_norm": 0.06025644551142922, - "learning_rate": 1.2223296992726885e-06, - "loss": 0.0003, + "epoch": 2.7136, + "grad_norm": 0.0040990121477614315, + "learning_rate": 4.7459432297701224e-07, + "loss": 0.0, "step": 1696 }, { - "epoch": 2.5404191616766467, - "grad_norm": 0.05586088137246275, - "learning_rate": 1.2145949156471781e-06, - "loss": 0.0003, + "epoch": 2.7152, + "grad_norm": 0.0399491811244669, + "learning_rate": 4.693481655885257e-07, + "loss": 0.0001, "step": 1697 }, { - "epoch": 2.5419161676646707, - "grad_norm": 0.019554709115490496, - "learning_rate": 1.2068830996335512e-06, - "loss": 0.0001, + "epoch": 2.7168, + "grad_norm": 0.002561971861201848, + "learning_rate": 4.6413046817306404e-07, + "loss": 0.0, "step": 1698 }, { - "epoch": 2.5434131736526946, - "grad_norm": 0.060450587058907566, - "learning_rate": 1.1991942713927452e-06, - "loss": 0.0003, + "epoch": 2.7184, + "grad_norm": 0.055233561660464434, + "learning_rate": 4.58941246311464e-07, + "loss": 0.0004, "step": 1699 }, { - "epoch": 2.5449101796407185, - "grad_norm": 0.05151733193244522, - "learning_rate": 1.1915284510255998e-06, - "loss": 0.0003, + "epoch": 2.7199999999999998, + "grad_norm": 0.020471596392817745, + "learning_rate": 4.5378051549952783e-07, + "loss": 0.0001, "step": 1700 }, { - "epoch": 2.5464071856287425, - "grad_norm": 0.08154849069751298, - "learning_rate": 1.1838856585728042e-06, - "loss": 0.0008, + "epoch": 2.7216, + "grad_norm": 0.0053295203443414355, + "learning_rate": 4.4864829114798394e-07, + "loss": 0.0001, "step": 1701 }, { - "epoch": 2.5479041916167664, - "grad_norm": 0.025258520621501784, - "learning_rate": 1.1762659140148524e-06, - "loss": 0.0002, + "epoch": 2.7232, + "grad_norm": 0.001419225495881021, + "learning_rate": 4.4354458858242857e-07, + "loss": 0.0, "step": 1702 }, { - "epoch": 2.5494011976047903, - "grad_norm": 0.08809425431554686, - "learning_rate": 1.1686692372719721e-06, - "loss": 0.0006, + "epoch": 2.7248, + "grad_norm": 0.017354646612918568, + "learning_rate": 4.384694230432984e-07, + "loss": 0.0001, "step": 1703 }, { - "epoch": 2.5508982035928143, - "grad_norm": 0.09231116438170844, - "learning_rate": 1.1610956482040992e-06, - "loss": 0.0005, + "epoch": 2.7264, + "grad_norm": 0.0007440305948982215, + "learning_rate": 4.3342280968580287e-07, + "loss": 0.0, "step": 1704 }, { - "epoch": 2.552395209580838, - "grad_norm": 0.020971139154724327, - "learning_rate": 1.1535451666108022e-06, - "loss": 0.0002, + "epoch": 2.7279999999999998, + "grad_norm": 0.00373706251755768, + "learning_rate": 4.2840476357989825e-07, + "loss": 0.0001, "step": 1705 }, { - "epoch": 2.553892215568862, - "grad_norm": 0.07893486730149059, - "learning_rate": 1.1460178122312404e-06, - "loss": 0.0006, + "epoch": 2.7296, + "grad_norm": 0.007354008374441261, + "learning_rate": 4.2341529971023253e-07, + "loss": 0.0001, "step": 1706 }, { - "epoch": 2.555389221556886, - "grad_norm": 0.019260558530191367, - "learning_rate": 1.1385136047441214e-06, - "loss": 0.0001, + "epoch": 2.7312, + "grad_norm": 0.04433771596419746, + "learning_rate": 4.184544329761009e-07, + "loss": 0.0003, "step": 1707 }, { - "epoch": 2.55688622754491, - "grad_norm": 0.022801882208881713, - "learning_rate": 1.1310325637676233e-06, - "loss": 0.0002, + "epoch": 2.7328, + "grad_norm": 0.0019717761021554323, + "learning_rate": 4.1352217819140337e-07, + "loss": 0.0, "step": 1708 }, { - "epoch": 2.5583832335329344, - "grad_norm": 0.006273256765097044, - "learning_rate": 1.1235747088593773e-06, - "loss": 0.0001, + "epoch": 2.7344, + "grad_norm": 0.004624100015738466, + "learning_rate": 4.0861855008460403e-07, + "loss": 0.0, "step": 1709 }, { - "epoch": 2.5598802395209583, - "grad_norm": 0.025272286951359548, - "learning_rate": 1.1161400595163885e-06, - "loss": 0.0002, + "epoch": 2.7359999999999998, + "grad_norm": 0.011242224418644715, + "learning_rate": 4.037435632986786e-07, + "loss": 0.0001, "step": 1710 }, { - "epoch": 2.5613772455089823, - "grad_norm": 0.06144529667834819, - "learning_rate": 1.1087286351749981e-06, - "loss": 0.0005, + "epoch": 2.7376, + "grad_norm": 0.0031526690950823715, + "learning_rate": 3.988972323910778e-07, + "loss": 0.0, "step": 1711 }, { - "epoch": 2.562874251497006, - "grad_norm": 0.018647360603314567, - "learning_rate": 1.1013404552108365e-06, - "loss": 0.0001, + "epoch": 2.7392, + "grad_norm": 0.0013638216653592268, + "learning_rate": 3.9407957183368093e-07, + "loss": 0.0, "step": 1712 }, { - "epoch": 2.56437125748503, - "grad_norm": 0.040545615112091975, - "learning_rate": 1.0939755389387552e-06, - "loss": 0.0003, + "epoch": 2.7408, + "grad_norm": 0.003093293558689636, + "learning_rate": 3.8929059601275463e-07, + "loss": 0.0, "step": 1713 }, { - "epoch": 2.565868263473054, - "grad_norm": 0.024320530278532197, - "learning_rate": 1.0866339056127995e-06, - "loss": 0.0002, + "epoch": 2.7424, + "grad_norm": 0.013540525963096324, + "learning_rate": 3.845303192289074e-07, + "loss": 0.0001, "step": 1714 }, { - "epoch": 2.567365269461078, - "grad_norm": 0.092450238455231, - "learning_rate": 1.0793155744261352e-06, - "loss": 0.0005, + "epoch": 2.7439999999999998, + "grad_norm": 0.08169187849279333, + "learning_rate": 3.797987556970495e-07, + "loss": 0.0007, "step": 1715 }, { - "epoch": 2.568862275449102, - "grad_norm": 0.05655287368538696, - "learning_rate": 1.0720205645110193e-06, - "loss": 0.0004, + "epoch": 2.7456, + "grad_norm": 0.0022705415448893134, + "learning_rate": 3.750959195463466e-07, + "loss": 0.0001, "step": 1716 }, { - "epoch": 2.570359281437126, - "grad_norm": 0.08988286767798637, - "learning_rate": 1.064748894938734e-06, - "loss": 0.0007, + "epoch": 2.7472, + "grad_norm": 0.0012207756210360957, + "learning_rate": 3.7042182482018074e-07, + "loss": 0.0, "step": 1717 }, { - "epoch": 2.57185628742515, - "grad_norm": 0.07500638380677978, - "learning_rate": 1.0575005847195442e-06, + "epoch": 2.7488, + "grad_norm": 0.07545932558598065, + "learning_rate": 3.6577648547611033e-07, "loss": 0.0004, "step": 1718 }, { - "epoch": 2.5733532934131738, - "grad_norm": 0.0925835650680482, - "learning_rate": 1.0502756528026526e-06, - "loss": 0.0005, + "epoch": 2.7504, + "grad_norm": 0.0015078232572364363, + "learning_rate": 3.611599153858214e-07, + "loss": 0.0, "step": 1719 }, { - "epoch": 2.5748502994011977, - "grad_norm": 0.06643680023971794, - "learning_rate": 1.0430741180761306e-06, - "loss": 0.0004, + "epoch": 2.752, + "grad_norm": 0.0012544209214220055, + "learning_rate": 3.5657212833509313e-07, + "loss": 0.0, "step": 1720 }, { - "epoch": 2.5763473053892216, - "grad_norm": 0.07290249607472542, - "learning_rate": 1.0358959993668981e-06, - "loss": 0.0005, + "epoch": 2.7536, + "grad_norm": 0.003472156993994934, + "learning_rate": 3.520131380237546e-07, + "loss": 0.0001, "step": 1721 }, { - "epoch": 2.5778443113772456, - "grad_norm": 0.01775705006856804, - "learning_rate": 1.0287413154406478e-06, - "loss": 0.0003, + "epoch": 2.7552, + "grad_norm": 0.0016692784689792757, + "learning_rate": 3.474829580656436e-07, + "loss": 0.0, "step": 1722 }, { - "epoch": 2.5793413173652695, - "grad_norm": 0.08273329531565872, - "learning_rate": 1.0216100850018117e-06, - "loss": 0.0006, + "epoch": 2.7568, + "grad_norm": 0.010967164559959268, + "learning_rate": 3.429816019885657e-07, + "loss": 0.0001, "step": 1723 }, { - "epoch": 2.5808383233532934, - "grad_norm": 0.028537281972692816, - "learning_rate": 1.0145023266935073e-06, - "loss": 0.0003, + "epoch": 2.7584, + "grad_norm": 0.0039539416570045305, + "learning_rate": 3.385090832342497e-07, + "loss": 0.0, "step": 1724 }, { - "epoch": 2.5823353293413174, - "grad_norm": 0.10369851400849742, - "learning_rate": 1.0074180590974847e-06, - "loss": 0.0012, + "epoch": 2.76, + "grad_norm": 0.001166654064799598, + "learning_rate": 3.3406541515832e-07, + "loss": 0.0, "step": 1725 }, { - "epoch": 2.5838323353293413, - "grad_norm": 0.08409029120428357, - "learning_rate": 1.0003573007340928e-06, - "loss": 0.0005, + "epoch": 2.7616, + "grad_norm": 0.016979602442845958, + "learning_rate": 3.296506110302422e-07, + "loss": 0.0001, "step": 1726 }, { - "epoch": 2.5853293413173652, - "grad_norm": 0.021599660351613818, - "learning_rate": 9.93320070062207e-07, - "loss": 0.0002, + "epoch": 2.7632, + "grad_norm": 0.005761879588366482, + "learning_rate": 3.252646840332918e-07, + "loss": 0.0, "step": 1727 }, { - "epoch": 2.586826347305389, - "grad_norm": 0.03987969894778265, - "learning_rate": 9.863063854792055e-07, - "loss": 0.0004, + "epoch": 2.7648, + "grad_norm": 0.015844050587905473, + "learning_rate": 3.209076472645112e-07, + "loss": 0.0001, "step": 1728 }, { - "epoch": 2.588323353293413, - "grad_norm": 0.0763711713078856, - "learning_rate": 9.793162653209053e-07, - "loss": 0.0005, + "epoch": 2.7664, + "grad_norm": 0.0008940851940196189, + "learning_rate": 3.16579513734675e-07, + "loss": 0.0, "step": 1729 }, { - "epoch": 2.589820359281437, - "grad_norm": 0.06750300685949333, - "learning_rate": 9.72349727861519e-07, - "loss": 0.0002, + "epoch": 2.768, + "grad_norm": 0.10241057217425732, + "learning_rate": 3.1228029636824477e-07, + "loss": 0.0004, "step": 1730 }, { - "epoch": 2.591317365269461, - "grad_norm": 0.1192723175715262, - "learning_rate": 9.654067913136079e-07, - "loss": 0.0011, + "epoch": 2.7696, + "grad_norm": 0.0011100895364327215, + "learning_rate": 3.080100080033388e-07, + "loss": 0.0, "step": 1731 }, { - "epoch": 2.592814371257485, - "grad_norm": 0.06166676002980035, - "learning_rate": 9.584874738280315e-07, - "loss": 0.0004, + "epoch": 2.7712, + "grad_norm": 0.008733426113929346, + "learning_rate": 3.037686613916857e-07, + "loss": 0.0001, "step": 1732 }, { - "epoch": 2.594311377245509, - "grad_norm": 0.08400165230894974, - "learning_rate": 9.515917934939089e-07, - "loss": 0.0005, + "epoch": 2.7728, + "grad_norm": 0.030213343478735843, + "learning_rate": 2.995562691985898e-07, + "loss": 0.0001, "step": 1733 }, { - "epoch": 2.595808383233533, - "grad_norm": 0.02748157877354551, - "learning_rate": 9.447197683385579e-07, - "loss": 0.0002, + "epoch": 2.7744, + "grad_norm": 0.01515621419653102, + "learning_rate": 2.9537284400289354e-07, + "loss": 0.0001, "step": 1734 }, { - "epoch": 2.5973053892215567, - "grad_norm": 0.060188118708937415, - "learning_rate": 9.378714163274571e-07, - "loss": 0.0004, + "epoch": 2.776, + "grad_norm": 0.06292426446852466, + "learning_rate": 2.9121839829693857e-07, + "loss": 0.0003, "step": 1735 }, { - "epoch": 2.5988023952095807, - "grad_norm": 0.07502732321886861, - "learning_rate": 9.310467553641967e-07, - "loss": 0.0011, + "epoch": 2.7776, + "grad_norm": 0.0024209705442198126, + "learning_rate": 2.8709294448653223e-07, + "loss": 0.0, "step": 1736 }, { - "epoch": 2.6002994011976046, - "grad_norm": 0.05755762200466036, - "learning_rate": 9.242458032904311e-07, - "loss": 0.001, + "epoch": 2.7792, + "grad_norm": 0.00296748529685002, + "learning_rate": 2.829964948909048e-07, + "loss": 0.0001, "step": 1737 }, { - "epoch": 2.6017964071856285, - "grad_norm": 0.049212105593266754, - "learning_rate": 9.174685778858306e-07, - "loss": 0.0003, + "epoch": 2.7808, + "grad_norm": 0.0022259173904971598, + "learning_rate": 2.7892906174267653e-07, + "loss": 0.0, "step": 1738 }, { - "epoch": 2.6032934131736525, - "grad_norm": 0.08540752717152685, - "learning_rate": 9.107150968680445e-07, - "loss": 0.0009, + "epoch": 2.7824, + "grad_norm": 0.001630202727036233, + "learning_rate": 2.748906571878207e-07, + "loss": 0.0, "step": 1739 }, { - "epoch": 2.6047904191616764, - "grad_norm": 0.08879092487810307, - "learning_rate": 9.0398537789264e-07, - "loss": 0.0005, + "epoch": 2.784, + "grad_norm": 0.0015752150368830234, + "learning_rate": 2.708812932856253e-07, + "loss": 0.0, "step": 1740 }, { - "epoch": 2.6062874251497004, - "grad_norm": 0.059349429063367105, - "learning_rate": 8.972794385530658e-07, - "loss": 0.0003, + "epoch": 2.7856, + "grad_norm": 0.0008131726024755656, + "learning_rate": 2.6690098200866097e-07, + "loss": 0.0, "step": 1741 }, { - "epoch": 2.6077844311377243, - "grad_norm": 0.08005756215182938, - "learning_rate": 8.905972963806042e-07, - "loss": 0.0004, + "epoch": 2.7872, + "grad_norm": 0.002585841509949017, + "learning_rate": 2.6294973524274127e-07, + "loss": 0.0, "step": 1742 }, { - "epoch": 2.6092814371257482, - "grad_norm": 0.10352902840191881, - "learning_rate": 8.839389688443234e-07, - "loss": 0.0009, + "epoch": 2.7888, + "grad_norm": 0.0009328843269095667, + "learning_rate": 2.5902756478688674e-07, + "loss": 0.0, "step": 1743 }, { - "epoch": 2.6107784431137726, - "grad_norm": 0.0167977465176402, - "learning_rate": 8.773044733510338e-07, + "epoch": 2.7904, + "grad_norm": 0.00836227744998053, + "learning_rate": 2.551344823532964e-07, "loss": 0.0001, "step": 1744 }, { - "epoch": 2.6122754491017965, - "grad_norm": 0.04717208793979273, - "learning_rate": 8.706938272452414e-07, - "loss": 0.0003, + "epoch": 2.792, + "grad_norm": 0.009232310436466376, + "learning_rate": 2.5127049956730207e-07, + "loss": 0.0001, "step": 1745 }, { - "epoch": 2.6137724550898205, - "grad_norm": 0.15766515627181438, - "learning_rate": 8.641070478091063e-07, - "loss": 0.0015, + "epoch": 2.7936, + "grad_norm": 0.10369910632104141, + "learning_rate": 2.474356279673462e-07, + "loss": 0.0002, "step": 1746 }, { - "epoch": 2.6152694610778444, - "grad_norm": 0.05933348420600678, - "learning_rate": 8.575441522623906e-07, - "loss": 0.0005, + "epoch": 2.7952, + "grad_norm": 0.049261560321665294, + "learning_rate": 2.436298790049363e-07, + "loss": 0.0002, "step": 1747 }, { - "epoch": 2.6167664670658684, - "grad_norm": 0.16483126615536, - "learning_rate": 8.510051577624179e-07, - "loss": 0.0008, + "epoch": 2.7968, + "grad_norm": 0.04480099200664115, + "learning_rate": 2.398532640446161e-07, + "loss": 0.0002, "step": 1748 }, { - "epoch": 2.6182634730538923, - "grad_norm": 0.04305673654571666, - "learning_rate": 8.444900814040258e-07, - "loss": 0.0002, + "epoch": 2.7984, + "grad_norm": 0.0059019930564683494, + "learning_rate": 2.3610579436392999e-07, + "loss": 0.0, "step": 1749 }, { - "epoch": 2.6197604790419162, - "grad_norm": 0.0866720060609538, - "learning_rate": 8.379989402195255e-07, - "loss": 0.0004, + "epoch": 2.8, + "grad_norm": 0.0012335086772580393, + "learning_rate": 2.3238748115339327e-07, + "loss": 0.0, "step": 1750 }, { - "epoch": 2.62125748502994, - "grad_norm": 0.04999929716635118, - "learning_rate": 8.315317511786547e-07, - "loss": 0.0003, + "epoch": 2.8016, + "grad_norm": 0.0818394652065608, + "learning_rate": 2.2869833551645293e-07, + "loss": 0.0004, "step": 1751 }, { - "epoch": 2.622754491017964, - "grad_norm": 0.11556476223978758, - "learning_rate": 8.250885311885315e-07, - "loss": 0.0004, + "epoch": 2.8032, + "grad_norm": 0.022323579639708446, + "learning_rate": 2.2503836846945792e-07, + "loss": 0.0001, "step": 1752 }, { - "epoch": 2.624251497005988, - "grad_norm": 0.138338003099327, - "learning_rate": 8.186692970936127e-07, - "loss": 0.0008, + "epoch": 2.8048, + "grad_norm": 0.017704204237551684, + "learning_rate": 2.2140759094162468e-07, + "loss": 0.0001, "step": 1753 }, { - "epoch": 2.625748502994012, - "grad_norm": 0.09873045242936929, - "learning_rate": 8.1227406567565e-07, - "loss": 0.0005, + "epoch": 2.8064, + "grad_norm": 0.07857249008934615, + "learning_rate": 2.178060137750071e-07, + "loss": 0.0003, "step": 1754 }, { - "epoch": 2.627245508982036, - "grad_norm": 0.07887672568470554, - "learning_rate": 8.059028536536407e-07, - "loss": 0.0006, + "epoch": 2.808, + "grad_norm": 0.0017730005725699683, + "learning_rate": 2.1423364772445886e-07, + "loss": 0.0, "step": 1755 }, { - "epoch": 2.62874251497006, - "grad_norm": 0.07738930354523124, - "learning_rate": 7.995556776837964e-07, - "loss": 0.0005, + "epoch": 2.8096, + "grad_norm": 0.008609459644754248, + "learning_rate": 2.106905034576112e-07, + "loss": 0.0001, "step": 1756 }, { - "epoch": 2.6302395209580838, - "grad_norm": 0.035685651132636415, - "learning_rate": 7.932325543594821e-07, - "loss": 0.0002, + "epoch": 2.8112, + "grad_norm": 0.00913059018052819, + "learning_rate": 2.071765915548274e-07, + "loss": 0.0001, "step": 1757 }, { - "epoch": 2.6317365269461077, - "grad_norm": 0.11039538575586764, - "learning_rate": 7.86933500211191e-07, - "loss": 0.001, + "epoch": 2.8128, + "grad_norm": 0.003241407222481324, + "learning_rate": 2.036919225091827e-07, + "loss": 0.0, "step": 1758 }, { - "epoch": 2.6332335329341316, - "grad_norm": 0.049192018098694695, - "learning_rate": 7.806585317064852e-07, - "loss": 0.0004, + "epoch": 2.8144, + "grad_norm": 0.0014824563661412647, + "learning_rate": 2.002365067264289e-07, + "loss": 0.0, "step": 1759 }, { - "epoch": 2.6347305389221556, - "grad_norm": 0.1291838197111577, - "learning_rate": 7.744076652499655e-07, - "loss": 0.0005, + "epoch": 2.816, + "grad_norm": 0.002193225007399062, + "learning_rate": 1.9681035452496112e-07, + "loss": 0.0, "step": 1760 }, { - "epoch": 2.6362275449101795, - "grad_norm": 0.08020464788569276, - "learning_rate": 7.681809171832177e-07, - "loss": 0.0009, + "epoch": 2.8176, + "grad_norm": 0.016277810063011915, + "learning_rate": 1.9341347613579086e-07, + "loss": 0.0001, "step": 1761 }, { - "epoch": 2.6377245508982035, - "grad_norm": 0.1341437369451868, - "learning_rate": 7.619783037847783e-07, - "loss": 0.001, + "epoch": 2.8192, + "grad_norm": 0.0030854031883880886, + "learning_rate": 1.900458817025097e-07, + "loss": 0.0, "step": 1762 }, { - "epoch": 2.6392215568862274, - "grad_norm": 0.09583181308733467, - "learning_rate": 7.557998412700929e-07, - "loss": 0.0006, + "epoch": 2.8208, + "grad_norm": 0.08264368069386306, + "learning_rate": 1.867075812812691e-07, + "loss": 0.0002, "step": 1763 }, { - "epoch": 2.6407185628742518, - "grad_norm": 0.06724799411288579, - "learning_rate": 7.496455457914576e-07, - "loss": 0.0006, + "epoch": 2.8224, + "grad_norm": 0.03472191587129353, + "learning_rate": 1.8339858484073935e-07, + "loss": 0.0002, "step": 1764 }, { - "epoch": 2.6422155688622757, - "grad_norm": 0.009341167893632577, - "learning_rate": 7.435154334380001e-07, + "epoch": 2.824, + "grad_norm": 0.005196227636129316, + "learning_rate": 1.8011890226208527e-07, "loss": 0.0001, "step": 1765 }, { - "epoch": 2.6437125748502996, - "grad_norm": 0.005557298748028384, - "learning_rate": 7.374095202356235e-07, - "loss": 0.0001, + "epoch": 2.8256, + "grad_norm": 0.001364559398626262, + "learning_rate": 1.7686854333893833e-07, + "loss": 0.0, "step": 1766 }, { - "epoch": 2.6452095808383236, - "grad_norm": 0.06652628053372737, - "learning_rate": 7.313278221469621e-07, - "loss": 0.0004, + "epoch": 2.8272, + "grad_norm": 0.004988283718200237, + "learning_rate": 1.7364751777736334e-07, + "loss": 0.0001, "step": 1767 }, { - "epoch": 2.6467065868263475, - "grad_norm": 0.054699524495385746, - "learning_rate": 7.252703550713557e-07, - "loss": 0.0003, + "epoch": 2.8288, + "grad_norm": 0.016502732136393267, + "learning_rate": 1.7045583519583075e-07, + "loss": 0.0001, "step": 1768 }, { - "epoch": 2.6482035928143715, - "grad_norm": 0.03120350774247115, - "learning_rate": 7.192371348447835e-07, - "loss": 0.0002, + "epoch": 2.8304, + "grad_norm": 0.018912545591399193, + "learning_rate": 1.6729350512519006e-07, + "loss": 0.0001, "step": 1769 }, { - "epoch": 2.6497005988023954, - "grad_norm": 0.04231080059683171, - "learning_rate": 7.132281772398486e-07, - "loss": 0.0002, + "epoch": 2.832, + "grad_norm": 0.0019077932914240384, + "learning_rate": 1.6416053700863965e-07, + "loss": 0.0, "step": 1770 }, { - "epoch": 2.6511976047904193, - "grad_norm": 0.2930775130526807, - "learning_rate": 7.072434979657139e-07, - "loss": 0.0008, + "epoch": 2.8336, + "grad_norm": 0.0014420267763700873, + "learning_rate": 1.6105694020169594e-07, + "loss": 0.0, "step": 1771 }, { - "epoch": 2.6526946107784433, - "grad_norm": 0.06888790537173271, - "learning_rate": 7.012831126680808e-07, - "loss": 0.0006, + "epoch": 2.8352, + "grad_norm": 0.00361458803357153, + "learning_rate": 1.5798272397217097e-07, + "loss": 0.0001, "step": 1772 }, { - "epoch": 2.654191616766467, - "grad_norm": 0.10410134244700359, - "learning_rate": 6.953470369291349e-07, - "loss": 0.0008, + "epoch": 2.8368, + "grad_norm": 0.0036026081389549636, + "learning_rate": 1.5493789750014032e-07, + "loss": 0.0001, "step": 1773 }, { - "epoch": 2.655688622754491, - "grad_norm": 0.03504060316417563, - "learning_rate": 6.894352862675069e-07, - "loss": 0.0003, + "epoch": 2.8384, + "grad_norm": 0.014148198586583473, + "learning_rate": 1.519224698779198e-07, + "loss": 0.0001, "step": 1774 }, { - "epoch": 2.657185628742515, - "grad_norm": 0.07689984954156745, - "learning_rate": 6.835478761382442e-07, - "loss": 0.0005, + "epoch": 2.84, + "grad_norm": 0.0033210044607744184, + "learning_rate": 1.489364501100332e-07, + "loss": 0.0001, "step": 1775 }, { - "epoch": 2.658682634730539, - "grad_norm": 0.11005571108502613, - "learning_rate": 6.776848219327469e-07, - "loss": 0.0004, + "epoch": 2.8416, + "grad_norm": 0.0028672151551928813, + "learning_rate": 1.459798471131868e-07, + "loss": 0.0, "step": 1776 }, { - "epoch": 2.660179640718563, - "grad_norm": 0.06839160088182221, - "learning_rate": 6.718461389787534e-07, - "loss": 0.0005, + "epoch": 2.8432, + "grad_norm": 0.003315190926113845, + "learning_rate": 1.430526697162482e-07, + "loss": 0.0, "step": 1777 }, { - "epoch": 2.661676646706587, - "grad_norm": 0.1288662945687303, - "learning_rate": 6.660318425402846e-07, - "loss": 0.0011, + "epoch": 2.8448, + "grad_norm": 0.01717408122754632, + "learning_rate": 1.4015492666021313e-07, + "loss": 0.0001, "step": 1778 }, { - "epoch": 2.663173652694611, - "grad_norm": 0.07138445232695517, - "learning_rate": 6.602419478176037e-07, - "loss": 0.0005, + "epoch": 2.8464, + "grad_norm": 0.01088849960193917, + "learning_rate": 1.3728662659818205e-07, + "loss": 0.0001, "step": 1779 }, { - "epoch": 2.6646706586826348, - "grad_norm": 0.014327931523637798, - "learning_rate": 6.544764699471906e-07, - "loss": 0.0001, + "epoch": 2.848, + "grad_norm": 0.1375883157992815, + "learning_rate": 1.344477780953346e-07, + "loss": 0.001, "step": 1780 }, { - "epoch": 2.6661676646706587, - "grad_norm": 0.27063193324926954, - "learning_rate": 6.487354240016807e-07, - "loss": 0.0014, + "epoch": 2.8496, + "grad_norm": 0.007703529092860496, + "learning_rate": 1.3163838962890196e-07, + "loss": 0.0001, "step": 1781 }, { - "epoch": 2.6676646706586826, - "grad_norm": 0.023863898462553983, - "learning_rate": 6.430188249898473e-07, - "loss": 0.0002, + "epoch": 2.8512, + "grad_norm": 0.001354706538486727, + "learning_rate": 1.2885846958814673e-07, + "loss": 0.0, "step": 1782 }, { - "epoch": 2.6691616766467066, - "grad_norm": 0.031269105141780405, - "learning_rate": 6.373266878565454e-07, - "loss": 0.0002, + "epoch": 2.8528000000000002, + "grad_norm": 0.00390980837126583, + "learning_rate": 1.2610802627432972e-07, + "loss": 0.0, "step": 1783 }, { - "epoch": 2.6706586826347305, - "grad_norm": 0.01870608880096565, - "learning_rate": 6.316590274826828e-07, - "loss": 0.0002, + "epoch": 2.8544, + "grad_norm": 0.0018654410421585208, + "learning_rate": 1.2338706790069433e-07, + "loss": 0.0, "step": 1784 }, { - "epoch": 2.6721556886227544, - "grad_norm": 0.07554082317016642, - "learning_rate": 6.260158586851761e-07, - "loss": 0.0004, + "epoch": 2.856, + "grad_norm": 0.013021255953897921, + "learning_rate": 1.206956025924333e-07, + "loss": 0.0001, "step": 1785 }, { - "epoch": 2.6736526946107784, - "grad_norm": 0.031568014750176135, - "learning_rate": 6.203971962169153e-07, - "loss": 0.0002, + "epoch": 2.8576, + "grad_norm": 0.0018423331706124276, + "learning_rate": 1.1803363838667092e-07, + "loss": 0.0, "step": 1786 }, { - "epoch": 2.6751497005988023, - "grad_norm": 0.07930375079821568, - "learning_rate": 6.148030547667272e-07, - "loss": 0.0005, + "epoch": 2.8592, + "grad_norm": 0.0026056273680482773, + "learning_rate": 1.1540118323243866e-07, + "loss": 0.0, "step": 1787 }, { - "epoch": 2.6766467065868262, - "grad_norm": 0.03552785332381245, - "learning_rate": 6.092334489593233e-07, - "loss": 0.0002, + "epoch": 2.8608000000000002, + "grad_norm": 0.07989533085392884, + "learning_rate": 1.1279824499064396e-07, + "loss": 0.0013, "step": 1788 }, { - "epoch": 2.67814371257485, - "grad_norm": 0.031851214767568116, - "learning_rate": 6.036883933552806e-07, - "loss": 0.0002, + "epoch": 2.8624, + "grad_norm": 0.0018641526095015037, + "learning_rate": 1.1022483143405705e-07, + "loss": 0.0, "step": 1789 }, { - "epoch": 2.679640718562874, - "grad_norm": 0.04962775237902228, - "learning_rate": 5.981679024509935e-07, - "loss": 0.0002, + "epoch": 2.864, + "grad_norm": 0.009751574403888156, + "learning_rate": 1.0768095024728309e-07, + "loss": 0.0001, "step": 1790 }, { - "epoch": 2.681137724550898, - "grad_norm": 0.04407787101186428, - "learning_rate": 5.926719906786348e-07, - "loss": 0.0004, + "epoch": 2.8656, + "grad_norm": 0.007089527501608644, + "learning_rate": 1.0516660902673448e-07, + "loss": 0.0001, "step": 1791 }, { - "epoch": 2.682634730538922, - "grad_norm": 0.07555818249783224, - "learning_rate": 5.872006724061208e-07, - "loss": 0.0016, + "epoch": 2.8672, + "grad_norm": 0.006357592150038937, + "learning_rate": 1.0268181528061749e-07, + "loss": 0.0, "step": 1792 }, { - "epoch": 2.684131736526946, - "grad_norm": 0.02537315182664898, - "learning_rate": 5.817539619370727e-07, - "loss": 0.0002, + "epoch": 2.8688000000000002, + "grad_norm": 0.030832002709942744, + "learning_rate": 1.0022657642890232e-07, + "loss": 0.0001, "step": 1793 }, { - "epoch": 2.68562874251497, - "grad_norm": 0.018001211033930535, - "learning_rate": 5.763318735107826e-07, + "epoch": 2.8704, + "grad_norm": 0.002785122491385037, + "learning_rate": 9.780089980330643e-08, "loss": 0.0001, "step": 1794 }, { - "epoch": 2.687125748502994, - "grad_norm": 0.053445457775856076, - "learning_rate": 5.709344213021706e-07, - "loss": 0.0005, + "epoch": 2.872, + "grad_norm": 0.008951434154069501, + "learning_rate": 9.540479264726676e-08, + "loss": 0.0001, "step": 1795 }, { - "epoch": 2.6886227544910177, - "grad_norm": 0.05535585142367004, - "learning_rate": 5.655616194217517e-07, - "loss": 0.0003, + "epoch": 2.8736, + "grad_norm": 0.0010888299635037437, + "learning_rate": 9.303826211592316e-08, + "loss": 0.0, "step": 1796 }, { - "epoch": 2.6901197604790417, - "grad_norm": 0.04892377036746475, - "learning_rate": 5.602134819155969e-07, - "loss": 0.0003, + "epoch": 2.8752, + "grad_norm": 0.006382911098808215, + "learning_rate": 9.070131527609604e-08, + "loss": 0.0001, "step": 1797 }, { - "epoch": 2.6916167664670656, - "grad_norm": 0.020075394906988262, - "learning_rate": 5.548900227652987e-07, + "epoch": 2.8768000000000002, + "grad_norm": 0.02174671615525393, + "learning_rate": 8.839395910626214e-08, "loss": 0.0001, "step": 1798 }, { - "epoch": 2.69311377245509, - "grad_norm": 0.07645566349863471, - "learning_rate": 5.495912558879312e-07, - "loss": 0.0003, + "epoch": 2.8784, + "grad_norm": 0.003927400796810354, + "learning_rate": 8.61162004965388e-08, + "loss": 0.0, "step": 1799 }, { - "epoch": 2.694610778443114, - "grad_norm": 0.05819348736751214, - "learning_rate": 5.44317195136016e-07, - "loss": 0.0003, + "epoch": 2.88, + "grad_norm": 0.0014568560469440488, + "learning_rate": 8.386804624865851e-08, + "loss": 0.0, "step": 1800 }, { - "epoch": 2.696107784431138, - "grad_norm": 0.06457619663171396, - "learning_rate": 5.390678542974903e-07, - "loss": 0.0006, + "epoch": 2.8816, + "grad_norm": 0.003110881397941878, + "learning_rate": 8.16495030759501e-08, + "loss": 0.0, "step": 1801 }, { - "epoch": 2.697604790419162, - "grad_norm": 0.05017770166227597, - "learning_rate": 5.33843247095659e-07, - "loss": 0.0004, + "epoch": 2.8832, + "grad_norm": 0.005511663709665823, + "learning_rate": 7.946057760332193e-08, + "loss": 0.0001, "step": 1802 }, { - "epoch": 2.6991017964071857, - "grad_norm": 0.06428649128731666, - "learning_rate": 5.286433871891706e-07, - "loss": 0.0003, + "epoch": 2.8848000000000003, + "grad_norm": 0.007081682196621321, + "learning_rate": 7.730127636723539e-08, + "loss": 0.0001, "step": 1803 }, { - "epoch": 2.7005988023952097, - "grad_norm": 0.021010239698939993, - "learning_rate": 5.234682881719766e-07, - "loss": 0.0002, + "epoch": 2.8864, + "grad_norm": 0.0077041810174473075, + "learning_rate": 7.517160581569371e-08, + "loss": 0.0001, "step": 1804 }, { - "epoch": 2.7020958083832336, - "grad_norm": 0.03672140143952641, - "learning_rate": 5.183179635732937e-07, - "loss": 0.0002, + "epoch": 2.888, + "grad_norm": 0.006456232346309572, + "learning_rate": 7.307157230821426e-08, + "loss": 0.0001, "step": 1805 }, { - "epoch": 2.7035928143712575, - "grad_norm": 0.024124471357117365, - "learning_rate": 5.131924268575739e-07, - "loss": 0.0001, + "epoch": 2.8895999999999997, + "grad_norm": 0.034445837063582806, + "learning_rate": 7.100118211581852e-08, + "loss": 0.0002, "step": 1806 }, { - "epoch": 2.7050898203592815, - "grad_norm": 0.08077960062382916, - "learning_rate": 5.080916914244671e-07, - "loss": 0.0008, + "epoch": 2.8912, + "grad_norm": 0.0017820580940200157, + "learning_rate": 6.896044142100433e-08, + "loss": 0.0, "step": 1807 }, { - "epoch": 2.7065868263473054, - "grad_norm": 0.10237393145848973, - "learning_rate": 5.030157706087812e-07, - "loss": 0.0006, + "epoch": 2.8928000000000003, + "grad_norm": 0.06907803624791439, + "learning_rate": 6.694935631773259e-08, + "loss": 0.0004, "step": 1808 }, { - "epoch": 2.7080838323353293, - "grad_norm": 0.07472554370165986, - "learning_rate": 4.979646776804559e-07, - "loss": 0.0002, + "epoch": 2.8944, + "grad_norm": 0.005581726166082013, + "learning_rate": 6.496793281141056e-08, + "loss": 0.0001, "step": 1809 }, { - "epoch": 2.7095808383233533, - "grad_norm": 0.05873174733684072, - "learning_rate": 4.929384258445213e-07, - "loss": 0.0003, + "epoch": 2.896, + "grad_norm": 0.003311865968052793, + "learning_rate": 6.301617681886863e-08, + "loss": 0.0001, "step": 1810 }, { - "epoch": 2.711077844311377, - "grad_norm": 0.014282050561278747, - "learning_rate": 4.879370282410633e-07, - "loss": 0.0001, + "epoch": 2.8975999999999997, + "grad_norm": 0.0015380255213800762, + "learning_rate": 6.109409416834689e-08, + "loss": 0.0, "step": 1811 }, { - "epoch": 2.712574850299401, - "grad_norm": 0.06788628860680303, - "learning_rate": 4.829604979451997e-07, - "loss": 0.0003, + "epoch": 2.8992, + "grad_norm": 0.10057737534756946, + "learning_rate": 5.920169059947412e-08, + "loss": 0.0006, "step": 1812 }, { - "epoch": 2.714071856287425, - "grad_norm": 0.12438103093166794, - "learning_rate": 4.780088479670275e-07, - "loss": 0.0006, + "epoch": 2.9008000000000003, + "grad_norm": 0.04435905365195514, + "learning_rate": 5.7338971763256646e-08, + "loss": 0.0001, "step": 1813 }, { - "epoch": 2.715568862275449, - "grad_norm": 0.10531200528785536, - "learning_rate": 4.7308209125160654e-07, - "loss": 0.0004, + "epoch": 2.9024, + "grad_norm": 0.0919263972543488, + "learning_rate": 5.5505943222055046e-08, + "loss": 0.0005, "step": 1814 }, { - "epoch": 2.717065868263473, - "grad_norm": 0.1130560374315321, - "learning_rate": 4.68180240678916e-07, - "loss": 0.0009, + "epoch": 2.904, + "grad_norm": 0.007016794466427644, + "learning_rate": 5.37026104495697e-08, + "loss": 0.0001, "step": 1815 }, { - "epoch": 2.718562874251497, - "grad_norm": 0.02886495639077637, - "learning_rate": 4.6330330906382394e-07, - "loss": 0.0002, + "epoch": 2.9055999999999997, + "grad_norm": 0.004893016628908267, + "learning_rate": 5.192897883082748e-08, + "loss": 0.0001, "step": 1816 }, { - "epoch": 2.720059880239521, - "grad_norm": 0.02889119584742746, - "learning_rate": 4.5845130915605165e-07, - "loss": 0.0002, + "epoch": 2.9072, + "grad_norm": 0.004737890190966583, + "learning_rate": 5.0185053662161756e-08, + "loss": 0.0001, "step": 1817 }, { - "epoch": 2.7215568862275448, - "grad_norm": 0.03501511903364185, - "learning_rate": 4.536242536401403e-07, - "loss": 0.0002, + "epoch": 2.9088000000000003, + "grad_norm": 0.0022874384419645805, + "learning_rate": 4.8470840151195745e-08, + "loss": 0.0, "step": 1818 }, { - "epoch": 2.7230538922155687, - "grad_norm": 0.08233337203210803, - "learning_rate": 4.488221551354266e-07, - "loss": 0.0005, + "epoch": 2.9104, + "grad_norm": 0.0021399578018113266, + "learning_rate": 4.678634341683252e-08, + "loss": 0.0, "step": 1819 }, { - "epoch": 2.724550898203593, - "grad_norm": 0.056588559658037704, - "learning_rate": 4.440450261959905e-07, - "loss": 0.0006, + "epoch": 2.912, + "grad_norm": 0.005709094685496079, + "learning_rate": 4.513156848923616e-08, + "loss": 0.0001, "step": 1820 }, { - "epoch": 2.726047904191617, - "grad_norm": 0.07016765453031978, - "learning_rate": 4.3929287931064544e-07, - "loss": 0.0003, + "epoch": 2.9135999999999997, + "grad_norm": 0.008577173987016086, + "learning_rate": 4.350652030981395e-08, + "loss": 0.0001, "step": 1821 }, { - "epoch": 2.727544910179641, - "grad_norm": 0.0983340591609416, - "learning_rate": 4.34565726902888e-07, - "loss": 0.0006, + "epoch": 2.9152, + "grad_norm": 0.027217531196427575, + "learning_rate": 4.19112037312075e-08, + "loss": 0.0001, "step": 1822 }, { - "epoch": 2.729041916167665, - "grad_norm": 0.012487690080718029, - "learning_rate": 4.298635813308727e-07, - "loss": 0.0001, + "epoch": 2.9168, + "grad_norm": 0.000788668357959617, + "learning_rate": 4.0345623517273894e-08, + "loss": 0.0, "step": 1823 }, { - "epoch": 2.730538922155689, - "grad_norm": 0.11737318912857932, - "learning_rate": 4.2518645488738296e-07, - "loss": 0.0012, + "epoch": 2.9184, + "grad_norm": 0.01309458165842595, + "learning_rate": 3.8809784343072364e-08, + "loss": 0.0001, "step": 1824 }, { - "epoch": 2.7320359281437128, - "grad_norm": 0.059519890359665956, - "learning_rate": 4.2053435979978794e-07, - "loss": 0.0004, + "epoch": 2.92, + "grad_norm": 0.002016117446819547, + "learning_rate": 3.7303690794854296e-08, + "loss": 0.0, "step": 1825 }, { - "epoch": 2.7335329341317367, - "grad_norm": 0.09479516367922022, - "learning_rate": 4.1590730823002466e-07, - "loss": 0.0005, + "epoch": 2.9215999999999998, + "grad_norm": 0.033086944748702746, + "learning_rate": 3.582734737004101e-08, + "loss": 0.0001, "step": 1826 }, { - "epoch": 2.7350299401197606, - "grad_norm": 0.03731826461903811, - "learning_rate": 4.113053122745536e-07, - "loss": 0.0003, + "epoch": 2.9232, + "grad_norm": 0.0013975054108561335, + "learning_rate": 3.438075847721933e-08, + "loss": 0.0, "step": 1827 }, { - "epoch": 2.7365269461077846, - "grad_norm": 0.08798270041960435, - "learning_rate": 4.0672838396433436e-07, - "loss": 0.0005, + "epoch": 2.9248, + "grad_norm": 0.005798801462413593, + "learning_rate": 3.2963928436122726e-08, + "loss": 0.0001, "step": 1828 }, { - "epoch": 2.7380239520958085, - "grad_norm": 0.12804962939354766, - "learning_rate": 4.0217653526479663e-07, - "loss": 0.0008, + "epoch": 2.9264, + "grad_norm": 0.00517366116128266, + "learning_rate": 3.157686147762129e-08, + "loss": 0.0001, "step": 1829 }, { - "epoch": 2.7395209580838324, - "grad_norm": 0.12923264052846514, - "learning_rate": 3.9764977807579484e-07, - "loss": 0.0007, + "epoch": 2.928, + "grad_norm": 0.0017879877759022927, + "learning_rate": 3.0219561743707326e-08, + "loss": 0.0, "step": 1830 }, { - "epoch": 2.7410179640718564, - "grad_norm": 0.05668518180462198, - "learning_rate": 3.931481242315993e-07, - "loss": 0.0003, + "epoch": 2.9295999999999998, + "grad_norm": 0.00835238037225453, + "learning_rate": 2.8892033287484245e-08, + "loss": 0.0001, "step": 1831 }, { - "epoch": 2.7425149700598803, - "grad_norm": 0.12235579377751511, - "learning_rate": 3.886715855008405e-07, - "loss": 0.0005, + "epoch": 2.9312, + "grad_norm": 0.005400086869686289, + "learning_rate": 2.7594280073152123e-08, + "loss": 0.0001, "step": 1832 }, { - "epoch": 2.7440119760479043, - "grad_norm": 0.03866968344424915, - "learning_rate": 3.842201735865003e-07, - "loss": 0.0003, + "epoch": 2.9328, + "grad_norm": 0.011053432246126335, + "learning_rate": 2.6326305976001054e-08, + "loss": 0.0001, "step": 1833 }, { - "epoch": 2.745508982035928, - "grad_norm": 0.11192895051075528, - "learning_rate": 3.797939001258677e-07, - "loss": 0.0005, + "epoch": 2.9344, + "grad_norm": 0.0013304345302983858, + "learning_rate": 2.508811478239226e-08, + "loss": 0.0, "step": 1834 }, { - "epoch": 2.747005988023952, - "grad_norm": 0.08805745160822599, - "learning_rate": 3.753927766905108e-07, - "loss": 0.0007, + "epoch": 2.936, + "grad_norm": 0.004423950780095136, + "learning_rate": 2.3879710189753657e-08, + "loss": 0.0, "step": 1835 }, { - "epoch": 2.748502994011976, - "grad_norm": 0.013429115927834216, - "learning_rate": 3.710168147862547e-07, - "loss": 0.0002, + "epoch": 2.9375999999999998, + "grad_norm": 0.010711142940809274, + "learning_rate": 2.2701095806565432e-08, + "loss": 0.0001, "step": 1836 }, { - "epoch": 2.75, - "grad_norm": 0.04732482701187792, - "learning_rate": 3.6666602585313404e-07, - "loss": 0.0003, + "epoch": 2.9392, + "grad_norm": 0.006095942388119627, + "learning_rate": 2.1552275152346702e-08, + "loss": 0.0001, "step": 1837 }, { - "epoch": 2.751497005988024, - "grad_norm": 0.14685436958806133, - "learning_rate": 3.6234042126538584e-07, - "loss": 0.0011, + "epoch": 2.9408, + "grad_norm": 0.024063537013872153, + "learning_rate": 2.0433251657653307e-08, + "loss": 0.0004, "step": 1838 }, { - "epoch": 2.752994011976048, - "grad_norm": 0.015481759689920974, - "learning_rate": 3.5804001233139983e-07, - "loss": 0.0001, + "epoch": 2.9424, + "grad_norm": 0.12475801443444066, + "learning_rate": 1.9344028664056715e-08, + "loss": 0.0002, "step": 1839 }, { - "epoch": 2.754491017964072, - "grad_norm": 0.09709654767571617, - "learning_rate": 3.5376481029369967e-07, - "loss": 0.0006, + "epoch": 2.944, + "grad_norm": 0.0013781151293487824, + "learning_rate": 1.8284609424142897e-08, + "loss": 0.0, "step": 1840 }, { - "epoch": 2.7559880239520957, - "grad_norm": 0.09473213466103927, - "learning_rate": 3.495148263289117e-07, - "loss": 0.0011, + "epoch": 2.9455999999999998, + "grad_norm": 0.011620312784639906, + "learning_rate": 1.7254997101500137e-08, + "loss": 0.0001, "step": 1841 }, { - "epoch": 2.7574850299401197, - "grad_norm": 0.029982568028086066, - "learning_rate": 3.452900715477314e-07, - "loss": 0.0002, + "epoch": 2.9472, + "grad_norm": 0.004925652464999688, + "learning_rate": 1.6255194770704586e-08, + "loss": 0.0, "step": 1842 }, { - "epoch": 2.7589820359281436, - "grad_norm": 0.037697726669431046, - "learning_rate": 3.4109055699490526e-07, - "loss": 0.0002, + "epoch": 2.9488, + "grad_norm": 0.01084143644638827, + "learning_rate": 1.528520541731915e-08, + "loss": 0.0001, "step": 1843 }, { - "epoch": 2.7604790419161676, - "grad_norm": 0.0178343543145911, - "learning_rate": 3.3691629364918323e-07, + "epoch": 2.9504, + "grad_norm": 0.006584911349587544, + "learning_rate": 1.4345031937879061e-08, "loss": 0.0001, "step": 1844 }, { - "epoch": 2.7619760479041915, - "grad_norm": 0.0868752163044962, - "learning_rate": 3.327672924233116e-07, - "loss": 0.0007, + "epoch": 2.952, + "grad_norm": 0.024693535314543748, + "learning_rate": 1.3434677139885222e-08, + "loss": 0.0001, "step": 1845 }, { - "epoch": 2.7634730538922154, - "grad_norm": 0.06282759355626048, - "learning_rate": 3.2864356416398957e-07, - "loss": 0.0003, + "epoch": 2.9536, + "grad_norm": 0.008233169511446503, + "learning_rate": 1.2554143741795311e-08, + "loss": 0.0001, "step": 1846 }, { - "epoch": 2.7649700598802394, - "grad_norm": 0.023830143819205972, - "learning_rate": 3.2454511965184453e-07, - "loss": 0.0001, + "epoch": 2.9552, + "grad_norm": 0.0012915978308971087, + "learning_rate": 1.170343437301491e-08, + "loss": 0.0, "step": 1847 }, { - "epoch": 2.7664670658682633, - "grad_norm": 0.14715683380007932, - "learning_rate": 3.204719696014069e-07, - "loss": 0.0005, + "epoch": 2.9568, + "grad_norm": 0.004358855559623065, + "learning_rate": 1.0882551573891953e-08, + "loss": 0.0001, "step": 1848 }, { - "epoch": 2.7679640718562872, - "grad_norm": 0.02531860275827708, - "learning_rate": 3.164241246610789e-07, - "loss": 0.0003, + "epoch": 2.9584, + "grad_norm": 0.03165947917388015, + "learning_rate": 1.0091497795706728e-08, + "loss": 0.0002, "step": 1849 }, { - "epoch": 2.769461077844311, - "grad_norm": 0.0726235081299919, - "learning_rate": 3.1240159541310764e-07, - "loss": 0.0007, + "epoch": 2.96, + "grad_norm": 0.004520262535504451, + "learning_rate": 9.330275400666334e-09, + "loss": 0.0, "step": 1850 }, { - "epoch": 2.770958083832335, - "grad_norm": 0.10837249282039897, - "learning_rate": 3.0840439237356025e-07, - "loss": 0.0013, + "epoch": 2.9616, + "grad_norm": 0.05056987291700106, + "learning_rate": 8.59888666189579e-09, + "loss": 0.0004, "step": 1851 }, { - "epoch": 2.772455089820359, - "grad_norm": 0.046374751147364626, - "learning_rate": 3.0443252599228865e-07, - "loss": 0.0004, + "epoch": 2.9632, + "grad_norm": 0.01759805043355931, + "learning_rate": 7.897333763433601e-09, + "loss": 0.0002, "step": 1852 }, { - "epoch": 2.773952095808383, - "grad_norm": 0.04955860777056503, - "learning_rate": 3.004860066529125e-07, - "loss": 0.0003, + "epoch": 2.9648, + "grad_norm": 0.0037291101477257473, + "learning_rate": 7.225618800222878e-09, + "loss": 0.0001, "step": 1853 }, { - "epoch": 2.775449101796407, - "grad_norm": 0.04175690267753176, - "learning_rate": 2.96564844672782e-07, - "loss": 0.0002, + "epoch": 2.9664, + "grad_norm": 0.0018585810182493614, + "learning_rate": 6.583743778106888e-09, + "loss": 0.0, "step": 1854 }, { - "epoch": 2.7769461077844313, - "grad_norm": 0.01101306053826792, - "learning_rate": 2.9266905030296164e-07, - "loss": 0.0001, + "epoch": 2.968, + "grad_norm": 0.09601546724932118, + "learning_rate": 5.971710613821291e-09, + "loss": 0.0003, "step": 1855 }, { - "epoch": 2.7784431137724552, - "grad_norm": 0.061584971883424364, - "learning_rate": 2.887986337281934e-07, - "loss": 0.0005, + "epoch": 2.9696, + "grad_norm": 0.0012696300863838544, + "learning_rate": 5.3895211349896946e-09, + "loss": 0.0, "step": 1856 }, { - "epoch": 2.779940119760479, - "grad_norm": 0.038380836869109265, - "learning_rate": 2.8495360506687444e-07, - "loss": 0.0002, + "epoch": 2.9712, + "grad_norm": 0.004006260952148794, + "learning_rate": 4.837177080119215e-09, + "loss": 0.0, "step": 1857 }, { - "epoch": 2.781437125748503, - "grad_norm": 0.036721797893085474, - "learning_rate": 2.811339743710328e-07, + "epoch": 2.9728, + "grad_norm": 0.00635367367348536, + "learning_rate": 4.314680098592705e-09, "loss": 0.0001, "step": 1858 }, { - "epoch": 2.782934131736527, - "grad_norm": 0.10482041789183853, - "learning_rate": 2.7733975162629655e-07, - "loss": 0.0004, + "epoch": 2.9744, + "grad_norm": 0.004998163224516518, + "learning_rate": 3.8220317506654226e-09, + "loss": 0.0001, "step": 1859 }, { - "epoch": 2.784431137724551, - "grad_norm": 0.06852021780595845, - "learning_rate": 2.735709467518699e-07, - "loss": 0.0003, + "epoch": 2.976, + "grad_norm": 0.0011077026332547931, + "learning_rate": 3.3592335074594805e-09, + "loss": 0.0, "step": 1860 }, { - "epoch": 2.785928143712575, - "grad_norm": 0.007301911146011518, - "learning_rate": 2.6982756960050925e-07, + "epoch": 2.9776, + "grad_norm": 0.0263071065239791, + "learning_rate": 2.9262867509605164e-09, "loss": 0.0001, "step": 1861 }, { - "epoch": 2.787425149700599, - "grad_norm": 0.0928237098781843, - "learning_rate": 2.661096299584953e-07, - "loss": 0.0005, + "epoch": 2.9792, + "grad_norm": 0.0038767720735516454, + "learning_rate": 2.5231927740154705e-09, + "loss": 0.0001, "step": 1862 }, { - "epoch": 2.788922155688623, - "grad_norm": 0.037133885536317905, - "learning_rate": 2.624171375456064e-07, + "epoch": 2.9808, + "grad_norm": 0.05274699982948644, + "learning_rate": 2.149952780321485e-09, "loss": 0.0002, "step": 1863 }, { - "epoch": 2.7904191616766467, - "grad_norm": 0.10006499255445611, - "learning_rate": 2.58750102015094e-07, - "loss": 0.0007, + "epoch": 2.9824, + "grad_norm": 0.02529985251576128, + "learning_rate": 1.8065678844314538e-09, + "loss": 0.0001, "step": 1864 }, { - "epoch": 2.7919161676646707, - "grad_norm": 0.04256690893240319, - "learning_rate": 2.5510853295366067e-07, - "loss": 0.0003, + "epoch": 2.984, + "grad_norm": 0.010248827513058067, + "learning_rate": 1.4930391117451427e-09, + "loss": 0.0001, "step": 1865 }, { - "epoch": 2.7934131736526946, - "grad_norm": 0.0684594948441174, - "learning_rate": 2.514924398814278e-07, - "loss": 0.0003, + "epoch": 2.9856, + "grad_norm": 0.029882135251723525, + "learning_rate": 1.209367398504746e-09, + "loss": 0.0001, "step": 1866 }, { - "epoch": 2.7949101796407185, - "grad_norm": 0.09626127387021133, - "learning_rate": 2.479018322519189e-07, - "loss": 0.0006, + "epoch": 2.9872, + "grad_norm": 0.0012465966402100829, + "learning_rate": 9.555535917993297e-10, + "loss": 0.0, "step": 1867 }, { - "epoch": 2.7964071856287425, - "grad_norm": 0.01741566020372036, - "learning_rate": 2.4433671945203076e-07, + "epoch": 2.9888, + "grad_norm": 0.004171632635330529, + "learning_rate": 7.315984495548378e-10, "loss": 0.0001, "step": 1868 }, { - "epoch": 2.7979041916167664, - "grad_norm": 0.07213965555140582, - "learning_rate": 2.4079711080200683e-07, - "loss": 0.0005, + "epoch": 2.9904, + "grad_norm": 0.004683951364801004, + "learning_rate": 5.375026405352035e-10, + "loss": 0.0, "step": 1869 }, { - "epoch": 2.7994011976047903, - "grad_norm": 0.06007506249091625, - "learning_rate": 2.3728301555541845e-07, - "loss": 0.0003, + "epoch": 2.992, + "grad_norm": 0.003909613062024481, + "learning_rate": 3.732667443390181e-10, + "loss": 0.0, "step": 1870 }, { - "epoch": 2.8008982035928143, - "grad_norm": 0.06125607539660623, - "learning_rate": 2.3379444289913344e-07, - "loss": 0.0003, + "epoch": 2.9936, + "grad_norm": 0.008748115478093845, + "learning_rate": 2.388912514017516e-10, + "loss": 0.0001, "step": 1871 }, { - "epoch": 2.802395209580838, - "grad_norm": 0.015232735958729315, - "learning_rate": 2.3033140195329985e-07, - "loss": 0.0001, + "epoch": 2.9952, + "grad_norm": 0.23714353606083233, + "learning_rate": 1.3437656298687096e-10, + "loss": 0.0009, "step": 1872 }, { - "epoch": 2.803892215568862, - "grad_norm": 0.03010125402384828, - "learning_rate": 2.2689390177131787e-07, - "loss": 0.0002, + "epoch": 2.9968, + "grad_norm": 0.001146936780494521, + "learning_rate": 5.972299119250124e-11, + "loss": 0.0, "step": 1873 }, { - "epoch": 2.805389221556886, - "grad_norm": 0.05324858893449288, - "learning_rate": 2.2348195133981344e-07, - "loss": 0.0003, - "step": 1874 - }, - { - "epoch": 2.80688622754491, - "grad_norm": 0.026758821808148425, - "learning_rate": 2.200955595786214e-07, - "loss": 0.0002, - "step": 1875 - }, - { - "epoch": 2.8083832335329344, - "grad_norm": 0.04605830316424714, - "learning_rate": 2.1673473534075895e-07, - "loss": 0.0004, - "step": 1876 - }, - { - "epoch": 2.8098802395209583, - "grad_norm": 0.04122472187922105, - "learning_rate": 2.1339948741240013e-07, - "loss": 0.0002, - "step": 1877 - }, - { - "epoch": 2.8113772455089823, - "grad_norm": 0.026915377348818586, - "learning_rate": 2.1008982451285687e-07, - "loss": 0.0003, - "step": 1878 - }, - { - "epoch": 2.812874251497006, - "grad_norm": 0.030265578451253612, - "learning_rate": 2.0680575529455017e-07, - "loss": 0.0004, - "step": 1879 - }, - { - "epoch": 2.81437125748503, - "grad_norm": 0.07654024267241427, - "learning_rate": 2.035472883430001e-07, - "loss": 0.0005, - "step": 1880 - }, - { - "epoch": 2.815868263473054, - "grad_norm": 0.02468323185102218, - "learning_rate": 2.0031443217678582e-07, - "loss": 0.0003, - "step": 1881 - }, - { - "epoch": 2.817365269461078, - "grad_norm": 0.14330870714264016, - "learning_rate": 1.9710719524753896e-07, - "loss": 0.0008, - "step": 1882 - }, - { - "epoch": 2.818862275449102, - "grad_norm": 0.06963422959188056, - "learning_rate": 1.939255859399114e-07, - "loss": 0.0004, - "step": 1883 - }, - { - "epoch": 2.820359281437126, - "grad_norm": 0.03669033456219445, - "learning_rate": 1.9076961257155745e-07, - "loss": 0.0002, - "step": 1884 - }, - { - "epoch": 2.82185628742515, - "grad_norm": 0.08034552402519764, - "learning_rate": 1.876392833931151e-07, - "loss": 0.0008, - "step": 1885 - }, - { - "epoch": 2.8233532934131738, - "grad_norm": 0.011926808859062795, - "learning_rate": 1.8453460658817367e-07, - "loss": 0.0001, - "step": 1886 - }, - { - "epoch": 2.8248502994011977, - "grad_norm": 0.07355376029482306, - "learning_rate": 1.814555902732673e-07, - "loss": 0.0006, - "step": 1887 - }, - { - "epoch": 2.8263473053892216, - "grad_norm": 0.029801616810496537, - "learning_rate": 1.7840224249784154e-07, - "loss": 0.0002, - "step": 1888 - }, - { - "epoch": 2.8278443113772456, - "grad_norm": 0.12698015721022915, - "learning_rate": 1.7537457124423896e-07, - "loss": 0.0004, - "step": 1889 - }, - { - "epoch": 2.8293413173652695, - "grad_norm": 0.06530833972448825, - "learning_rate": 1.723725844276747e-07, - "loss": 0.0004, - "step": 1890 - }, - { - "epoch": 2.8308383233532934, - "grad_norm": 0.12703576094207422, - "learning_rate": 1.693962898962187e-07, - "loss": 0.0005, - "step": 1891 - }, - { - "epoch": 2.8323353293413174, - "grad_norm": 0.06299450353431471, - "learning_rate": 1.664456954307736e-07, - "loss": 0.0004, - "step": 1892 - }, - { - "epoch": 2.8338323353293413, - "grad_norm": 0.0345823141248644, - "learning_rate": 1.635208087450524e-07, - "loss": 0.0002, - "step": 1893 - }, - { - "epoch": 2.8353293413173652, - "grad_norm": 0.07171038665684742, - "learning_rate": 1.6062163748556402e-07, - "loss": 0.0005, - "step": 1894 - }, - { - "epoch": 2.836826347305389, - "grad_norm": 0.040539395882641424, - "learning_rate": 1.5774818923158687e-07, - "loss": 0.0004, - "step": 1895 - }, - { - "epoch": 2.838323353293413, - "grad_norm": 0.01721308734367943, - "learning_rate": 1.5490047149515186e-07, - "loss": 0.0001, - "step": 1896 - }, - { - "epoch": 2.839820359281437, - "grad_norm": 0.03712780760613839, - "learning_rate": 1.5207849172102827e-07, - "loss": 0.0002, - "step": 1897 - }, - { - "epoch": 2.841317365269461, - "grad_norm": 0.017202811427165027, - "learning_rate": 1.492822572866892e-07, - "loss": 0.0001, - "step": 1898 - }, - { - "epoch": 2.842814371257485, - "grad_norm": 0.08062656343801933, - "learning_rate": 1.4651177550231043e-07, - "loss": 0.0004, - "step": 1899 - }, - { - "epoch": 2.844311377245509, - "grad_norm": 0.03769317002353988, - "learning_rate": 1.4376705361073828e-07, - "loss": 0.0004, - "step": 1900 - }, - { - "epoch": 2.845808383233533, - "grad_norm": 0.0595900137707606, - "learning_rate": 1.410480987874774e-07, - "loss": 0.0004, - "step": 1901 - }, - { - "epoch": 2.8473053892215567, - "grad_norm": 0.0456203631893289, - "learning_rate": 1.3835491814066847e-07, - "loss": 0.0003, - "step": 1902 - }, - { - "epoch": 2.8488023952095807, - "grad_norm": 0.06469454614345947, - "learning_rate": 1.3568751871107068e-07, - "loss": 0.0003, - "step": 1903 - }, - { - "epoch": 2.8502994011976046, - "grad_norm": 0.14575247322591645, - "learning_rate": 1.3304590747204582e-07, - "loss": 0.0018, - "step": 1904 - }, - { - "epoch": 2.8517964071856285, - "grad_norm": 0.09067521256579207, - "learning_rate": 1.3043009132953533e-07, - "loss": 0.0009, - "step": 1905 - }, - { - "epoch": 2.8532934131736525, - "grad_norm": 0.013493249897349183, - "learning_rate": 1.2784007712204562e-07, - "loss": 0.0001, - "step": 1906 - }, - { - "epoch": 2.8547904191616764, - "grad_norm": 0.11691368942179577, - "learning_rate": 1.2527587162062927e-07, - "loss": 0.0009, - "step": 1907 - }, - { - "epoch": 2.8562874251497004, - "grad_norm": 0.037850088975618104, - "learning_rate": 1.2273748152886844e-07, - "loss": 0.0003, - "step": 1908 - }, - { - "epoch": 2.8577844311377243, - "grad_norm": 0.1141808276161799, - "learning_rate": 1.2022491348285482e-07, - "loss": 0.0006, - "step": 1909 - }, - { - "epoch": 2.8592814371257482, - "grad_norm": 0.02925451773697013, - "learning_rate": 1.1773817405117405e-07, - "loss": 0.0002, - "step": 1910 - }, - { - "epoch": 2.8607784431137726, - "grad_norm": 0.05017111181234729, - "learning_rate": 1.1527726973488918e-07, - "loss": 0.0003, - "step": 1911 - }, - { - "epoch": 2.8622754491017965, - "grad_norm": 0.04552663871537052, - "learning_rate": 1.1284220696752168e-07, + "epoch": 2.9984, + "grad_norm": 0.04094181628426927, + "learning_rate": 1.4930758944764477e-11, "loss": 0.0002, - "step": 1912 - }, - { - "epoch": 2.8637724550898205, - "grad_norm": 0.1413296347951028, - "learning_rate": 1.1043299211503489e-07, - "loss": 0.0007, - "step": 1913 - }, - { - "epoch": 2.8652694610778444, - "grad_norm": 0.01983528978302256, - "learning_rate": 1.080496314758217e-07, - "loss": 0.0001, - "step": 1914 - }, - { - "epoch": 2.8667664670658684, - "grad_norm": 0.10833415515320645, - "learning_rate": 1.0569213128068024e-07, - "loss": 0.0003, - "step": 1915 - }, - { - "epoch": 2.8682634730538923, - "grad_norm": 0.04839502505058121, - "learning_rate": 1.0336049769280377e-07, - "loss": 0.0004, - "step": 1916 - }, - { - "epoch": 2.8697604790419162, - "grad_norm": 0.018562019071949903, - "learning_rate": 1.0105473680776301e-07, - "loss": 0.0001, - "step": 1917 + "step": 1874 }, { - "epoch": 2.87125748502994, - "grad_norm": 0.029694138913736658, - "learning_rate": 9.877485465349057e-08, + "epoch": 3.0, + "grad_norm": 0.07295115851459594, + "learning_rate": 0.0, "loss": 0.0003, - "step": 1918 - }, - { - "epoch": 2.872754491017964, - "grad_norm": 0.04427064392263348, - "learning_rate": 9.652085719026205e-08, - "loss": 0.0005, - "step": 1919 - }, - { - "epoch": 2.874251497005988, - "grad_norm": 0.07693934381799596, - "learning_rate": 9.429275031068608e-08, - "loss": 0.0006, - "step": 1920 - }, - { - "epoch": 2.875748502994012, - "grad_norm": 0.09527796665446048, - "learning_rate": 9.209053983968209e-08, - "loss": 0.0007, - "step": 1921 - }, - { - "epoch": 2.877245508982036, - "grad_norm": 0.058776929014960504, - "learning_rate": 8.991423153447143e-08, - "loss": 0.0003, - "step": 1922 - }, - { - "epoch": 2.87874251497006, - "grad_norm": 0.03288997471143238, - "learning_rate": 8.776383108455966e-08, - "loss": 0.0002, - "step": 1923 - }, - { - "epoch": 2.8802395209580838, - "grad_norm": 0.05757102855593304, - "learning_rate": 8.563934411171981e-08, - "loss": 0.0002, - "step": 1924 - }, - { - "epoch": 2.8817365269461077, - "grad_norm": 0.0387920150897497, - "learning_rate": 8.354077616998247e-08, - "loss": 0.0003, - "step": 1925 - }, - { - "epoch": 2.8832335329341316, - "grad_norm": 0.1232602412566441, - "learning_rate": 8.14681327456146e-08, - "loss": 0.0008, - "step": 1926 - }, - { - "epoch": 2.8847305389221556, - "grad_norm": 0.09510268541356108, - "learning_rate": 7.942141925711078e-08, - "loss": 0.0008, - "step": 1927 - }, - { - "epoch": 2.8862275449101795, - "grad_norm": 0.034899399930684916, - "learning_rate": 7.740064105517753e-08, - "loss": 0.0002, - "step": 1928 - }, - { - "epoch": 2.8877245508982035, - "grad_norm": 0.08538587733278336, - "learning_rate": 7.540580342272008e-08, - "loss": 0.0005, - "step": 1929 - }, - { - "epoch": 2.8892215568862274, - "grad_norm": 0.036141222171045685, - "learning_rate": 7.343691157482347e-08, - "loss": 0.0003, - "step": 1930 - }, - { - "epoch": 2.8907185628742518, - "grad_norm": 0.048494060524911266, - "learning_rate": 7.149397065874697e-08, - "loss": 0.0004, - "step": 1931 - }, - { - "epoch": 2.8922155688622757, - "grad_norm": 0.04348773500793645, - "learning_rate": 6.957698575390415e-08, - "loss": 0.0003, - "step": 1932 - }, - { - "epoch": 2.8937125748502996, - "grad_norm": 0.019348415703767035, - "learning_rate": 6.768596187185394e-08, - "loss": 0.0001, - "step": 1933 - }, - { - "epoch": 2.8952095808383236, - "grad_norm": 0.07990531211236819, - "learning_rate": 6.582090395628294e-08, - "loss": 0.0005, - "step": 1934 - }, - { - "epoch": 2.8967065868263475, - "grad_norm": 0.028337210973887937, - "learning_rate": 6.398181688299753e-08, - "loss": 0.0003, - "step": 1935 - }, - { - "epoch": 2.8982035928143715, - "grad_norm": 0.029288080250697478, - "learning_rate": 6.21687054599085e-08, - "loss": 0.0001, - "step": 1936 - }, - { - "epoch": 2.8997005988023954, - "grad_norm": 0.0868655909482861, - "learning_rate": 6.038157442701642e-08, - "loss": 0.0004, - "step": 1937 - }, - { - "epoch": 2.9011976047904193, - "grad_norm": 0.07224510015715654, - "learning_rate": 5.862042845640403e-08, - "loss": 0.0005, - "step": 1938 - }, - { - "epoch": 2.9026946107784433, - "grad_norm": 0.014587726019568977, - "learning_rate": 5.688527215222062e-08, - "loss": 0.0001, - "step": 1939 - }, - { - "epoch": 2.904191616766467, - "grad_norm": 0.0913841204532074, - "learning_rate": 5.517611005067092e-08, - "loss": 0.0007, - "step": 1940 - }, - { - "epoch": 2.905688622754491, - "grad_norm": 0.02053119590322276, - "learning_rate": 5.3492946620005146e-08, - "loss": 0.0004, - "step": 1941 - }, - { - "epoch": 2.907185628742515, - "grad_norm": 0.048364628173414546, - "learning_rate": 5.183578626050012e-08, - "loss": 0.0003, - "step": 1942 - }, - { - "epoch": 2.908682634730539, - "grad_norm": 0.11064582121615582, - "learning_rate": 5.020463330445813e-08, - "loss": 0.0007, - "step": 1943 - }, - { - "epoch": 2.910179640718563, - "grad_norm": 0.02823565491289188, - "learning_rate": 4.8599492016189184e-08, - "loss": 0.0003, - "step": 1944 - }, - { - "epoch": 2.911676646706587, - "grad_norm": 0.08021570823602606, - "learning_rate": 4.7020366592001046e-08, - "loss": 0.0005, - "step": 1945 - }, - { - "epoch": 2.913173652694611, - "grad_norm": 0.06441990843786155, - "learning_rate": 4.546726116018696e-08, - "loss": 0.0004, - "step": 1946 - }, - { - "epoch": 2.9146706586826348, - "grad_norm": 0.07772901840599725, - "learning_rate": 4.394017978101905e-08, - "loss": 0.0004, - "step": 1947 - }, - { - "epoch": 2.9161676646706587, - "grad_norm": 0.012281701982242639, - "learning_rate": 4.243912644673276e-08, - "loss": 0.0002, - "step": 1948 - }, - { - "epoch": 2.9176646706586826, - "grad_norm": 0.0893888921305495, - "learning_rate": 4.0964105081519046e-08, - "loss": 0.0004, - "step": 1949 - }, - { - "epoch": 2.9191616766467066, - "grad_norm": 0.007291028241031858, - "learning_rate": 3.951511954151554e-08, - "loss": 0.0001, - "step": 1950 - }, - { - "epoch": 2.9206586826347305, - "grad_norm": 0.04531637743292414, - "learning_rate": 3.809217361479212e-08, - "loss": 0.0004, - "step": 1951 - }, - { - "epoch": 2.9221556886227544, - "grad_norm": 0.10016872974204939, - "learning_rate": 3.669527102134418e-08, - "loss": 0.0006, - "step": 1952 - }, - { - "epoch": 2.9236526946107784, - "grad_norm": 0.10136592233723844, - "learning_rate": 3.532441541308384e-08, - "loss": 0.0009, - "step": 1953 - }, - { - "epoch": 2.9251497005988023, - "grad_norm": 0.099063980397416, - "learning_rate": 3.397961037382547e-08, - "loss": 0.0007, - "step": 1954 - }, - { - "epoch": 2.9266467065868262, - "grad_norm": 0.0494457607619648, - "learning_rate": 3.266085941928454e-08, - "loss": 0.0004, - "step": 1955 - }, - { - "epoch": 2.92814371257485, - "grad_norm": 0.10780498117653974, - "learning_rate": 3.136816599705883e-08, - "loss": 0.0006, - "step": 1956 - }, - { - "epoch": 2.929640718562874, - "grad_norm": 0.0658842139801036, - "learning_rate": 3.0101533486626146e-08, - "loss": 0.0005, - "step": 1957 - }, - { - "epoch": 2.931137724550898, - "grad_norm": 0.04093767820581611, - "learning_rate": 2.8860965199334346e-08, - "loss": 0.0002, - "step": 1958 - }, - { - "epoch": 2.932634730538922, - "grad_norm": 0.05070774966783894, - "learning_rate": 2.7646464378390248e-08, - "loss": 0.0004, - "step": 1959 - }, - { - "epoch": 2.934131736526946, - "grad_norm": 0.01789433543858203, - "learning_rate": 2.6458034198852954e-08, - "loss": 0.0001, - "step": 1960 - }, - { - "epoch": 2.93562874251497, - "grad_norm": 0.04336770967449903, - "learning_rate": 2.5295677767627202e-08, - "loss": 0.0002, - "step": 1961 - }, - { - "epoch": 2.937125748502994, - "grad_norm": 0.0865926476879689, - "learning_rate": 2.415939812345003e-08, - "loss": 0.0003, - "step": 1962 - }, - { - "epoch": 2.9386227544910177, - "grad_norm": 0.09911546956818736, - "learning_rate": 2.304919823688856e-08, - "loss": 0.0005, - "step": 1963 - }, - { - "epoch": 2.9401197604790417, - "grad_norm": 0.07993502202450768, - "learning_rate": 2.1965081010328903e-08, - "loss": 0.0007, - "step": 1964 - }, - { - "epoch": 2.9416167664670656, - "grad_norm": 0.04287570725916258, - "learning_rate": 2.0907049277971714e-08, - "loss": 0.0002, - "step": 1965 - }, - { - "epoch": 2.94311377245509, - "grad_norm": 0.041199397205273956, - "learning_rate": 1.987510580581664e-08, - "loss": 0.0003, - "step": 1966 - }, - { - "epoch": 2.944610778443114, - "grad_norm": 0.12923583448830503, - "learning_rate": 1.8869253291669e-08, - "loss": 0.0007, - "step": 1967 - }, - { - "epoch": 2.946107784431138, - "grad_norm": 0.03545184716551114, - "learning_rate": 1.7889494365118665e-08, - "loss": 0.0002, - "step": 1968 - }, - { - "epoch": 2.947604790419162, - "grad_norm": 0.05849241385118926, - "learning_rate": 1.6935831587541197e-08, - "loss": 0.0003, - "step": 1969 - }, - { - "epoch": 2.9491017964071857, - "grad_norm": 0.11944886195630253, - "learning_rate": 1.6008267452090054e-08, - "loss": 0.0014, - "step": 1970 - }, - { - "epoch": 2.9505988023952097, - "grad_norm": 0.023633602514857067, - "learning_rate": 1.5106804383688834e-08, - "loss": 0.0002, - "step": 1971 - }, - { - "epoch": 2.9520958083832336, - "grad_norm": 0.06252240701759237, - "learning_rate": 1.4231444739023492e-08, - "loss": 0.0005, - "step": 1972 - }, - { - "epoch": 2.9535928143712575, - "grad_norm": 0.08288723195823507, - "learning_rate": 1.338219080654013e-08, - "loss": 0.0004, - "step": 1973 - }, - { - "epoch": 2.9550898203592815, - "grad_norm": 0.02470100516563367, - "learning_rate": 1.2559044806437215e-08, - "loss": 0.0002, - "step": 1974 - }, - { - "epoch": 2.9565868263473054, - "grad_norm": 0.035384313511806896, - "learning_rate": 1.1762008890657816e-08, - "loss": 0.0002, - "step": 1975 - }, - { - "epoch": 2.9580838323353293, - "grad_norm": 0.0726241495411833, - "learning_rate": 1.099108514288627e-08, - "loss": 0.0005, - "step": 1976 - }, - { - "epoch": 2.9595808383233533, - "grad_norm": 0.05304485849070656, - "learning_rate": 1.024627557854152e-08, - "loss": 0.0002, - "step": 1977 - }, - { - "epoch": 2.961077844311377, - "grad_norm": 0.11794290635555527, - "learning_rate": 9.527582144773785e-09, - "loss": 0.0006, - "step": 1978 - }, - { - "epoch": 2.962574850299401, - "grad_norm": 0.012966785537484036, - "learning_rate": 8.835006720457895e-09, - "loss": 0.0001, - "step": 1979 - }, - { - "epoch": 2.964071856287425, - "grad_norm": 0.05050410993770117, - "learning_rate": 8.16855111618775e-09, - "loss": 0.0003, - "step": 1980 - }, - { - "epoch": 2.965568862275449, - "grad_norm": 0.0697386602951542, - "learning_rate": 7.528217074272982e-09, - "loss": 0.0004, - "step": 1981 - }, - { - "epoch": 2.967065868263473, - "grad_norm": 0.0324564659125771, - "learning_rate": 6.914006268732287e-09, - "loss": 0.0003, - "step": 1982 - }, - { - "epoch": 2.968562874251497, - "grad_norm": 0.03373070903115685, - "learning_rate": 6.325920305292332e-09, - "loss": 0.0002, - "step": 1983 - }, - { - "epoch": 2.970059880239521, - "grad_norm": 0.011788534817898908, - "learning_rate": 5.763960721382189e-09, - "loss": 0.0002, - "step": 1984 - }, - { - "epoch": 2.9715568862275448, - "grad_norm": 0.08221326208806214, - "learning_rate": 5.228128986127789e-09, - "loss": 0.0006, - "step": 1985 - }, - { - "epoch": 2.9730538922155687, - "grad_norm": 0.1270884872336597, - "learning_rate": 4.718426500349705e-09, - "loss": 0.0005, - "step": 1986 - }, - { - "epoch": 2.974550898203593, - "grad_norm": 0.058075307647141146, - "learning_rate": 4.2348545965575914e-09, - "loss": 0.0002, - "step": 1987 - }, - { - "epoch": 2.976047904191617, - "grad_norm": 0.03512480614793991, - "learning_rate": 3.7774145389524176e-09, - "loss": 0.0003, - "step": 1988 - }, - { - "epoch": 2.977544910179641, - "grad_norm": 0.07371078403449538, - "learning_rate": 3.3461075234131336e-09, - "loss": 0.0001, - "step": 1989 - }, - { - "epoch": 2.979041916167665, - "grad_norm": 0.022109669320123617, - "learning_rate": 2.9409346775033376e-09, - "loss": 0.0001, - "step": 1990 - }, - { - "epoch": 2.980538922155689, - "grad_norm": 0.048672724122854005, - "learning_rate": 2.561897060462393e-09, - "loss": 0.0004, - "step": 1991 - }, - { - "epoch": 2.9820359281437128, - "grad_norm": 0.01931500903511454, - "learning_rate": 2.208995663206537e-09, - "loss": 0.0002, - "step": 1992 - }, - { - "epoch": 2.9835329341317367, - "grad_norm": 0.04749199082195791, - "learning_rate": 1.8822314083222216e-09, - "loss": 0.0004, - "step": 1993 - }, - { - "epoch": 2.9850299401197606, - "grad_norm": 0.09729739241328733, - "learning_rate": 1.5816051500661122e-09, - "loss": 0.001, - "step": 1994 - }, - { - "epoch": 2.9865269461077846, - "grad_norm": 0.044947007048880785, - "learning_rate": 1.3071176743628677e-09, - "loss": 0.0003, - "step": 1995 - }, - { - "epoch": 2.9880239520958085, - "grad_norm": 0.13515173564413327, - "learning_rate": 1.0587696988029195e-09, - "loss": 0.0008, - "step": 1996 - }, - { - "epoch": 2.9895209580838324, - "grad_norm": 0.06726018374004353, - "learning_rate": 8.365618726413616e-10, - "loss": 0.0005, - "step": 1997 - }, - { - "epoch": 2.9910179640718564, - "grad_norm": 0.1032600661241297, - "learning_rate": 6.404947767935099e-10, - "loss": 0.0011, - "step": 1998 - }, - { - "epoch": 2.9925149700598803, - "grad_norm": 0.08635105614698947, - "learning_rate": 4.705689238360123e-10, - "loss": 0.0009, - "step": 1999 - }, - { - "epoch": 2.9940119760479043, - "grad_norm": 0.10265564812162384, - "learning_rate": 3.267847580046279e-10, - "loss": 0.0004, - "step": 2000 - }, - { - "epoch": 2.995508982035928, - "grad_norm": 0.040089587915776735, - "learning_rate": 2.091426551931175e-10, - "loss": 0.0002, - "step": 2001 - }, - { - "epoch": 2.997005988023952, - "grad_norm": 0.05526462283205673, - "learning_rate": 1.1764292295213254e-10, - "loss": 0.0002, - "step": 2002 - }, - { - "epoch": 2.998502994011976, - "grad_norm": 0.046950315243127365, - "learning_rate": 5.2285800488105495e-11, - "loss": 0.0003, - "step": 2003 - }, - { - "epoch": 3.0, - "grad_norm": 0.02255507610632034, - "learning_rate": 1.3071458665470105e-11, - "loss": 0.0001, - "step": 2004 + "step": 1875 }, { "epoch": 3.0, - "step": 2004, - "total_flos": 160061552422912.0, - "train_loss": 0.02935138660071818, - "train_runtime": 23628.3662, - "train_samples_per_second": 1.356, - "train_steps_per_second": 0.085 + "step": 1875, + "total_flos": 75591737456640.0, + "train_loss": 0.04129861024404333, + "train_runtime": 14343.312, + "train_samples_per_second": 2.092, + "train_steps_per_second": 0.131 } ], "logging_steps": 1.0, - "max_steps": 2004, + "max_steps": 1875, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 50000, - "total_flos": 160061552422912.0, + "total_flos": 75591737456640.0, "train_batch_size": 8, "trial_name": null, "trial_params": null